summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet/arp/arp.c
diff options
context:
space:
mode:
authorErik Nordmark <Erik.Nordmark@Sun.COM>2009-11-11 11:49:49 -0800
committerErik Nordmark <Erik.Nordmark@Sun.COM>2009-11-11 11:49:49 -0800
commitbd670b35a010421b6e1a5536c34453a827007c81 (patch)
tree97c2057b6771dd40411a12eb89d2db2e2b2cce31 /usr/src/uts/common/inet/arp/arp.c
parentb3388e4fc5f5c24c8a39fbe132a00b02dae5b717 (diff)
downloadillumos-gate-bd670b35a010421b6e1a5536c34453a827007c81.tar.gz
PSARC/2009/331 IP Datapath Refactoring
PSARC/2008/522 EOF of 2001/070 IPsec HW Acceleration support PSARC/2009/495 netstat -r flags for blackhole and reject routes PSARC 2009/496 EOF of XRESOLV PSARC/2009/494 IP_DONTFRAG socket option PSARC/2009/515 fragmentation controls for ping and traceroute 6798716 ip_newroute delenda est 6798739 ARP and IP are too separate 6807265 IPv4 ip2mac() support 6756382 Please remove Venus IPsec HWACCEL code 6880632 sendto/sendmsg never returns EHOSTUNREACH in Solaris 6748582 sendmsg() return OK, but doesn't send message using IPv4-mapped x IPv6 addr 1119790 TCP and path mtu discovery 4637227 should support equal-cost multi-path (ECMP) 5078568 getsockopt() for IPV6_PATHMTU on a non-connected socket should not succeed 6419648 "AR* contract private note" should be removed as part of ATM SW EOL 6274715 Arp could keep the old entry in the cache while it waits for an arp response 6605615 Remove duplicated TCP/IP opt_set/opt_get code; use conn_t 6874677 IP_TTL can be used to send with ttl zero 4034090 arp should not let you delete your own entry 6882140 Implement IP_DONTFRAG socket option 6883858 Implement ping -D option; traceroute -F should work for IPv6 and shared-IP zones 1119792 TCP/IP black hole detection is broken on receiver 4078796 Directed broadcast forwarding code has problems 4104337 restrict the IPPROTO_IP and IPPROTO_IPV6 options based on the socket family 4203747 Source address selection for source routed packets 4230259 pmtu is increased every ip_ire_pathmtu_interval timer value. 
4300533 When sticky option ipv6_pktinfo set to bogus address subsequent connect time out 4471035 ire_delete_cache_gw is called through ire_walk unnecessarily 4514572 SO_DONTROUTE socket option doesn't work with IPv6 4524980 tcp_lookup_ipv4() should compare the ifindex against tcpb->tcpb_bound_if 4532714 machine fails to switch quickly among failed default routes 4634219 IPv6 path mtu discovery is broken when using routing header 4691581 udp broadcast handling causes too many replicas 4708405 mcast is broken on machines when all interfaces are IFF_POINTOPOINT 4770457 netstat/route: source address of interface routes pretends to be gateway address 4786974 use routing table to determine routes/interface for multicast 4792619 An ip_fanout_udp_ipc_v6() routine might lead to some simpler code 4816115 Nuke ipsec_out_use_global_policy 4862844 ipsec offload corner case 4867533 tcp_rq and tcp_wq are redundant 4868589 NCEs should be shared across an IPMP group 4872093 unplumbing an improper virtual interface panics in ip_newroute_get_dst_ill() 4901671 FireEngine needs some cleanup 4907617 IPsec identity latching should be done before sending SYN-ACK 4941461 scopeid and IPV6_PKTINFO with UDP/ICMP connect() does not work properly 4944981 ip does nothing with IP6I_NEXTHOP 4963353 IPv4 and IPv6 proto fanout codes could be brought closer 4963360 consider passing zoneid using ip6i_t instead of ipsec_out_t in NDP 4963734 new ip6_asp locking is used incorrectly in ip_newroute_v6() 5008315 IPv6 code passes ip6i_t to IPsec code instead of ip6_t 5009636 memory leak in ip_fanout_proto_v6() 5092337 tcp/udp option handling can use some cleanup 5035841 Solaris can fail to create a valid broadcast ire 5043747 ar_query_xmit: Could not find the ace 5051574 tcp_check_policy is missing some checks 6305037 full hardware checksum is discarded when there're more than 2 mblks in the chain 6311149 ip.c needs to be put through a woodchipper 4708860 Unable to reassemble CGTP fragmented multicast 
packets 6224628 Large IPv6 packets with IPsec protection sometimes have length mismatch. 6213243 Solaris does not currently support Dead Gateway Detection 5029091 duplicate code in IP's input path for TCP/UDP/SCTP 4674643 through IPv6 CGTP routes, the very first packet is sent only after a while 6207318 Multiple default routes do not round robin connections to routers. 4823410 IP has an inconsistent view of link mtu 5105520 adding interface route to down interface causes ifconfig hang 5105707 advanced sockets API introduced some dead code 6318399 IP option handling for icmp and udp is too complicated 6321434 Every dropped packet in IP should use ip_drop_packet() 6341693 ifconfig mtu should operate on the physical interface, not individual ipif's 6352430 The credentials attached to an mblk are not particularly useful 6357894 uninitialised ipp_hoplimit needs to be cleaned up. 6363568 ip_xmit_v6() may be missing IRE releases in error cases 6364828 ip_rput_forward needs a makeover 6384416 System panics when running as multicast forwarder using multicast tunnels 6402382 TX: UDP v6 slowpath is not modified to handle mac_exempt conns 6418413 assertion failed ipha->ipha_ident == 0||ipha->ipha_ident == 0xFFFF 6420916 assertion failures in ipv6 wput path 6430851 use of b_prev to store ifindex is not 100% safe 6446106 IPv6 packets stored in nce->nce_qd_mp will be sent with incorrect tcp/udp checksums 6453711 SCTP OOTB sent as if genetated by global zone 6465212 ARP/IP merge should remove ire_freemblk.esballoc 6490163 ip_input() could misbehave if the first mblk's size is not big enough 6496664 missing ipif_refrele leads to reference leak and deferred crash in ip_wput_ipsec_out_v6 6504856 memory leak in ip_fanout_proto_v6() when using link local outer tunnel addresses 6507765 IRE cache hash function performs badly 6510186 IP_FORWARD_PROG bit is easily overlooked 6514727 cgtp ipv6 failure on snv54 6528286 MULTIRT (CGTP) should offload checksum to hardware 6533904 SCTP: doesn't 
support traffic class for IPv6 6539415 TX: ipif source selection is flawed for unlabeled gateways 6539851 plumbed unworking nic blocks sending broadcast packets 6564468 non-solaris SCTP stack over rawip socket: netstat command counts rawipInData not rawipOutDatagrams 6568511 ipIfStatsOutDiscards not bumped when discarding an ipsec packet on the wrong NIC 6584162 tcp_g_q_inactive() makes incorrect use of taskq_dispatch() 6603974 round-robin default with many interfaces causes infinite temporary IRE thrashing 6611750 ilm_lookup_ill_index_v4 was born an orphan 6618423 ip_wput_frag_mdt sends out packets that void pfhooks 6620964 IRE max bucket count calculations performed in ip_ire_init() are flawed 6626266 various _broadcasts seem redundant 6638182 IP_PKTINFO + SO_DONTROUTE + CIPSO IP option == panic 6647710 IPv6 possible DoS vulnerability 6657357 nce should be kmem_cache alloc'ed from an nce_cache. 6685131 ilg_add -> conn_ilg_alloc interacting with conn_ilg[] walkers can cause panic. 6730298 adding 0.0.0.0 key with mask != 0 causes 'route delete default' to fail 6730976 vni and ipv6 doesn't quite work. 6740956 assertion failed: mp->b_next == 0L && mp->b_prev == 0L in nce_queue_mp_common() 6748515 BUMP_MIB() is occasionally done on the wrong ill 6753250 ip_output_v6() `notv6' error path has an errant ill_refrele() 6756411 NULL-pointer dereference in ip_wput_local() 6769582 IP must forward packet returned from FW-HOOK 6781525 bogus usesrc usage leads directly to panic 6422839 System paniced in ip_multicast_loopback due to NULL pointer dereference 6785521 initial IPv6 DAD solicitation is dropped in ip_newroute_ipif_v6() 6787370 ipnet devices not seeing forwarded IP packets on outgoing interface 6791187 ip*dbg() calls in ip_output_options() claim to originate from ip_wput() 6794047 nce_fp_mp prevents sharing of NCEs across an IPMP group 6797926 many unnecessary ip0dbg() in ip_rput_data_v6 6846919 Packet queued for ND gets sent in the clear. 
6856591 ping doesn't send packets with DF set 6861113 arp module has incorrect dependency path for hook module 6865664 IPV6_NEXTHOP does not work with TCP socket 6874681 No ICMP time exceeded when a router receives packet with ttl = 0 6880977 ip_wput_ire() uses over 1k of stack 6595433 IPsec performance could be significantly better when calling hw crypto provider synchronously 6848397 ifconfig down of an interface can hang. 6849602 IPV6_PATHMTU size issue for UDP 6885359 Add compile-time option for testing pure IPsec overhead 6889268 Odd loopback source address selection with IPMP 6895420 assertion failed: connp->conn_helper_info == NULL 6851189 Routing-related panic occurred during reboot on T2000 system running snv_117 6896174 Post-async-encryption, AH+ESP packets may have misinitialized ipha/ip6 6896687 iptun presents IPv6 with an MTU < 1280 6897006 assertion failed: ipif->ipif_id != 0 in ip_sioctl_slifzone_restart
Diffstat (limited to 'usr/src/uts/common/inet/arp/arp.c')
-rw-r--r--usr/src/uts/common/inet/arp/arp.c4883
1 files changed, 0 insertions, 4883 deletions
diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c
deleted file mode 100644
index abdbc39a47..0000000000
--- a/usr/src/uts/common/inet/arp/arp.c
+++ /dev/null
@@ -1,4883 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright (c) 1990 Mentat Inc. */
-
-/* AR - Address Resolution Protocol */
-
-#include <sys/types.h>
-#include <sys/stream.h>
-#include <sys/stropts.h>
-#include <sys/strsubr.h>
-#include <sys/errno.h>
-#include <sys/strlog.h>
-#include <sys/dlpi.h>
-#include <sys/sockio.h>
-#define _SUN_TPI_VERSION 2
-#include <sys/tihdr.h>
-#include <sys/socket.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/cmn_err.h>
-#include <sys/sdt.h>
-#include <sys/vtrace.h>
-#include <sys/strsun.h>
-#include <sys/policy.h>
-#include <sys/zone.h>
-#include <sys/ethernet.h>
-#include <sys/zone.h>
-#include <sys/random.h>
-#include <sys/sdt.h>
-#include <sys/hook_event.h>
-
-#include <inet/common.h>
-#include <inet/optcom.h>
-#include <inet/mi.h>
-#include <inet/nd.h>
-#include <inet/snmpcom.h>
-#include <net/if.h>
-#include <inet/arp.h>
-#include <netinet/ip6.h>
-#include <netinet/arp.h>
-#include <inet/ip.h>
-#include <inet/ip_ire.h>
-#include <inet/ip_ndp.h>
-#include <inet/mib2.h>
-#include <inet/arp_impl.h>
-
-/*
- * ARP entry life time and design notes
- * ------------------------------------
- *
- * ARP entries (ACEs) must last at least as long as IP knows about a given
- * MAC-IP translation (i.e., as long as the IRE cache entry exists). It's ok
- * if the ARP entry lasts longer, but not ok if it is removed before the IP
- * entry. The reason for this is that if ARP doesn't have an entry, we will be
- * unable to detect the difference between an ARP broadcast that represents no
- * change (same, known address of sender) and one that represents a change (new
- * address for existing entry). In the former case, we must not notify IP, or
- * we can suffer hurricane attack. In the latter case, we must notify IP, or
- * IP will drift out of sync with the network.
- *
- * Note that IP controls the lifetime of entries, not ARP.
- *
- * We don't attempt to reconfirm aging entries. If the system is no longer
- * talking to a given peer, then it doesn't matter if we have the right mapping
- * for that peer. It would be possible to send queries on aging entries that
- * are active, but this isn't done.
- *
- * IPMP Notes
- * ----------
- *
- * ARP is aware of IPMP. In particular, IP notifies ARP about all "active"
- * (able to transmit data packets) interfaces in a given group via
- * AR_IPMP_ACTIVATE and AR_IPMP_DEACTIVATE messages. These messages, combined
- * with the "IPMP arl_t" that ARP creates over the IPMP DLPI stub driver,
- * enable ARP to track all the arl_t's that are in the same group and thus
- * ensure that ACEs are shared across each group and the arl_t that ARP
- * chooses to transmit on for a given ACE is optimal.
- *
- * ARP relies on IP for hardware address updates. In particular, if the
- * hardware address of an interface changes (DL_NOTE_PHYS_ADDR), then IP will
- * bring the interface down and back up -- and as part of bringing it back
- * up, will send messages to ARP that allow it to update the affected arl's
- * with new hardware addresses.
- *
- * N.B.: One side-effect of this approach is that when an interface fails and
- * then starts to repair, it will temporarily populate the ARP cache with
- * addresses that are owned by it rather than the group's arl_t. To address
- * this, we could add more messages (e.g., AR_IPMP_JOIN and AR_IPMP_LEAVE),
- * but as the issue appears to be only cosmetic (redundant entries in the ARP
- * cache during interface repair), we've kept things simple for now.
- */
-
-/*
- * This is used when scanning for "old" (least recently broadcast) ACEs. We
- * don't want to have to walk the list for every single one, so we gather up
- * batches at a time. ACE_RESCHED_LIST_LEN is the capacity of one such batch.
- */
-#define ACE_RESCHED_LIST_LEN 8
-
-/*
- * One batch of ACEs collected during a rescheduling scan.
- * NOTE(review): the field roles below are inferred from the names only; the
- * code that fills in and consumes this structure is not visible here.
- */
-typedef struct {
- arl_t *art_arl; /* presumably the arl the scan applies to -- confirm */
- uint_t art_naces; /* presumably the number of art_aces[] slots in use */
- ace_t *art_aces[ACE_RESCHED_LIST_LEN]; /* the batch itself */
-} ace_resched_t;
-
-/* Non-zero when the ACE_F_RESOLVED flag is set on `ace'. */
-#define ACE_RESOLVED(ace) ((ace)->ace_flags & ACE_F_RESOLVED)
-/* True when `ace' has ACE_F_RESOLVED set and ACE_F_PERMANENT clear. */
-#define ACE_NONPERM(ace) \
- (((ace)->ace_flags & (ACE_F_RESOLVED | ACE_F_PERMANENT)) == \
- ACE_F_RESOLVED)
-
-#define AR_DEF_XMIT_INTERVAL 500 /* time in milliseconds */
-#define AR_LL_HDR_SLACK 32 /* Leave the lower layer some room */
-
-/* Command code under which ar_snmp_msg is registered in ar_cmd_tbl. */
-#define AR_SNMP_MSG T_OPTMGMT_ACK
-/* Sentinel stored in mp->b_prev; tested by CMD_NEEDS_QUEUEING(). */
-#define AR_DRAINING (void *)0x11
-
-/*
- * The IPv4 Link Local address space is special; we do extra duplicate checking
- * there, as the entire assignment mechanism rests on random numbers.
- *
- * `ptr' points at the first octet of an address; the macro is true when the
- * first two octets are 169 and 254 (i.e., 169.254.0.0/16). The argument is
- * fully parenthesized so that an expression such as `buf + off' is cast as a
- * whole instead of having the cast bind to `buf' alone. Note that `ptr' is
- * evaluated twice, so it must not have side effects.
- */
-#define	IS_IPV4_LL_SPACE(ptr)	(((uchar_t *)(ptr))[0] == 169 && \
-	((uchar_t *)(ptr))[1] == 254)
-
-/*
- * Check if the command needs to be enqueued by seeing if there are other
- * commands ahead of us or if some DLPI response is being awaited. Usually
- * there would be an enqueued command in the latter case, however if the
- * stream that originated the command has closed, the close would have
- * cleaned up the enqueued command. AR_DRAINING signifies that the command
- * at the head of the arl_queue has been internally dequeued on completion
- * of the previous command and is being called from ar_dlpi_done.
- *
- * Both arguments are parenthesized in the expansion so that arbitrary
- * pointer expressions may be passed safely. `arl' is evaluated twice, so it
- * must not have side effects.
- */
-#define	CMD_NEEDS_QUEUEING(mp, arl) \
-	((mp)->b_prev != AR_DRAINING && ((arl)->arl_queue != NULL || \
-	(arl)->arl_dlpi_pending != DL_PRIM_INVAL))
-
-/*
- * NOTE(review): presumably the size in bytes of the fixed leading portion of
- * an ARP header (the part before the variable-length addresses); its users
- * are not visible here -- confirm.
- */
-#define ARH_FIXED_LEN 8
-
-/*
- * Macro used when creating ACEs to determine the arl that should own it:
- * the arl's IPMP arl (arl_ipmp_arl) when one is set, otherwise the arl
- * itself. Every use of the argument is parenthesized so that the result is
- * well-formed for any pointer expression; `arl' may be evaluated more than
- * once and so must be free of side effects.
- */
-#define	OWNING_ARL(arl) \
-	((arl)->arl_ipmp_arl != NULL ? (arl)->arl_ipmp_arl : (arl))
-
-/*
- * MAC-specific intelligence. Shouldn't be needed, but the DL_INFO_ACK
- * doesn't quite do it for us.
- *
- * One row of the ar_m_tbl[] table; see that table for the values in use.
- */
-typedef struct ar_m_s {
- t_uscalar_t ar_mac_type; /* DLPI mac type (DL_* value) */
- uint32_t ar_mac_arp_hw_type; /* ARP hardware type (ARPHRD_* value) */
- t_scalar_t ar_mac_sap_length; /* SAP length; signed (table uses -2) */
- uint32_t ar_mac_hw_addr_length; /* hardware address length in bytes */
-} ar_m_t;
-
-/*
- * Pair of message block pointers bundled so they can be passed together.
- * NOTE(review): presumably the argument handed to ar_snmp_msg2() through
- * ar_ce_walk(), holding the first and last mblk of the reply data being
- * built -- the users are not visible here, confirm.
- */
-typedef struct msg2_args {
- mblk_t *m2a_mpdata;
- mblk_t *m2a_mptail;
-} msg2_args_t;
-
-/*
- * Forward declarations for the file-local (static) routines.
- *
- * NOTE(review): the ar_ce_create() prototype spells its integer parameters
- * uint32_t while the definition uses uint_t; these are presumably the same
- * type on all supported platforms -- confirm.
- */
-static mblk_t *ar_alloc(uint32_t cmd, int);
-static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr,
- uint32_t hw_addr_len, uchar_t *proto_addr,
- uint32_t proto_addr_len, uchar_t *proto_mask,
- uchar_t *proto_extract_mask, uint32_t hw_extract_start,
- uchar_t *sender_addr, uint32_t flags);
-static void ar_ce_delete(ace_t *ace);
-static void ar_ce_delete_per_arl(ace_t *ace, void *arg);
-static ace_t **ar_ce_hash(arp_stack_t *as, uint32_t proto,
- const uchar_t *proto_addr, uint32_t proto_addr_length);
-static ace_t *ar_ce_lookup(arl_t *arl, uint32_t proto,
- const uchar_t *proto_addr, uint32_t proto_addr_length);
-static ace_t *ar_ce_lookup_entry(arl_t *arl, uint32_t proto,
- const uchar_t *proto_addr, uint32_t proto_addr_length);
-/*
- * `matchfn' is declared without a parameter list, so calls made through it
- * are not checked against a prototype.
- */
-static ace_t *ar_ce_lookup_from_area(arp_stack_t *as, mblk_t *mp,
- ace_t *matchfn());
-static ace_t *ar_ce_lookup_mapping(arl_t *arl, uint32_t proto,
- const uchar_t *proto_addr, uint32_t proto_addr_length);
-static ace_t *ar_ce_lookup_permanent(arp_stack_t *as, uint32_t proto,
- uchar_t *proto_addr, uint32_t proto_addr_length);
-static boolean_t ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr,
- uint32_t hw_addr_length);
-static void ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *),
- void *arg1);
-
-static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code);
-static int ar_close(queue_t *q);
-static int ar_cmd_dispatch(queue_t *q, mblk_t *mp, boolean_t from_wput);
-static void ar_cmd_drain(arl_t *arl);
-static void ar_cmd_done(arl_t *arl);
-static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size);
-static void ar_dlpi_send(arl_t *, mblk_t *);
-static void ar_dlpi_done(arl_t *, t_uscalar_t);
-static int ar_entry_add(queue_t *q, mblk_t *mp);
-static int ar_entry_delete(queue_t *q, mblk_t *mp);
-static int ar_entry_query(queue_t *q, mblk_t *mp);
-static int ar_entry_squery(queue_t *q, mblk_t *mp);
-static int ar_interface_up(queue_t *q, mblk_t *mp);
-static int ar_interface_down(queue_t *q, mblk_t *mp);
-static int ar_interface_on(queue_t *q, mblk_t *mp);
-static int ar_interface_off(queue_t *q, mblk_t *mp);
-static int ar_ipmp_activate(queue_t *q, mblk_t *mp);
-static int ar_ipmp_deactivate(queue_t *q, mblk_t *mp);
-static void ar_ll_cleanup_arl_queue(queue_t *q);
-static void ar_ll_down(arl_t *arl);
-static arl_t *ar_ll_lookup_by_name(arp_stack_t *as, const char *name);
-static arl_t *ar_ll_lookup_from_mp(arp_stack_t *as, mblk_t *mp);
-static void ar_ll_init(arp_stack_t *, ar_t *, mblk_t *mp);
-static void ar_ll_set_defaults(arl_t *, mblk_t *mp);
-static void ar_ll_clear_defaults(arl_t *);
-static int ar_ll_up(arl_t *arl);
-static int ar_mapping_add(queue_t *q, mblk_t *mp);
-static boolean_t ar_mask_all_ones(uchar_t *mask, uint32_t mask_len);
-static ar_m_t *ar_m_lookup(t_uscalar_t mac_type);
-static int ar_nd_ioctl(queue_t *q, mblk_t *mp);
-static int ar_open(queue_t *q, dev_t *devp, int flag, int sflag,
- cred_t *credp);
-static int ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
-static boolean_t ar_param_register(IDP *ndp, arpparam_t *arppa, int cnt);
-static int ar_param_set(queue_t *q, mblk_t *mp, char *value,
- caddr_t cp, cred_t *cr);
-static void ar_query_delete(ace_t *ace, void *ar);
-static void ar_query_reply(ace_t *ace, int ret_val,
- uchar_t *proto_addr, uint32_t proto_addr_len);
-static clock_t ar_query_xmit(arp_stack_t *as, ace_t *ace);
-static void ar_rput(queue_t *q, mblk_t *mp_orig);
-static void ar_rput_dlpi(queue_t *q, mblk_t *mp);
-static void ar_set_address(ace_t *ace, uchar_t *addrpos,
- uchar_t *proto_addr, uint32_t proto_addr_len);
-static int ar_slifname(queue_t *q, mblk_t *mp);
-static int ar_set_ppa(queue_t *q, mblk_t *mp);
-static int ar_snmp_msg(queue_t *q, mblk_t *mp_orig);
-static void ar_snmp_msg2(ace_t *, void *);
-static void ar_wput(queue_t *q, mblk_t *mp);
-static void ar_wsrv(queue_t *q);
-static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto,
- uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1,
- const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr,
- arp_stack_t *as);
-static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q,
- ushort_t cmd, boolean_t);
-static mblk_t *ar_cmd_dequeue(arl_t *arl);
-
-/* Per-netstack lifecycle callbacks. */
-static void *arp_stack_init(netstackid_t stackid, netstack_t *ns);
-static void arp_stack_fini(netstackid_t stackid, void *arg);
-static void arp_stack_shutdown(netstackid_t stackid, void *arg);
-
-/*
- * Global (non-static) switch, off by default.
- * NOTE(review): presumably disables ARP's defense of local addresses when
- * set; the code that reads it is not visible here -- confirm.
- */
-boolean_t arp_no_defense = B_FALSE;
-
-/*
- * All of these are alterable, within the min/max values given,
- * at run time. arp_publish_interval and arp_publish_count are
- * set by default to 2 seconds and 5 respectively. This is
- * useful during FAILOVER/FAILBACK to make sure that the ARP
- * packets are not lost. Assumed that it does not affect the
- * normal operations.
- *
- * Each row is { minimum, maximum, default value, parameter name }.
- *
- * The as_* accessor macros that follow select rows by position, so their
- * indices must be kept in step with the row order of this table.
- * NOTE(review): as_param_arr is presumably the per-stack copy of this
- * table; its declaration is not visible here -- confirm.
- */
-static arpparam_t arp_param_arr[] = {
- /* min max value name */
- { 30000, 3600000, 300000, "arp_cleanup_interval"},
- { 1000, 20000, 2000, "arp_publish_interval"},
- { 1, 20, 5, "arp_publish_count"},
- { 0, 20000, 1000, "arp_probe_delay"},
- { 10, 20000, 1500, "arp_probe_interval"},
- { 0, 20, 3, "arp_probe_count"},
- { 0, 20000, 100, "arp_fastprobe_delay"},
- { 10, 20000, 150, "arp_fastprobe_interval"},
- { 0, 20, 3, "arp_fastprobe_count"},
- { 0, 3600000, 300000, "arp_defend_interval"},
- { 0, 20000, 100, "arp_defend_rate"},
- { 0, 3600000, 15000, "arp_broadcast_interval"},
- { 5, 86400, 3600, "arp_defend_period"}
-};
-/* Positional accessors: index N corresponds to row N of the table above. */
-#define as_cleanup_interval as_param_arr[0].arp_param_value
-#define as_publish_interval as_param_arr[1].arp_param_value
-#define as_publish_count as_param_arr[2].arp_param_value
-#define as_probe_delay as_param_arr[3].arp_param_value
-#define as_probe_interval as_param_arr[4].arp_param_value
-#define as_probe_count as_param_arr[5].arp_param_value
-#define as_fastprobe_delay as_param_arr[6].arp_param_value
-#define as_fastprobe_interval as_param_arr[7].arp_param_value
-#define as_fastprobe_count as_param_arr[8].arp_param_value
-#define as_defend_interval as_param_arr[9].arp_param_value
-#define as_defend_rate as_param_arr[10].arp_param_value
-#define as_broadcast_interval as_param_arr[11].arp_param_value
-#define as_defend_period as_param_arr[12].arp_param_value
-
-/*
- * STREAMS module description shared by the read and write sides. Per the
- * module_info(9S) layout the initializers are, in order: module id, module
- * name, minimum packet size, maximum packet size, high water mark and low
- * water mark.
- */
-static struct module_info arp_mod_info = {
- 0, "arp", 0, INFPSZ, 512, 128
-};
-
-/* Read side: ar_rput is the put procedure; there is no service procedure. */
-static struct qinit arprinit = {
- (pfi_t)ar_rput, NULL, ar_open, ar_close, NULL, &arp_mod_info
-};
-
-/* Write side: ar_wput is the put procedure and ar_wsrv the service one. */
-static struct qinit arpwinit = {
- (pfi_t)ar_wput, (pfi_t)ar_wsrv, ar_open, ar_close, NULL, &arp_mod_info
-};
-
-/* Exported stream table tying the read and write qinit structures together. */
-struct streamtab arpinfo = {
- &arprinit, &arpwinit
-};
-
-/*
- * TODO: we need a better mechanism to set the ARP hardware type since
- * the DLPI mac type does not include enough predefined values.
- *
- * Each row follows the ar_m_t field order: DLPI mac type, ARP hardware
- * type, SAP length, hardware address length.
- * NOTE(review): ar_m_lookup() presumably searches this table by mac type;
- * its body is not visible here -- confirm.
- */
-static ar_m_t ar_m_tbl[] = {
- { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */
- { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */
- { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */
- { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */
- { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */
- { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */
- { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */
- { DL_OTHER, ARPHRD_ETHER, -2, 6}, /* unknown */
-};
-
-/*
- * Note that all routines which need to queue the message for later
- * processing have to be ioctl_aware to be able to queue the complete message.
- * Following are command entry flags in arct_flags
- */
-#define ARF_IOCTL_AWARE 0x1 /* Arp command can come down as M_IOCTL */
-#define ARF_ONLY_CMD 0x2 /* Command is exclusive to ARP */
-#define ARF_WPUT_OK 0x4 /* Command is allowed from ar_wput */
-
-/* ARP Cmd Table entry */
-typedef struct arct_s {
- int (*arct_pfi)(queue_t *, mblk_t *); /* handler for the command */
- uint32_t arct_cmd; /* command code this entry matches */
- int arct_min_len; /* presumably min. message length -- confirm */
- uint32_t arct_flags; /* bitwise OR of the ARF_* flags above */
- int arct_priv_req; /* Privilege required for this cmd */
- const char *arct_txt; /* command name as a string */
-} arct_t;
-
-/*
- * AR_ENTRY_ADD, QUERY and SQUERY are used by sdp, hence they need to
- * have ARF_WPUT_OK set.
- *
- * Each row follows the arct_t field order: handler, command code, length
- * (the size of the command's message structure), ARF_* flags, required
- * privilege, and the command name.
- */
-static arct_t ar_cmd_tbl[] = {
- { ar_entry_add, AR_ENTRY_ADD, sizeof (area_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_CONFIG,
- "AR_ENTRY_ADD" },
- { ar_entry_delete, AR_ENTRY_DELETE, sizeof (ared_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_DELETE" },
- { ar_entry_query, AR_ENTRY_QUERY, sizeof (areq_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_NP,
- "AR_ENTRY_QUERY" },
- { ar_entry_squery, AR_ENTRY_SQUERY, sizeof (area_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD | ARF_WPUT_OK, OP_NP,
- "AR_ENTRY_SQUERY" },
- { ar_mapping_add, AR_MAPPING_ADD, sizeof (arma_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_MAPPING_ADD" },
- { ar_interface_up, AR_INTERFACE_UP, sizeof (arc_t),
- ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_UP" },
- { ar_interface_down, AR_INTERFACE_DOWN, sizeof (arc_t),
- ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_DOWN" },
- { ar_interface_on, AR_INTERFACE_ON, sizeof (arc_t),
- ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_ON" },
- { ar_interface_off, AR_INTERFACE_OFF, sizeof (arc_t),
- ARF_ONLY_CMD, OP_CONFIG, "AR_INTERFACE_OFF" },
- { ar_ipmp_activate, AR_IPMP_ACTIVATE, sizeof (arie_t),
- ARF_ONLY_CMD, OP_CONFIG, "AR_IPMP_ACTIVATE" },
- { ar_ipmp_deactivate, AR_IPMP_DEACTIVATE, sizeof (arie_t),
- ARF_ONLY_CMD, OP_CONFIG, "AR_IPMP_DEACTIVATE" },
- { ar_set_ppa, (uint32_t)IF_UNITSEL, sizeof (int),
- ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "IF_UNITSEL" },
- { ar_nd_ioctl, ND_GET, 1,
- ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_NP, "ND_GET" },
- { ar_nd_ioctl, ND_SET, 1,
- ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "ND_SET" },
- { ar_snmp_msg, AR_SNMP_MSG, sizeof (struct T_optmgmt_ack),
- ARF_IOCTL_AWARE | ARF_WPUT_OK | ARF_ONLY_CMD, OP_NP,
- "AR_SNMP_MSG" },
- { ar_slifname, (uint32_t)SIOCSLIFNAME, sizeof (struct lifreq),
- ARF_IOCTL_AWARE | ARF_WPUT_OK, OP_CONFIG, "SIOCSLIFNAME" }
-};
-
-/*
- * Pick the arl that packets for the IPMP arl `arl' should be sent on.
- *
- * Candidates are tried in this order:
- *  1. a member of arl's group whose hardware address equals `hw_addr'
- *     (skipped when hw_addr is NULL or hw_addrlen is 0);
- *  2. the transmit arl latched on the permanent ACE for `ip_addr'
- *     (skipped when ip_addr is NULL);
- *  3. any arl, other than `arl' itself, that belongs to arl's group.
- *
- * Returns NULL if none of the steps produces an arl.
- */
-static arl_t *
-ar_ipmp_lookup_xmit_arl(arl_t *arl, uchar_t *hw_addr, uint_t hw_addrlen,
-    uchar_t *ip_addr)
-{
-	arp_stack_t	*as = ARL_TO_ARPSTACK(arl);
-	arl_t		*cand = NULL;
-
-	ASSERT(arl->arl_flags & ARL_F_IPMP);
-
-	/* Step 1: match on the source hardware address. */
-	if (hw_addr != NULL && hw_addrlen != 0) {
-		for (cand = as->as_arl_head; cand != NULL;
-		    cand = cand->arl_next) {
-			arlphy_t *phy = cand->arl_phy;
-
-			/*
-			 * Arls outside our IPMP group may carry the same
-			 * hardware address; the group test rejects them.
-			 */
-			if (cand->arl_ipmp_arl == arl && phy != NULL &&
-			    phy->ap_hw_addrlen == hw_addrlen &&
-			    bcmp(phy->ap_hw_addr, hw_addr, hw_addrlen) == 0)
-				break;
-		}
-
-		DTRACE_PROBE4(xmit_arl_hwsrc, arl_t *, arl, arl_t *,
-		    cand, uchar_t *, hw_addr, uint_t, hw_addrlen);
-	}
-
-	/* Step 2: fall back to the permanent ACE for the source address. */
-	if (cand == NULL && ip_addr != NULL) {
-		ace_t *perm = ar_ce_lookup_permanent(as, IP_ARP_PROTO_TYPE,
-		    ip_addr, IP_ADDR_LEN);
-
-		if (perm != NULL)
-			cand = perm->ace_xmit_arl;
-
-		DTRACE_PROBE4(xmit_arl_ipsrc, arl_t *, arl, arl_t *,
-		    cand, uchar_t *, ip_addr, uint_t, IP_ADDR_LEN);
-	}
-
-	/* Step 3: settle for any other member of the group. */
-	if (cand == NULL) {
-		cand = as->as_arl_head;
-		while (cand != NULL &&
-		    (cand->arl_ipmp_arl != arl || cand == arl))
-			cand = cand->arl_next;
-
-		DTRACE_PROBE2(xmit_arl_any, arl_t *, arl, arl_t *, cand);
-	}
-
-	return (cand);
-}
-
-/*
- * ARP Cache Entry creation routine.
- * Cache entries are allocated within timer messages and inserted into
- * the global hash list based on protocol and protocol address.
- *
- * arl			arl the new ACE belongs to; must not be NULL.
- * proto		protocol type; IP_ARP_PROTO_TYPE requires a
- *			proto_addr_len of IP_ADDR_LEN.
- * hw_addr/hw_addr_len	hardware address, or NULL/0 to have one chosen (see
- *			below).
- * proto_addr/_len	protocol address; mandatory and non-empty.
- * proto_mask		optional mask of proto_addr_len bytes; all ones when
- *			NULL.
- * proto_extract_mask	optional; mandatory when ACE_F_MAPPING is set.
- * hw_extract_start	stored in the ACE unchanged.
- * sender_addr		source protocol address used only to pick a transmit
- *			arl when `arl' is an IPMP arl.
- * flags		ACE_F_* flags; bits outside ACE_EXTERNAL_FLAGS_MASK
- *			are rejected.
- *
- * Returns 0 on success, EINVAL for bad arguments or when no usable transmit
- * arl exists, and ENOMEM when the timer block cannot be allocated.
- */
-static int
-ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len,
-    uchar_t *proto_addr, uint_t proto_addr_len, uchar_t *proto_mask,
-    uchar_t *proto_extract_mask, uint_t hw_extract_start, uchar_t *sender_addr,
-    uint_t flags)
-{
-	static ace_t	ace_null;
-	ace_t		*ace;
-	ace_t		**acep;
-	uchar_t		*dst;
-	mblk_t		*mp;
-	arp_stack_t	*as;
-	arl_t		*xmit_arl;
-	arlphy_t	*ap;
-	uint_t		i;
-
-	if ((flags & ~ACE_EXTERNAL_FLAGS_MASK) || arl == NULL)
-		return (EINVAL);
-
-	/*
-	 * Look up the stack only after `arl' has been validated; doing it
-	 * in the declaration would look through `arl' ahead of the NULL
-	 * check above (ARL_TO_ARPSTACK is assumed to dereference it).
-	 */
-	as = ARL_TO_ARPSTACK(arl);
-
-	if (proto_addr == NULL || proto_addr_len == 0 ||
-	    (proto == IP_ARP_PROTO_TYPE && proto_addr_len != IP_ADDR_LEN))
-		return (EINVAL);
-
-	if (flags & ACE_F_MYADDR)
-		flags |= ACE_F_PUBLISH | ACE_F_AUTHORITY;
-
-	/*
-	 * Latch a transmit arl for this ace.
-	 */
-	if (arl->arl_flags & ARL_F_IPMP) {
-		ASSERT(proto == IP_ARP_PROTO_TYPE);
-		xmit_arl = ar_ipmp_lookup_xmit_arl(arl, hw_addr, hw_addr_len,
-		    sender_addr);
-	} else {
-		xmit_arl = arl;
-	}
-
-	if (xmit_arl == NULL || xmit_arl->arl_phy == NULL)
-		return (EINVAL);
-
-	ap = xmit_arl->arl_phy;
-
-	if (!hw_addr && hw_addr_len == 0) {
-		if (flags == ACE_F_PERMANENT) {	/* Not publish */
-			/* 224.0.0.0 to zero length address */
-			flags |= ACE_F_RESOLVED;
-		} else {	/* local address and unresolved case */
-			hw_addr = ap->ap_hw_addr;
-			hw_addr_len = ap->ap_hw_addrlen;
-			if (flags & ACE_F_PUBLISH)
-				flags |= ACE_F_RESOLVED;
-		}
-	} else {
-		flags |= ACE_F_RESOLVED;
-	}
-
-	/* Handle hw_addr_len == 0 for DL_ENABMULTI_REQ etc. */
-	if (hw_addr_len != 0 && hw_addr == NULL)
-		return (EINVAL);
-	if (hw_addr_len < ap->ap_hw_addrlen && hw_addr_len != 0)
-		return (EINVAL);
-	if (!proto_extract_mask && (flags & ACE_F_MAPPING))
-		return (EINVAL);
-
-	/*
-	 * If the underlying link doesn't have reliable up/down notification or
-	 * if we're working with the IPv4 169.254.0.0/16 Link Local Address
-	 * space, then don't use the fast timers. Otherwise, use them.
-	 */
-	if (ap->ap_notifies &&
-	    !(proto == IP_ARP_PROTO_TYPE && IS_IPV4_LL_SPACE(proto_addr))) {
-		flags |= ACE_F_FAST;
-	}
-
-	/*
-	 * Allocate the timer block to hold the ace.
-	 * (ace + proto_addr + proto_addr_mask + proto_extract_mask + hw_addr)
-	 */
-	mp = mi_timer_alloc(sizeof (ace_t) + proto_addr_len + proto_addr_len +
-	    proto_addr_len + hw_addr_len);
-	if (!mp)
-		return (ENOMEM);
-	ace = (ace_t *)mp->b_rptr;
-	*ace = ace_null;
-	ace->ace_proto = proto;
-	ace->ace_mp = mp;
-	ace->ace_arl = arl;
-	ace->ace_xmit_arl = xmit_arl;
-
-	/* The variable-length fields are packed right behind the ace_t. */
-	dst = (uchar_t *)&ace[1];
-
-	ace->ace_proto_addr = dst;
-	ace->ace_proto_addr_length = proto_addr_len;
-	bcopy(proto_addr, dst, proto_addr_len);
-	dst += proto_addr_len;
-	/*
-	 * The proto_mask allows us to add entries which will let us respond
-	 * to requests for a group of addresses. This makes it easy to provide
-	 * proxy ARP service for machines that don't understand about the local
-	 * subnet structure, if, for example, there are BSD4.2 systems lurking.
-	 */
-	ace->ace_proto_mask = dst;
-	if (proto_mask != NULL) {
-		bcopy(proto_mask, dst, proto_addr_len);
-		dst += proto_addr_len;
-	} else {
-		/* No mask supplied: default to all ones. */
-		for (i = 0; i < proto_addr_len; i++)
-			*dst++ = (uchar_t)~0;
-	}
-
-	if (proto_extract_mask != NULL) {
-		ace->ace_proto_extract_mask = dst;
-		bcopy(proto_extract_mask, dst, ace->ace_proto_addr_length);
-		dst += ace->ace_proto_addr_length;
-	} else {
-		ace->ace_proto_extract_mask = NULL;
-	}
-	ace->ace_hw_extract_start = hw_extract_start;
-	ace->ace_hw_addr_length = hw_addr_len;
-	ace->ace_hw_addr = dst;
-	if (hw_addr != NULL) {
-		bcopy(hw_addr, dst, hw_addr_len);
-		dst += hw_addr_len;
-	}
-
-	ace->ace_flags = flags;
-	/*
-	 * Entries with an all-ones mask go into the hash table; masked
-	 * entries are kept on the separate as_ce_mask_entries list.
-	 */
-	if (ar_mask_all_ones(ace->ace_proto_mask,
-	    ace->ace_proto_addr_length)) {
-		acep = ar_ce_hash(as, ace->ace_proto, ace->ace_proto_addr,
-		    ace->ace_proto_addr_length);
-	} else {
-		acep = &as->as_ce_mask_entries;
-	}
-	/* Insert at the head of the chosen list, fixing up back pointers. */
-	if ((ace->ace_next = *acep) != NULL)
-		ace->ace_next->ace_ptpn = &ace->ace_next;
-	*acep = ace;
-	ace->ace_ptpn = acep;
-	return (0);
-}
-
-/*
- * Delete a cache entry: unlink `ace' from the list it is on, flag it as
- * dying, fail any queries still pending on it with ENXIO, and release the
- * timer block that holds it.
- */
-static void
-ar_ce_delete(ace_t *ace)
-{
-	ace_t	*successor = ace->ace_next;
-	ace_t	**backlink = ace->ace_ptpn;
-
-	/* Splice the entry out of its list. */
-	if (successor != NULL)
-		successor->ace_ptpn = backlink;
-	*backlink = successor;
-
-	/* A timer may be about to fire for this entry; let it see DYING. */
-	ace->ace_flags |= ACE_F_DYING;
-
-	/* Anything still waiting on this entry gets an immediate ENXIO. */
-	ar_query_reply(ace, ENXIO, NULL, (uint32_t)0);
-
-	/* The timer is freed right away, or once it fires. */
-	mi_timer_free(ace->ace_mp);
-}
-
-/*
- * ar_ce_walk routine. Delete the ace if it is associated with the arl
- * that is going away.
- */
-static void
-ar_ce_delete_per_arl(ace_t *ace, void *arl)
-{
- if (ace->ace_arl == arl || ace->ace_xmit_arl == arl) {
- ace->ace_flags &= ~ACE_F_PERMANENT;
- ar_ce_delete(ace);
- }
-}
-
-/*
- * ar_ce_walk routine used when deactivating an `arl' in a group. Deletes
- * `ace' if it was using `arl_arg' as its output interface.
- */
-static void
-ar_ce_ipmp_deactivate(ace_t *ace, void *arl_arg)
-{
- arl_t *arl = arl_arg;
-
- ASSERT(!(arl->arl_flags & ARL_F_IPMP));
-
- if (ace->ace_arl == arl) {
- ASSERT(ace->ace_xmit_arl == arl);
- /*
- * This ACE is tied to the arl leaving the group (e.g., an
- * ACE_F_PERMANENT for a test address) and is not used by the
- * group, so we can leave it be.
- */
- return;
- }
-
- if (ace->ace_xmit_arl != arl)
- return;
-
- ASSERT(ace->ace_arl == arl->arl_ipmp_arl);
-
- /*
- * IP should've already sent us messages asking us to move any
- * ACE_F_MYADDR entries to another arl, but there are two exceptions:
- *
- * 1. The group was misconfigured with interfaces that have duplicate
- * hardware addresses, but in.mpathd was unable to offline those
- * duplicate interfaces.
- *
- * 2. The messages from IP were lost or never created (e.g. due to
- * memory pressure).
- *
- * We handle the first case by just quietly deleting the ACE. Since
- * the second case cannot be distinguished from a more serious bug in
- * the IPMP framework, we ASSERT() that this can't happen on DEBUG
- * systems, but quietly delete the ACE on production systems (the
- * deleted ACE will render the IP address unreachable).
- */
- if (ace->ace_flags & ACE_F_MYADDR) {
- arlphy_t *ap = arl->arl_phy;
- uint_t hw_addrlen = ap->ap_hw_addrlen;
-
- ASSERT(hw_addrlen == ace->ace_hw_addr_length &&
- bcmp(ap->ap_hw_addr, ace->ace_hw_addr, hw_addrlen) == 0);
- }
-
- /*
- * NOTE: it's possible this arl got selected as the ace_xmit_arl when
- * creating an ACE_F_PERMANENT ACE on behalf of an SIOCS*ARP ioctl for
- * an IPMP IP interface. But it's still OK for us to delete such an
- * ACE since ipmp_illgrp_refresh_arpent() will ask us to recreate it
- * and we'll pick another arl then.
- */
- ar_ce_delete(ace);
-}
-
- /*
-  * Map a (protocol, protocol address) pair to its cache hash bucket.  The
-  * hash is `proto' XORed with every byte of the address, reduced modulo
-  * ARP_HASH_SIZE.  Returns the address of the bucket head in as_ce_hash_tbl.
-  */
- static ace_t **
- ar_ce_hash(arp_stack_t *as, uint32_t proto, const uchar_t *proto_addr,
-     uint32_t proto_addr_length)
- {
- 	unsigned int	bucket = proto;
- 	const uchar_t	*bp = proto_addr;
- 	int		left;
-
- 	/* Fold each address byte into the running hash. */
- 	for (left = proto_addr_length; left > 0; left--)
- 		bucket ^= *bp++;
-
- 	return (&as->as_ce_hash_tbl[bucket % ARP_HASH_SIZE]);
- }
-
- /*
-  * Cache entry lookup.  An exact entry (ar_ce_lookup_entry) is preferred;
-  * only when none exists are the mapping entries (ar_ce_lookup_mapping)
-  * consulted.  Returns the ace found, or NULL.
-  */
- ace_t *
- ar_ce_lookup(arl_t *arl, uint32_t proto, const uchar_t *proto_addr,
-     uint32_t proto_addr_length)
- {
- 	ace_t	*hit;
-
- 	hit = ar_ce_lookup_entry(arl, proto, proto_addr, proto_addr_length);
- 	if (hit != NULL)
- 		return (hit);
-
- 	return (ar_ce_lookup_mapping(arl, proto, proto_addr,
- 	    proto_addr_length));
- }
-
-/*
- * Cache entry lookup. Try to find an ace matching the parameters passed.
- * Look only for exact entries (no mappings)
- */
-static ace_t *
-ar_ce_lookup_entry(arl_t *arl, uint32_t proto, const uchar_t *proto_addr,
- uint32_t proto_addr_length)
-{
- ace_t *ace;
- arp_stack_t *as = ARL_TO_ARPSTACK(arl);
-
- if (!proto_addr)
- return (NULL);
- ace = *ar_ce_hash(as, proto, proto_addr, proto_addr_length);
- for (; ace; ace = ace->ace_next) {
- if ((ace->ace_arl == arl ||
- ace->ace_arl == arl->arl_ipmp_arl) &&
- ace->ace_proto_addr_length == proto_addr_length &&
- ace->ace_proto == proto) {
- int i1 = proto_addr_length;
- uchar_t *ace_addr = ace->ace_proto_addr;
- uchar_t *mask = ace->ace_proto_mask;
- /*
- * Note that the ace_proto_mask is applied to the
- * proto_addr before comparing to the ace_addr.
- */
- do {
- if (--i1 < 0)
- return (ace);
- } while ((proto_addr[i1] & mask[i1]) == ace_addr[i1]);
- }
- }
- return (ace);
-}
-
-/*
- * Extract cache entry lookup parameters from an external command message, then
- * call the supplied match function.
- */
-static ace_t *
-ar_ce_lookup_from_area(arp_stack_t *as, mblk_t *mp, ace_t *matchfn())
-{
- uchar_t *proto_addr;
- area_t *area = (area_t *)mp->b_rptr;
-
- proto_addr = mi_offset_paramc(mp, area->area_proto_addr_offset,
- area->area_proto_addr_length);
- if (!proto_addr)
- return (NULL);
- return ((*matchfn)(ar_ll_lookup_from_mp(as, mp), area->area_proto,
- proto_addr, area->area_proto_addr_length));
-}
-
-/*
- * Cache entry lookup. Try to find an ace matching the parameters passed.
- * Look only for mappings.
- */
-static ace_t *
-ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, const uchar_t *proto_addr,
- uint32_t proto_addr_length)
-{
- ace_t *ace;
- arp_stack_t *as = ARL_TO_ARPSTACK(arl);
-
- if (!proto_addr)
- return (NULL);
- ace = as->as_ce_mask_entries;
- for (; ace; ace = ace->ace_next) {
- if (ace->ace_arl == arl &&
- ace->ace_proto_addr_length == proto_addr_length &&
- ace->ace_proto == proto) {
- int i1 = proto_addr_length;
- uchar_t *ace_addr = ace->ace_proto_addr;
- uchar_t *mask = ace->ace_proto_mask;
- /*
- * Note that the ace_proto_mask is applied to the
- * proto_addr before comparing to the ace_addr.
- */
- do {
- if (--i1 < 0)
- return (ace);
- } while ((proto_addr[i1] & mask[i1]) == ace_addr[i1]);
- }
- }
- return (ace);
-}
-
-/*
- * Look for a permanent entry for proto_addr across all interfaces.
- */
-static ace_t *
-ar_ce_lookup_permanent(arp_stack_t *as, uint32_t proto, uchar_t *proto_addr,
- uint32_t proto_addr_length)
-{
- ace_t *ace;
-
- ace = *ar_ce_hash(as, proto, proto_addr, proto_addr_length);
- for (; ace != NULL; ace = ace->ace_next) {
- if (!(ace->ace_flags & ACE_F_PERMANENT))
- continue;
- if (ace->ace_proto_addr_length == proto_addr_length &&
- ace->ace_proto == proto) {
- int i1 = proto_addr_length;
- uchar_t *ace_addr = ace->ace_proto_addr;
- uchar_t *mask = ace->ace_proto_mask;
-
- /*
- * Note that the ace_proto_mask is applied to the
- * proto_addr before comparing to the ace_addr.
- */
- do {
- if (--i1 < 0)
- return (ace);
- } while ((proto_addr[i1] & mask[i1]) == ace_addr[i1]);
- }
- }
- return (ace);
-}
-
-/*
- * ar_ce_resolve is called when a response comes in to an outstanding request.
- * Returns 'true' if the address has changed and we need to tell the client.
- * (We don't need to tell the client if there's still an outstanding query.)
- */
-static boolean_t
-ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length)
-{
- boolean_t hwchanged;
-
- if (hw_addr_length == ace->ace_hw_addr_length) {
- ASSERT(ace->ace_hw_addr != NULL);
- hwchanged = bcmp(hw_addr, ace->ace_hw_addr,
- hw_addr_length) != 0;
- if (hwchanged)
- bcopy(hw_addr, ace->ace_hw_addr, hw_addr_length);
- /*
- * No need to bother with ar_query_reply if no queries are
- * waiting.
- */
- ace->ace_flags |= ACE_F_RESOLVED;
- if (ace->ace_query_mp != NULL)
- ar_query_reply(ace, 0, NULL, (uint32_t)0);
- if (hwchanged)
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
-/*
- * There are 2 functions performed by this function.
- * 1. Resolution of unresolved entries and update of resolved entries.
- * 2. Detection of nodes with our own IP address (duplicates).
- *
- * If the resolving ARL is in the same group as a matching ACE's ARL, then
- * update the ACE. Otherwise, make no updates.
- *
- * For all entries, we first check to see if this is a duplicate (probable
- * loopback) message. If so, then just ignore it.
- *
- * Next, check to see if the entry has completed DAD. If not, then we've
- * failed, because someone is already using the address. Notify IP of the DAD
- * failure and remove the broken ace.
- *
- * Next, we check if we're the authority for this address. If so, then it's
- * time to defend it, because the other node is a duplicate. Report it as a
- * 'bogon' and let IP decide how to defend.
- *
- * Finally, if it's unresolved or if the arls match, we just update the MAC
- * address. This allows a published 'static' entry to be updated by an ARP
- * request from the node for which we're a proxy ARP server.
- *
- * Note that this logic does not update published ARP entries for mismatched
- * arls, as for example when we proxy arp across 2 subnets with differing
- * subnet masks.
- *
- * Return Values below
- */
-
-#define AR_NOTFOUND 1 /* No matching ace found in cache */
-#define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */
-#define AR_LOOPBACK 3 /* Our own arp packet was received */
-#define AR_BOGON 4 /* Another host has our IP addr. */
-#define AR_FAILED 5 /* Duplicate Address Detection has failed */
-#define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */
-
-static int
-ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr,
- uint32_t hlen, const uchar_t *src_paddr, uint32_t plen, arl_t **ace_arlp)
-{
- ace_t *ace;
- ace_t *ace_next;
- int i1;
- const uchar_t *paddr;
- uchar_t *ace_addr;
- uchar_t *mask;
- int retv = AR_NOTFOUND;
- arp_stack_t *as = ARL_TO_ARPSTACK(arl);
-
- ace = *ar_ce_hash(as, proto, src_paddr, plen);
- for (; ace != NULL; ace = ace_next) {
-
- /* ar_ce_resolve may delete the ace; fetch next pointer now */
- ace_next = ace->ace_next;
-
- if (ace->ace_proto_addr_length != plen ||
- ace->ace_proto != proto) {
- continue;
- }
-
- /*
- * Note that the ace_proto_mask is applied to the proto_addr
- * before comparing to the ace_addr.
- */
- paddr = src_paddr;
- i1 = plen;
- ace_addr = ace->ace_proto_addr;
- mask = ace->ace_proto_mask;
- while (--i1 >= 0) {
- if ((*paddr++ & *mask++) != *ace_addr++)
- break;
- }
- if (i1 >= 0)
- continue;
-
- *ace_arlp = ace->ace_arl;
-
- /*
- * If the IP address is ours, and the hardware address matches
- * one of our own arls, then this is a broadcast packet
- * emitted by one of our interfaces, reflected by the switch
- * and received on another interface. We return AR_LOOPBACK.
- */
- if (ace->ace_flags & ACE_F_MYADDR) {
- arl_t *hw_arl = as->as_arl_head;
- arlphy_t *ap;
-
- for (; hw_arl != NULL; hw_arl = hw_arl->arl_next) {
- ap = hw_arl->arl_phy;
- if (ap != NULL && ap->ap_hw_addrlen == hlen &&
- bcmp(ap->ap_hw_addr, src_haddr, hlen) == 0)
- return (AR_LOOPBACK);
- }
- }
-
- /*
- * If the entry is unverified, then we've just verified that
- * someone else already owns this address, because this is a
- * message with the same protocol address but different
- * hardware address. NOTE: the ace_xmit_arl check ensures we
- * don't send duplicate AR_FAILEDs if arl is in an IPMP group.
- */
- if ((ace->ace_flags & ACE_F_UNVERIFIED) &&
- arl == ace->ace_xmit_arl) {
- ar_ce_delete(ace);
- return (AR_FAILED);
- }
-
- /*
- * If the IP address matches ours and we're authoritative for
- * this entry, then some other node is using our IP addr, so
- * return AR_BOGON. Also reset the transmit count to zero so
- * that, if we're currently in initial announcement mode, we
- * switch back to the lazier defense mode. Knowing that
- * there's at least one duplicate out there, we ought not
- * blindly announce. NOTE: the ace_xmit_arl check ensures we
- * don't send duplicate AR_BOGONs if arl is in an IPMP group.
- */
- if ((ace->ace_flags & ACE_F_AUTHORITY) &&
- arl == ace->ace_xmit_arl) {
- ace->ace_xmit_count = 0;
- return (AR_BOGON);
- }
-
- /*
- * Only update this ACE if it's on the same network -- i.e.,
- * it's for our ARL or another ARL in the same IPMP group.
- */
- if (ace->ace_arl == arl || ace->ace_arl == arl->arl_ipmp_arl) {
- if (ar_ce_resolve(ace, src_haddr, hlen))
- retv = AR_CHANGED;
- else if (retv == AR_NOTFOUND)
- retv = AR_MERGED;
- }
- }
-
- if (retv == AR_NOTFOUND)
- *ace_arlp = NULL;
- return (retv);
-}
-
- /*
-  * Call `pfi' with arg1 for every ace in existence: first every entry on
-  * each hash chain, then every entry on the mask-entry list.  The successor
-  * of each ace is fetched before the callback runs, so the callback is free
-  * to delete the ace it was handed.
-  */
- static void
- ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *), void *arg1)
- {
- 	ace_t	*cur;
- 	ace_t	*following;
- 	int	bucket;
-
- 	for (bucket = 0; bucket < ARP_HASH_SIZE; bucket++) {
- 		cur = as->as_ce_hash_tbl[bucket];
- 		while (cur != NULL) {
- 			following = cur->ace_next;
- 			(*pfi)(cur, arg1);
- 			cur = following;
- 		}
- 	}
-
- 	cur = as->as_ce_mask_entries;
- 	while (cur != NULL) {
- 		following = cur->ace_next;
- 		(*pfi)(cur, arg1);
- 		cur = following;
- 	}
- }
-
-/*
- * Send a copy of interesting packets to the corresponding IP instance.
- * The corresponding IP instance is the ARP-IP-DEV instance for this
- * DEV (i.e. ARL).
- */
-static void
-ar_client_notify(const arl_t *arl, mblk_t *mp, int code)
-{
- ar_t *ar = ((ar_t *)arl->arl_rq->q_ptr)->ar_arl_ip_assoc;
- arcn_t *arcn;
- mblk_t *mp1;
- int arl_namelen = strlen(arl->arl_name) + 1;
-
- /* Looks like the association disappeared */
- if (ar == NULL) {
- freemsg(mp);
- return;
- }
-
- /* ar is the corresponding ARP-IP instance for this ARL */
- ASSERT(ar->ar_arl == NULL && ar->ar_wq->q_next != NULL);
-
- mp1 = allocb(sizeof (arcn_t) + arl_namelen, BPRI_MED);
- if (mp1 == NULL) {
- freemsg(mp);
- return;
- }
- DB_TYPE(mp1) = M_CTL;
- mp1->b_cont = mp;
- arcn = (arcn_t *)mp1->b_rptr;
- mp1->b_wptr = (uchar_t *)&arcn[1] + arl_namelen;
- arcn->arcn_cmd = AR_CLIENT_NOTIFY;
- arcn->arcn_name_offset = sizeof (arcn_t);
- arcn->arcn_name_length = arl_namelen;
- arcn->arcn_code = code;
- bcopy(arl->arl_name, &arcn[1], arl_namelen);
-
- putnext(ar->ar_wq, mp1);
-}
-
-/*
- * Send a delete-notify message down to IP. We've determined that IP doesn't
- * have a cache entry for the IP address itself, but it may have other cache
- * entries with the same hardware address, and we don't want to see those grow
- * stale. (The alternative is sending down updates for every ARP message we
- * get that doesn't match an existing ace. That's much more expensive than an
- * occasional delete and reload.)
- */
-static void
-ar_delete_notify(const ace_t *ace)
-{
- const arl_t *arl = ace->ace_arl;
- const arlphy_t *ap = ace->ace_xmit_arl->arl_phy;
- mblk_t *mp;
- size_t len;
- arh_t *arh;
-
- len = sizeof (*arh) + 2 * ace->ace_proto_addr_length;
- mp = allocb(len, BPRI_MED);
- if (mp == NULL)
- return;
- arh = (arh_t *)mp->b_rptr;
- mp->b_wptr = (uchar_t *)arh + len;
- U16_TO_BE16(ap->ap_arp_hw_type, arh->arh_hardware);
- U16_TO_BE16(ace->ace_proto, arh->arh_proto);
- arh->arh_hlen = 0;
- arh->arh_plen = ace->ace_proto_addr_length;
- U16_TO_BE16(ARP_RESPONSE, arh->arh_operation);
- bcopy(ace->ace_proto_addr, arh + 1, ace->ace_proto_addr_length);
- bcopy(ace->ace_proto_addr, (uchar_t *)(arh + 1) +
- ace->ace_proto_addr_length, ace->ace_proto_addr_length);
- ar_client_notify(arl, mp, AR_CN_ANNOUNCE);
-}
-
- /*
- * ARP module close routine.
- *
- * Two cases are handled, distinguished by ar->ar_arl:
- *
- * ar_arl == NULL: if ar_on_ill_stream is set, an AR_ARP_CLOSING M_CTL is
- * sent downstream and we wait (interruptibly) for ar_ip_acked_close. The
- * pending queries belonging to this instance are then deleted and any of
- * its requests still sitting on an arl_queue are cleaned up.
- *
- * ar_arl != NULL: the arl is marked closing, its command queue is allowed
- * to empty, it is brought down, its cache entries are deleted, and it is
- * unlinked from as_arl_head under as_arl_lock and freed. An NE_UNPLUMB
- * hook event is run afterwards if the arl's index was nonzero.
- *
- * In both cases the ARL<->IP association is broken, the instance data is
- * released through mi_close_comm(), and the netstack hold is dropped.
- * Always returns 0.
- */
- static int
- ar_close(queue_t *q)
- {
- ar_t *ar = (ar_t *)q->q_ptr;
- char name[LIFNAMSIZ];
- arl_t *arl, *xarl;
- arl_t **arlp;
- cred_t *cr;
- arc_t *arc;
- mblk_t *mp1;
- int index;
- arp_stack_t *as = ar->ar_as;
-
- TRACE_1(TR_FAC_ARP, TR_ARP_CLOSE,
- "arp_close: q %p", q);
-
- arl = ar->ar_arl;
- if (arl == NULL) {
- index = 0; /* no NE_UNPLUMB event; `name' stays unset and unused */
- /*
- * If this is the <ARP-IP-Driver> stream send down
- * a closing message to IP and wait for IP to send
- * an ack. This helps to make sure that messages
- * that are currently being sent up by IP are not lost.
- */
- if (ar->ar_on_ill_stream) {
- mp1 = allocb(sizeof (arc_t), BPRI_MED);
- if (mp1 != NULL) {
- DB_TYPE(mp1) = M_CTL;
- arc = (arc_t *)mp1->b_rptr;
- mp1->b_wptr = mp1->b_rptr + sizeof (arc_t);
- arc->arc_cmd = AR_ARP_CLOSING;
- putnext(WR(q), mp1);
- while (!ar->ar_ip_acked_close)
- /* If we are interrupted break out */
- if (qwait_sig(q) == 0)
- break;
- }
- }
- /* Delete all our pending queries, 'arl' is not dereferenced */
- ar_ce_walk(as, ar_query_delete, ar);
- /*
- * The request could be pending on some arl_queue also. This
- * happens if the arl is not yet bound, and bind is pending.
- */
- ar_ll_cleanup_arl_queue(q);
- } else {
- index = arl->arl_index;
- (void) strcpy(name, arl->arl_name); /* NOTE(review): unbounded copy; presumably arl_name fits in LIFNAMSIZ -- confirm */
- arl->arl_closing = 1;
- while (arl->arl_queue != NULL)
- qwait(arl->arl_rq);
-
- if (arl->arl_state == ARL_S_UP)
- ar_ll_down(arl);
-
- while (arl->arl_state != ARL_S_DOWN)
- qwait(arl->arl_rq);
-
- if (arl->arl_flags & ARL_F_IPMP) {
- /*
- * Though rude, someone could force the IPMP arl
- * closed without removing the underlying interfaces.
- * In that case, force the ARLs out of the group.
- */
- xarl = as->as_arl_head;
- for (; xarl != NULL; xarl = xarl->arl_next) {
- if (xarl->arl_ipmp_arl != arl || xarl == arl)
- continue;
- ar_ce_walk(as, ar_ce_ipmp_deactivate, xarl);
- xarl->arl_ipmp_arl = NULL;
- }
- }
-
- ar_ll_clear_defaults(arl);
- /*
- * If this is the control stream for an arl, delete anything
- * hanging off our arl.
- */
- ar_ce_walk(as, ar_ce_delete_per_arl, arl);
- /*
- * Free any messages waiting for a bind_ack
- * NOTE(review): no statement here does this; the only related
- * code is the arl_dlpi_deferred ASSERT below -- confirm intent.
- */
- /* Get the arl out of the chain. */
- rw_enter(&as->as_arl_lock, RW_WRITER);
- for (arlp = &as->as_arl_head; *arlp;
- arlp = &(*arlp)->arl_next) {
- if (*arlp == arl) {
- *arlp = arl->arl_next;
- break;
- }
- }
-
- ASSERT(arl->arl_dlpi_deferred == NULL);
- ar->ar_arl = NULL;
- rw_exit(&as->as_arl_lock);
-
- mi_free((char *)arl);
- }
- /* Let's break the association between an ARL and IP instance */
- if (ar->ar_arl_ip_assoc != NULL) {
- ASSERT(ar->ar_arl_ip_assoc->ar_arl_ip_assoc != NULL &&
- ar->ar_arl_ip_assoc->ar_arl_ip_assoc == ar);
- ar->ar_arl_ip_assoc->ar_arl_ip_assoc = NULL;
- ar->ar_arl_ip_assoc = NULL;
- }
- cr = ar->ar_credp; /* saved now: `ar' is freed by mi_close_comm below */
- /* mi_close_comm frees the instance data. */
- (void) mi_close_comm(&as->as_head, q);
- qprocsoff(q);
- crfree(cr);
-
- if (index != 0) {
- hook_nic_event_t info;
-
- info.hne_nic = index;
- info.hne_lif = 0;
- info.hne_event = NE_UNPLUMB;
- info.hne_data = name;
- info.hne_datalen = strlen(name);
- (void) hook_run(as->as_net_data->netd_hooks,
- as->as_arpnicevents, (hook_data_t)&info);
- }
- netstack_rele(as->as_netstack);
- return (0);
- }
-
-/*
- * Dispatch routine for ARP commands. This routine can be called out of
- * either ar_wput or ar_rput, in response to IOCTLs or M_PROTO messages.
- */
-/* TODO: error reporting for M_PROTO case */
-static int
-ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig, boolean_t from_wput)
-{
- arct_t *arct;
- uint32_t cmd;
- ssize_t len;
- mblk_t *mp = mp_orig;
- cred_t *cr = NULL;
-
- if (!mp)
- return (ENOENT);
-
- /* We get both M_PROTO and M_IOCTL messages, so watch out! */
- if (DB_TYPE(mp) == M_IOCTL) {
- struct iocblk *ioc;
- ioc = (struct iocblk *)mp->b_rptr;
- cmd = ioc->ioc_cmd;
- cr = ioc->ioc_cr;
- mp = mp->b_cont;
- if (!mp)
- return (ENOENT);
- } else {
- cr = msg_getcred(mp, NULL);
- /* For initial messages beteen IP and ARP, cr can be NULL */
- if (cr == NULL)
- cr = ((ar_t *)q->q_ptr)->ar_credp;
- }
- len = MBLKL(mp);
- if (len < sizeof (uint32_t) || !OK_32PTR(mp->b_rptr))
- return (ENOENT);
- if (mp_orig == mp)
- cmd = *(uint32_t *)mp->b_rptr;
- for (arct = ar_cmd_tbl; ; arct++) {
- if (arct >= A_END(ar_cmd_tbl))
- return (ENOENT);
- if (arct->arct_cmd == cmd)
- break;
- }
- if (len < arct->arct_min_len) {
- /*
- * If the command is exclusive to ARP, we return EINVAL,
- * else we need to pass the command downstream, so return
- * ENOENT
- */
- return ((arct->arct_flags & ARF_ONLY_CMD) ? EINVAL : ENOENT);
- }
- if (arct->arct_priv_req != OP_NP) {
- int error;
-
- if ((error = secpolicy_ip(cr, arct->arct_priv_req,
- B_FALSE)) != 0)
- return (error);
- }
- /* Disallow many commands except if from rput i.e. from IP */
- if (from_wput && !(arct->arct_flags & ARF_WPUT_OK)) {
- return (EINVAL);
- }
-
- if (arct->arct_flags & ARF_IOCTL_AWARE)
- mp = mp_orig;
-
- DTRACE_PROBE3(cmd_dispatch, queue_t *, q, mblk_t *, mp,
- arct_t *, arct);
- return (*arct->arct_pfi)(q, mp);
-}
-
- /*
-  * Allocate a DLPI message of `size' bytes and do the common initialization:
-  * the data is zeroed, b_wptr is advanced by `size', and dl_primitive is set
-  * to `prim'.  Returns NULL if the allocation fails.
-  */
- static mblk_t *
- ar_dlpi_comm(t_uscalar_t prim, size_t size)
- {
- 	mblk_t	*mp = allocb(size, BPRI_HI);
-
- 	if (mp == NULL)
- 		return (NULL);
-
- 	/*
- 	 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter of
- 	 * which we don't seem to use) are sent with M_PCPROTO, and that
- 	 * other DLPI are M_PROTO.
- 	 */
- 	if (prim == DL_INFO_REQ)
- 		DB_TYPE(mp) = M_PCPROTO;
- 	else
- 		DB_TYPE(mp) = M_PROTO;
-
- 	mp->b_wptr = mp->b_rptr + size;
- 	bzero(mp->b_rptr, size);
- 	((union DL_primitives *)mp->b_rptr)->dl_primitive = prim;
-
- 	return (mp);
- }
-
- /*
-  * Push deferred DLPI messages to the driver, one outstanding request at a
-  * time.  Messages are taken from the head of arl_dlpi_deferred for as long
-  * as no primitive is pending (arl_dlpi_pending == DL_PRIM_INVAL).  Each
-  * message sent records its primitive in arl_dlpi_pending, except for
-  * DL_NOTIFY_CONF, for which no ack is expected.
-  *
-  * If nothing is pending once the loop ends, queued ARP commands are resumed:
-  * through ar_cmd_drain() when the last message sent was a DL_NOTIFY_CONF
-  * (it is not associated with any pending command), and through
-  * ar_cmd_done() otherwise.
-  */
- static void
- ar_dlpi_dispatch(arl_t *arl)
- {
- 	t_uscalar_t	last_prim = DL_PRIM_INVAL;
- 	mblk_t		*mp;
-
- 	for (;;) {
- 		union DL_primitives	*dlp;
-
- 		mp = arl->arl_dlpi_deferred;
- 		if (mp == NULL || arl->arl_dlpi_pending != DL_PRIM_INVAL)
- 			break;
-
- 		dlp = (union DL_primitives *)mp->b_rptr;
-
- 		DTRACE_PROBE2(dlpi_dispatch, arl_t *, arl, mblk_t *, mp);
-
- 		ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
-
- 		/* Detach the message from the deferred list. */
- 		arl->arl_dlpi_deferred = mp->b_next;
- 		mp->b_next = NULL;
-
- 		/* A DL_NOTIFY_CONF is not acked, so it never becomes pending. */
- 		last_prim = dlp->dl_primitive;
- 		if (last_prim != DL_NOTIFY_CONF)
- 			arl->arl_dlpi_pending = last_prim;
- 		putnext(arl->arl_wq, mp);
- 	}
-
- 	if (arl->arl_dlpi_pending != DL_PRIM_INVAL) {
- 		/* Still awaiting an ack; whatever is left stays deferred. */
- 		if (mp != NULL) {
- 			DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp);
- 		}
- 		return;
- 	}
-
- 	/* No pending DLPI operation. */
- 	ASSERT(mp == NULL);
- 	DTRACE_PROBE1(dlpi_idle, arl_t *, arl);
-
- 	if (last_prim == DL_NOTIFY_CONF)
- 		ar_cmd_drain(arl);
- 	else
- 		ar_cmd_done(arl);
- }
-
-/*
- * The following two functions serialize DLPI messages to the driver, much
- * along the lines of ill_dlpi_send and ill_dlpi_done in IP. Basically,
- * we wait for a DLPI message, sent downstream, to be acked before sending
- * the next. If there are DLPI messages that have not yet been sent, queue
- * this message (mp), else send it downstream.
- */
-static void
-ar_dlpi_send(arl_t *arl, mblk_t *mp)
-{
- mblk_t **mpp;
-
- ASSERT(arl != NULL);
- ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
-
- /* Always queue the message. Tail insertion */
- mpp = &arl->arl_dlpi_deferred;
- while (*mpp != NULL)
- mpp = &((*mpp)->b_next);
- *mpp = mp;
-
- ar_dlpi_dispatch(arl);
-}
-
-/*
- * Called when an DLPI control message has been acked; send down the next
- * queued message (if any).
- * The DLPI messages of interest being bind, attach, unbind and detach since
- * these are the only ones sent by ARP via ar_dlpi_send.
- */
-static void
-ar_dlpi_done(arl_t *arl, t_uscalar_t prim)
-{
- if (arl->arl_dlpi_pending != prim) {
- DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl,
- t_uscalar_t, prim);
- return;
- }
-
- DTRACE_PROBE2(dlpi_done, arl_t *, arl, t_uscalar_t, prim);
- arl->arl_dlpi_pending = DL_PRIM_INVAL;
- ar_dlpi_dispatch(arl);
-}
-
-/*
- * Send a DL_NOTE_REPLUMB_DONE message down to the driver to indicate
- * the replumb process has already been done. Note that mp is either a
- * DL_NOTIFY_IND message or an AR_INTERFACE_DOWN message (comes from IP).
- */
-static void
-arp_replumb_done(arl_t *arl, mblk_t *mp)
-{
- ASSERT(arl->arl_state == ARL_S_DOWN && arl->arl_replumbing);
-
- mp = mexchange(NULL, mp, sizeof (dl_notify_conf_t), M_PROTO,
- DL_NOTIFY_CONF);
- ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification =
- DL_NOTE_REPLUMB_DONE;
- arl->arl_replumbing = B_FALSE;
- ar_dlpi_send(arl, mp);
-}
-
- /*
-  * Run the commands that were enqueued on arl_queue while we were waiting
-  * for the last command (AR_INTERFACE_UP or AR_INTERFACE_DOWN) to complete.
-  * Each command is dequeued, tagged AR_DRAINING in b_prev, and re-submitted
-  * to ar_rput or ar_wput according to the queue it originally arrived on.
-  * Draining stops when the queue is empty or when its head is marked
-  * CMD_IN_PROGRESS.
-  */
- static void
- ar_cmd_drain(arl_t *arl)
- {
- 	mblk_t	*mp;
- 	queue_t	*origin;
-
- 	for (;;) {
- 		mp = arl->arl_queue;
- 		if (mp == NULL)
- 			break;
-
- 		/*
- 		 * A head marked CMD_IN_PROGRESS is an AR_INTERFACE_UP or
- 		 * AR_INTERFACE_DOWN still waiting for a DLPI ack from the
- 		 * driver.  We can't make progress now.
- 		 */
- 		if (((uintptr_t)mp->b_prev & CMD_IN_PROGRESS) != 0)
- 			break;
-
- 		mp = ar_cmd_dequeue(arl);
- 		origin = mp->b_queue;
- 		mp->b_queue = NULL;
- 		mp->b_prev = AR_DRAINING;
-
- 		/*
- 		 * Call the put routines directly rather than put(q, mp):
- 		 * put() can reorder messages by sending the current one to
- 		 * the end of arp's syncq.
- 		 */
- 		if (origin->q_flag & QREADR)
- 			ar_rput(origin, mp);
- 		else
- 			ar_wput(origin, mp);
- 	}
- }
-
- static void
- ar_cmd_done(arl_t *arl) /* finish the in-progress head command, then drain */
- {
- mblk_t *mp;
- int cmd;
- int err;
- mblk_t *mp1;
- mblk_t *dlpi_op_done_mp = NULL;
- queue_t *dlpi_op_done_q; /* only valid when dlpi_op_done_mp != NULL */
- ar_t *ar_arl;
- ar_t *ar_ip;
-
- ASSERT(arl->arl_state == ARL_S_UP || arl->arl_state == ARL_S_DOWN);
-
- /*
- * If the current operation was initiated by IP there must be
- * an op enqueued in arl_queue. But if ar_close has sent down
- * a detach/unbind, there is no command enqueued. Also if the IP-ARP
- * stream has closed the cleanup would be done and there won't be any mp
- */
- if ((mp = arl->arl_queue) == NULL)
- return;
-
- if ((cmd = (uintptr_t)mp->b_prev) & CMD_IN_PROGRESS) {
- mp1 = ar_cmd_dequeue(arl);
- ASSERT(mp == mp1);
-
- cmd &= ~CMD_IN_PROGRESS; /* leaves just the command code */
- if (cmd == AR_INTERFACE_UP) {
- /*
- * There is an ioctl waiting for us...
- */
- if (arl->arl_state == ARL_S_UP)
- err = 0;
- else
- err = EINVAL;
-
- dlpi_op_done_mp = ar_alloc(AR_DLPIOP_DONE, err);
- if (dlpi_op_done_mp != NULL) {
- /*
- * Better performance if we send the response
- * after the potential MAPPING_ADDs command
- * that are likely to follow. (Do it below the
- * while loop, instead of putnext right now)
- */
- dlpi_op_done_q = WR(mp->b_queue);
- }
-
- if (err == 0) {
- /*
- * Now that we have the ARL instance
- * corresponding to the IP instance let's make
- * the association here.
- */
- ar_ip = (ar_t *)mp->b_queue->q_ptr;
- ar_arl = (ar_t *)arl->arl_rq->q_ptr;
- ar_arl->ar_arl_ip_assoc = ar_ip;
- ar_ip->ar_arl_ip_assoc = ar_arl;
- }
-
- inet_freemsg(mp);
- } else if (cmd == AR_INTERFACE_DOWN && arl->arl_replumbing) {
- /*
- * The arl is successfully brought down and this is
- * a result of the DL_NOTE_REPLUMB process. Reset
- * mp->b_prev first (it keeps the 'cmd' information
- * at this point).
- */
- mp->b_prev = NULL;
- arp_replumb_done(arl, mp); /* mp is handed over, not freed here */
- } else {
- inet_freemsg(mp);
- }
- }
-
- ar_cmd_drain(arl); /* run whatever was queued behind that command */
-
- if (dlpi_op_done_mp != NULL) {
- DTRACE_PROBE3(cmd_done_next, arl_t *, arl,
- queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp);
- putnext(dlpi_op_done_q, dlpi_op_done_mp);
- }
- }
-
-/*
- * Queue all arp commands coming from clients. Typically these commands
- * come from IP, but could also come from other clients. The commands
- * are serviced in FIFO order. Some commands need to wait and restart
- * after the DLPI response from the driver is received. Typically
- * AR_INTERFACE_UP and AR_INTERFACE_DOWN. ar_dlpi_done restarts
- * the command and then dequeues the queue at arl_queue and calls ar_rput
- * or ar_wput for each enqueued command. AR_DRAINING is used to signify
- * that the command is being executed thru a drain from ar_dlpi_done.
- * Functions handling the individual commands such as ar_entry_add
- * check for this flag in b_prev to determine whether the command has
- * to be enqueued for later processing or must be processed now.
- *
- * b_next used to thread the enqueued command mblks
- * b_queue used to identify the queue of the originating request(client)
- * b_prev used to store the command itself for easy parsing.
- */
-static void
-ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd,
- boolean_t tail_insert)
-{
- mp->b_queue = q;
- if (arl->arl_queue == NULL) {
- ASSERT(arl->arl_queue_tail == NULL);
- mp->b_prev = (void *)((uintptr_t)(cmd | CMD_IN_PROGRESS));
- mp->b_next = NULL;
- arl->arl_queue = mp;
- arl->arl_queue_tail = mp;
- } else if (tail_insert) {
- mp->b_prev = (void *)((uintptr_t)cmd);
- mp->b_next = NULL;
- arl->arl_queue_tail->b_next = mp;
- arl->arl_queue_tail = mp;
- } else {
- /* head insert */
- mp->b_prev = (void *)((uintptr_t)cmd | CMD_IN_PROGRESS);
- mp->b_next = arl->arl_queue;
- arl->arl_queue = mp;
- }
-}
-
- /*
-  * Remove and return the command at the head of arl_queue, or NULL if the
-  * queue is empty.  arl_queue_tail is cleared when the last command is
-  * removed, and the returned mblk's b_next is reset to NULL.
-  */
- static mblk_t *
- ar_cmd_dequeue(arl_t *arl)
- {
- 	mblk_t	*head = arl->arl_queue;
-
- 	if (head == NULL) {
- 		ASSERT(arl->arl_queue_tail == NULL);
- 		return (NULL);
- 	}
-
- 	arl->arl_queue = head->b_next;
- 	if (head->b_next == NULL)
- 		arl->arl_queue_tail = NULL;
- 	head->b_next = NULL;
-
- 	return (head);
- }
-
-/*
- * Standard ACE timer handling: compute 'fuzz' around a central value or from 0
- * up to a value, and then set the timer. The randomization is necessary to
- * prevent groups of systems from falling into synchronization on the network
- * and producing ARP packet storms.
- */
-static void
-ace_set_timer(ace_t *ace, boolean_t initial_time)
-{
- clock_t intv, rnd, frac;
-
- (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd));
- /* Note that clock_t is signed; must chop off bits */
- rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1;
- intv = ace->ace_xmit_interval;
- if (initial_time) {
- /* Set intv to be anywhere in the [1 .. intv] range */
- if (intv <= 0)
- intv = 1;
- else
- intv = (rnd % intv) + 1;
- } else {
- /* Compute 'frac' as 20% of the configured interval */
- if ((frac = intv / 5) <= 1)
- frac = 2;
- /* Set intv randomly in the range [intv-frac .. intv+frac] */
- if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0)
- intv = 1;
- }
- mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, intv);
-}
-
-/*
- * Process entry add requests from external messages.
- * It is also called by ip_rput_dlpi_writer() through
- * ipif_resolver_up() to change hardware address when
- * an asynchronous hardware address change notification
- * arrives from the driver.
- */
-static int
-ar_entry_add(queue_t *q, mblk_t *mp_orig)
-{
- area_t *area;
- ace_t *ace;
- uchar_t *hw_addr;
- uint32_t hw_addr_len;
- uchar_t *proto_addr;
- uint32_t proto_addr_len;
- uchar_t *proto_mask;
- arl_t *arl;
- mblk_t *mp = mp_orig;
- int err;
- uint_t aflags;
- boolean_t unverified;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- /* We handle both M_IOCTL and M_PROTO messages. */
- if (DB_TYPE(mp) == M_IOCTL)
- mp = mp->b_cont;
- arl = ar_ll_lookup_from_mp(as, mp);
- if (arl == NULL)
- return (EINVAL);
- /*
- * Newly received commands from clients go to the tail of the queue.
- */
- if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- DTRACE_PROBE3(eadd_enqueued, queue_t *, q, mblk_t *, mp_orig,
- arl_t *, arl);
- ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_ADD, B_TRUE);
- return (EINPROGRESS);
- }
- mp_orig->b_prev = NULL;
-
- area = (area_t *)mp->b_rptr;
- aflags = area->area_flags;
-
- /*
- * If the previous entry wasn't published and we are now going
- * to publish, then we need to do address verification. The previous
- * entry may have been a local unpublished address or even an external
- * address. If the entry we find was in an unverified state we retain
- * this.
- * If it's a new published entry, then we're obligated to do
- * duplicate address detection now.
- */
- ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup_entry);
- if (ace != NULL) {
- unverified = !(ace->ace_flags & ACE_F_PUBLISH) &&
- (aflags & ACE_F_PUBLISH);
- if (ace->ace_flags & ACE_F_UNVERIFIED)
- unverified = B_TRUE;
- ar_ce_delete(ace);
- } else {
- unverified = (aflags & ACE_F_PUBLISH) != 0;
- }
-
- /* Allow client to request DAD restart */
- if (aflags & ACE_F_UNVERIFIED)
- unverified = B_TRUE;
-
- /* Extract parameters from the message. */
- hw_addr_len = area->area_hw_addr_length;
- hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len);
- proto_addr_len = area->area_proto_addr_length;
- proto_addr = mi_offset_paramc(mp, area->area_proto_addr_offset,
- proto_addr_len);
- proto_mask = mi_offset_paramc(mp, area->area_proto_mask_offset,
- proto_addr_len);
- if (proto_mask == NULL) {
- DTRACE_PROBE2(eadd_bad_mask, arl_t *, arl, area_t *, area);
- return (EINVAL);
- }
- err = ar_ce_create(
- arl,
- area->area_proto,
- hw_addr,
- hw_addr_len,
- proto_addr,
- proto_addr_len,
- proto_mask,
- NULL,
- (uint32_t)0,
- NULL,
- aflags & ~ACE_F_MAPPING & ~ACE_F_UNVERIFIED & ~ACE_F_DEFEND);
- if (err != 0) {
- DTRACE_PROBE3(eadd_create_failed, arl_t *, arl, area_t *, area,
- int, err);
- return (err);
- }
-
- if (aflags & ACE_F_PUBLISH) {
- arlphy_t *ap;
-
- ace = ar_ce_lookup(arl, area->area_proto, proto_addr,
- proto_addr_len);
- ASSERT(ace != NULL);
-
- ap = ace->ace_xmit_arl->arl_phy;
-
- if (hw_addr == NULL || hw_addr_len == 0) {
- hw_addr = ap->ap_hw_addr;
- } else if (aflags & ACE_F_MYADDR) {
- /*
- * If hardware address changes, then make sure
- * that the hardware address and hardware
- * address length fields in arlphy_t get updated
- * too. Otherwise, they will continue carrying
- * the old hardware address information.
- */
- ASSERT((hw_addr != NULL) && (hw_addr_len != 0));
- bcopy(hw_addr, ap->ap_hw_addr, hw_addr_len);
- ap->ap_hw_addrlen = hw_addr_len;
- }
-
- if (ace->ace_flags & ACE_F_FAST) {
- ace->ace_xmit_count = as->as_fastprobe_count;
- ace->ace_xmit_interval = as->as_fastprobe_delay;
- } else {
- ace->ace_xmit_count = as->as_probe_count;
- ace->ace_xmit_interval = as->as_probe_delay;
- }
-
- /*
- * If the user has disabled duplicate address detection for
- * this kind of interface (fast or slow) by setting the probe
- * count to zero, then pretend as if we've verified the
- * address, and go right to address defense mode.
- */
- if (ace->ace_xmit_count == 0)
- unverified = B_FALSE;
-
- /*
- * If we need to do duplicate address detection, then kick that
- * off. Otherwise, send out a gratuitous ARP message in order
- * to update everyone's caches with the new hardware address.
- */
- if (unverified) {
- ace->ace_flags |= ACE_F_UNVERIFIED;
- if (ace->ace_xmit_interval == 0) {
- /*
- * User has configured us to send the first
- * probe right away. Do so, and set up for
- * the subsequent probes.
- */
- DTRACE_PROBE2(eadd_probe, ace_t *, ace,
- area_t *, area);
- ar_xmit(ace->ace_xmit_arl, ARP_REQUEST,
- area->area_proto, proto_addr_len,
- hw_addr, NULL, NULL, proto_addr, NULL, as);
- ace->ace_xmit_count--;
- ace->ace_xmit_interval =
- (ace->ace_flags & ACE_F_FAST) ?
- as->as_fastprobe_interval :
- as->as_probe_interval;
- ace_set_timer(ace, B_FALSE);
- } else {
- DTRACE_PROBE2(eadd_delay, ace_t *, ace,
- area_t *, area);
- /* Regular delay before initial probe */
- ace_set_timer(ace, B_TRUE);
- }
- } else {
- DTRACE_PROBE2(eadd_announce, ace_t *, ace,
- area_t *, area);
- ar_xmit(ace->ace_xmit_arl, ARP_REQUEST,
- area->area_proto, proto_addr_len, hw_addr,
- proto_addr, ap->ap_arp_addr, proto_addr, NULL, as);
- ace->ace_last_bcast = ddi_get_lbolt();
-
- /*
- * If AUTHORITY is set, it is not just a proxy arp
- * entry; we believe we're the authority for this
- * entry. In that case, and if we're not just doing
- * one-off defense of the address, we send more than
- * one copy, so we'll still have a good chance of
- * updating everyone even when there's a packet loss
- * or two.
- */
- if ((aflags & ACE_F_AUTHORITY) &&
- !(aflags & ACE_F_DEFEND) &&
- as->as_publish_count > 0) {
- /* Account for the xmit we just did */
- ace->ace_xmit_count = as->as_publish_count - 1;
- ace->ace_xmit_interval =
- as->as_publish_interval;
- if (ace->ace_xmit_count > 0)
- ace_set_timer(ace, B_FALSE);
- }
- }
- }
- return (0);
-}
-
-/*
- * Handle a client request to remove an ARP cache entry.
- *
- * The command is carried either directly in an M_PROTO block or in the
- * b_cont of an M_IOCTL.  Returns EINVAL if the named interface cannot be
- * found, EINPROGRESS if the command was appended to the arl's command
- * queue, ENXIO if no matching cache entry exists, and 0 once a matching
- * entry has been processed.
- */
-static int
-ar_entry_delete(queue_t *q, mblk_t *mp_orig)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-	mblk_t		*cmd_mp;
-	arl_t		*arl;
-	ace_t		*ace;
-	ared_t		*ared;
-	boolean_t	keep_perm;
-
-	/* Both M_IOCTL and M_PROTO are accepted; locate the command block. */
-	cmd_mp = (DB_TYPE(mp_orig) == M_IOCTL) ? mp_orig->b_cont : mp_orig;
-
-	if ((arl = ar_ll_lookup_from_mp(as, cmd_mp)) == NULL)
-		return (EINVAL);
-
-	/* Newly received client commands join the tail of the queue. */
-	if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
-		DTRACE_PROBE3(edel_enqueued, queue_t *, q, mblk_t *, mp_orig,
-		    arl_t *, arl);
-		ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_DELETE, B_TRUE);
-		return (EINPROGRESS);
-	}
-	mp_orig->b_prev = NULL;
-
-	/*
-	 * The entry could be a mapping or an exact match; the exact-match
-	 * lookup is the one tried here.
-	 */
-	ace = ar_ce_lookup_from_area(as, cmd_mp, ar_ce_lookup);
-	if (ace == NULL)
-		return (ENXIO);
-
-	ared = (ared_t *)cmd_mp->b_rptr;
-	keep_perm = (ared->ared_flags & ARED_F_PRESERVE_PERM) != 0;
-
-	/*
-	 * A permanent entry was installed by the client that is now asking
-	 * for its removal, so only non-permanent entries trigger a
-	 * notification.
-	 */
-	if (ACE_NONPERM(ace))
-		ar_delete_notify(ace);
-
-	/*
-	 * Permanent entries survive when the caller asked for them to be
-	 * preserved; everything else is removed.
-	 */
-	if (ACE_NONPERM(ace) || !keep_perm)
-		ar_ce_delete(ace);
-
-	return (0);
-}
-
-/*
- * Process entry query requests from external messages.
- *
- * The request is either an M_IOCTL (payload in b_cont) or an M_PROTO.
- * EINPROGRESS is returned whenever the message has been retained: queued
- * on the arl's command queue, linked onto the cache entry's pending query
- * list, or already answered through ar_query_reply().
- *
- * Bump up the ire_stats_freed for all errors except EINPROGRESS - which
- * means the packet has been queued.  For all other errors the packet is
- * going to be freed and hence we account for ire being freed if it is a
- * M_PROTO message.
- *
- * Every error exit taken after the reply queue has been stashed in
- * mp->b_prev clears b_prev again before returning.
- */
-static int
-ar_entry_query(queue_t *q, mblk_t *mp_orig)
-{
-	ace_t		*ace;
-	areq_t		*areq;
-	arl_t		*arl;
-	int		err;
-	mblk_t		*mp = mp_orig;
-	uchar_t		*proto_addr;
-	uchar_t		*sender_addr;
-	uint32_t	proto_addr_len;
-	clock_t		ms;
-	boolean_t	is_mproto = B_TRUE;
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-
-	/* We handle both M_IOCTL and M_PROTO messages. */
-	if (DB_TYPE(mp) == M_IOCTL) {
-		is_mproto = B_FALSE;
-		mp = mp->b_cont;
-	}
-	arl = ar_ll_lookup_from_mp(as, mp);
-	if (arl == NULL) {
-		DTRACE_PROBE2(query_no_arl, queue_t *, q, mblk_t *, mp);
-		err = EINVAL;
-		goto err_ret;
-	}
-	/*
-	 * Newly received commands from clients go to the tail of the queue.
-	 */
-	if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
-		DTRACE_PROBE3(query_enqueued, queue_t *, q, mblk_t *, mp_orig,
-		    arl_t *, arl);
-		ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_QUERY, B_TRUE);
-		return (EINPROGRESS);
-	}
-	mp_orig->b_prev = NULL;
-
-	areq = (areq_t *)mp->b_rptr;
-	proto_addr_len = areq->areq_target_addr_length;
-	proto_addr = mi_offset_paramc(mp, areq->areq_target_addr_offset,
-	    proto_addr_len);
-	if (proto_addr == NULL) {
-		DTRACE_PROBE1(query_illegal_address, areq_t *, areq);
-		err = EINVAL;
-		goto err_ret;
-	}
-	/* Stash the reply queue pointer for later use. */
-	mp->b_prev = (mblk_t *)OTHERQ(q);
-	mp->b_next = NULL;
-	if (areq->areq_xmit_interval == 0)
-		areq->areq_xmit_interval = AR_DEF_XMIT_INTERVAL;
-	ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len);
-	if (ace != NULL && (ace->ace_flags & ACE_F_OLD)) {
-		/*
-		 * This is a potentially stale entry that IP's asking about.
-		 * Since IP is asking, it must not have an answer anymore,
-		 * either due to periodic ARP flush or due to SO_DONTROUTE.
-		 * Rather than go forward with what we've got, restart
-		 * resolution.
-		 */
-		DTRACE_PROBE2(query_stale_ace, ace_t *, ace, areq_t *, areq);
-		ar_ce_delete(ace);
-		ace = NULL;
-	}
-	if (ace != NULL) {
-		mblk_t **mpp;
-		uint32_t count = 0;
-
-		/*
-		 * There is already a cache entry.  This means there is either
-		 * a permanent entry, or address resolution is in progress.
-		 * If the latter, there should be one or more queries queued
-		 * up.  We link the current one in at the end, if there aren't
-		 * too many outstanding.
-		 */
-		for (mpp = &ace->ace_query_mp; mpp[0]; mpp = &mpp[0]->b_next) {
-			if (++count > areq->areq_max_buffered) {
-				DTRACE_PROBE2(query_overflow, ace_t *, ace,
-				    areq_t *, areq);
-				mp->b_prev = NULL;
-				err = EALREADY;
-				goto err_ret;
-			}
-		}
-		/* Put us on the list. */
-		mpp[0] = mp;
-		if (count != 0) {
-			/*
-			 * If a query was already queued up, then we must not
-			 * have an answer yet.
-			 */
-			DTRACE_PROBE2(query_in_progress, ace_t *, ace,
-			    areq_t *, areq);
-			return (EINPROGRESS);
-		}
-		if (ACE_RESOLVED(ace)) {
-			/*
-			 * We have an answer already.
-			 * Keep a dup of mp since proto_addr points to it
-			 * and mp has been placed on the ace_query_mp list.
-			 * NOTE(review): the dupmsg() result is not checked;
-			 * it is only handed to freemsg() afterwards.
-			 */
-			mblk_t *mp1;
-
-			DTRACE_PROBE2(query_resolved, ace_t *, ace,
-			    areq_t *, areq);
-			mp1 = dupmsg(mp);
-			ar_query_reply(ace, 0, proto_addr, proto_addr_len);
-			freemsg(mp1);
-			return (EINPROGRESS);
-		}
-		if (ace->ace_flags & ACE_F_MAPPING) {
-			/* Should never happen */
-			DTRACE_PROBE2(query_unresolved_mapping, ace_t *, ace,
-			    areq_t *, areq);
-			/*
-			 * Take the message back off the query list and drop
-			 * the stashed reply queue, as the other error exits
-			 * do, before handing it back to the caller.
-			 */
-			mpp[0] = mp->b_next;
-			mp->b_prev = NULL;
-			err = ENXIO;
-			goto err_ret;
-		}
-		DTRACE_PROBE2(query_unresolved, ace_t *, ace, areq_t *, areq);
-	} else {
-		/* No ace yet.  Make one now.  (This is the common case.) */
-		if (areq->areq_xmit_count == 0) {
-			DTRACE_PROBE2(query_template, arl_t *, arl,
-			    areq_t *, areq);
-			mp->b_prev = NULL;
-			err = ENXIO;
-			goto err_ret;
-		}
-		/*
-		 * Check for sender addr being NULL or not before
-		 * we create the ace.  It is easy to cleanup later.
-		 */
-		sender_addr = mi_offset_paramc(mp,
-		    areq->areq_sender_addr_offset,
-		    areq->areq_sender_addr_length);
-		if (sender_addr == NULL) {
-			DTRACE_PROBE2(query_no_sender, arl_t *, arl,
-			    areq_t *, areq);
-			mp->b_prev = NULL;
-			err = EINVAL;
-			goto err_ret;
-		}
-		err = ar_ce_create(OWNING_ARL(arl), areq->areq_proto, NULL, 0,
-		    proto_addr, proto_addr_len, NULL,
-		    NULL, (uint32_t)0, sender_addr,
-		    areq->areq_flags);
-		if (err != 0) {
-			DTRACE_PROBE3(query_create_failed, arl_t *, arl,
-			    areq_t *, areq, int, err);
-			mp->b_prev = NULL;
-			goto err_ret;
-		}
-		ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr,
-		    proto_addr_len);
-		if (ace == NULL || ace->ace_query_mp != NULL) {
-			/* Shouldn't happen! */
-			DTRACE_PROBE3(query_lookup_failed, arl_t *, arl,
-			    areq_t *, areq, ace_t *, ace);
-			mp->b_prev = NULL;
-			err = ENXIO;
-			goto err_ret;
-		}
-		ace->ace_query_mp = mp;
-	}
-	/* Send the request; a zero return asks for an immediate reply. */
-	ms = ar_query_xmit(as, ace);
-	if (ms == 0) {
-		/* Immediate reply requested. */
-		ar_query_reply(ace, ENXIO, NULL, (uint32_t)0);
-	} else {
-		mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, ms);
-	}
-	return (EINPROGRESS);
-err_ret:
-	if (is_mproto) {
-		ip_stack_t *ipst = as->as_netstack->netstack_ip;
-
-		BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed);
-	}
-	return (err);
-}
-
-/*
- * Handle simple query requests.
- *
- * Looks up the cache entry for the protocol address carried in the area_t
- * request and reports the entry's flags back in the request message
- * itself; for a resolved entry ar_set_address() is called on the caller's
- * address buffers as well.
- *
- * Returns EINVAL for an unknown interface, a malformed address, or a
- * hardware address buffer shorter than the entry's address; ENXIO when no
- * entry exists; EINPROGRESS when the message was queued or (non-ioctl
- * case) has already been sent back with qreply(); and 0 for the ioctl
- * case.
- */
-static int
-ar_entry_squery(queue_t *q, mblk_t *mp_orig)
-{
- ace_t *ace;
- area_t *area;
- arl_t *arl;
- uchar_t *hw_addr;
- uint32_t hw_addr_len;
- mblk_t *mp = mp_orig;
- uchar_t *proto_addr;
- int proto_addr_len;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- if (DB_TYPE(mp) == M_IOCTL)
- mp = mp->b_cont; /* ioctl: the command is in the continuation block */
- arl = ar_ll_lookup_from_mp(as, mp);
- if (arl == NULL)
- return (EINVAL);
- /*
- * Newly received commands from clients go to the tail of the queue.
- */
- if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- DTRACE_PROBE3(squery_enqueued, queue_t *, q, mblk_t *, mp_orig,
- arl_t *, arl);
- ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_SQUERY, B_TRUE);
- return (EINPROGRESS);
- }
- mp_orig->b_prev = NULL;
-
- /*
- * Extract parameters from the request message.  Both the protocol
- * address and the hardware address buffer must be present.
- */
- area = (area_t *)mp->b_rptr;
- proto_addr_len = area->area_proto_addr_length;
- proto_addr = mi_offset_paramc(mp, area->area_proto_addr_offset,
- proto_addr_len);
- hw_addr_len = area->area_hw_addr_length;
- hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len);
- if (proto_addr == NULL || hw_addr == NULL) {
- DTRACE_PROBE1(squery_illegal_address, area_t *, area);
- return (EINVAL);
- }
- ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len);
- if (ace == NULL) {
- return (ENXIO);
- }
- if (hw_addr_len < ace->ace_hw_addr_length) {
- return (EINVAL); /* caller's buffer is shorter than the entry's address */
- }
- if (ACE_RESOLVED(ace)) {
- /*
- * Got it, prepare the response.
- * NOTE(review): the check above only rejects a shorter buffer,
- * while this ASSERT requires the lengths to be equal.
- */
- ASSERT(area->area_hw_addr_length == ace->ace_hw_addr_length);
- ar_set_address(ace, hw_addr, proto_addr, proto_addr_len);
- } else {
- /*
- * We have an incomplete entry. Set the length to zero and
- * just return out the flags.
- */
- area->area_hw_addr_length = 0;
- }
- area->area_flags = ace->ace_flags;
- if (mp == mp_orig) {
- /* Non-ioctl case: the request block itself is sent back. */
- /* TODO: change message type? */
- DB_TYPE(mp) = M_CTL; /* Caught by ip_wput */
- DTRACE_PROBE3(squery_reply, queue_t *, q, mblk_t *, mp,
- arl_t *, arl);
- qreply(q, mp);
- return (EINPROGRESS); /* mp has been passed on via qreply() */
- }
- return (0);
-}
-
-/*
- * Process an interface down causing us to detach and unbind.
- *
- * Returns EINVAL if the interface named in 'mp' is unknown or closing,
- * 0 if the arl is already down and not replumbing, and EINPROGRESS in
- * every case where 'mp' has been retained: queued behind other commands,
- * handed to arp_replumb_done(), or queued at the head of the command
- * queue while ar_ll_down() takes the link down.
- */
-/* ARGSUSED */
-static int
-ar_interface_down(queue_t *q, mblk_t *mp)
-{
- arl_t *arl;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- arl = ar_ll_lookup_from_mp(as, mp);
- if (arl == NULL || arl->arl_closing) {
- DTRACE_PROBE2(down_no_arl, queue_t *, q, mblk_t *, mp);
- return (EINVAL);
- }
-
- /*
- * Newly received commands from clients go to the tail of the queue.
- */
- if (CMD_NEEDS_QUEUEING(mp, arl)) {
- DTRACE_PROBE3(down_enqueued, queue_t *, q, mblk_t *, mp,
- arl_t *, arl);
- ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_TRUE);
- return (EINPROGRESS);
- }
- mp->b_prev = NULL;
- /*
- * The arl is already down, no work to do.
- */
- if (arl->arl_state == ARL_S_DOWN) {
- if (arl->arl_replumbing) {
- /*
- * The arl is already down and this is a result of
- * the DL_NOTE_REPLUMB process. Return EINPROGRESS
- * so this mp won't be freed by ar_rput().
- */
- arp_replumb_done(arl, mp);
- return (EINPROGRESS);
- } else {
- /* ar_rput frees the mp */
- return (0);
- }
- }
-
- /*
- * This command cannot complete in a single shot now itself.
- * It has to be restarted after the receipt of the ack from
- * the driver. So we need to enqueue the command (at the head).
- */
- ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_FALSE);
-
- ASSERT(arl->arl_state == ARL_S_UP);
-
- /* Free all arp entries for this interface */
- ar_ce_walk(as, ar_ce_delete_per_arl, arl);
-
- ar_ll_down(arl);
- /* Return EINPROGRESS so that ar_rput does not free the 'mp' */
- return (EINPROGRESS);
-}
-
-
-/*
- * Process an interface up causing the info req sequence to start.
- *
- * Returns EINPROGRESS when 'mp' has been retained (queued behind other
- * commands, or queued at the head while ar_ll_up() runs).  On the two
- * immediate outcomes - unknown/closing interface (EINVAL) or an arl that
- * is already up (0) - an AR_DLPIOP_DONE message carrying the result is
- * sent with putnext() on WR(q), if one can be allocated, and the result
- * is returned.
- */
-/* ARGSUSED */
-static int
-ar_interface_up(queue_t *q, mblk_t *mp)
-{
- arl_t *arl;
- int err;
- mblk_t *mp1;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- arl = ar_ll_lookup_from_mp(as, mp);
- if (arl == NULL || arl->arl_closing) {
- DTRACE_PROBE2(up_no_arl, queue_t *, q, mblk_t *, mp);
- err = EINVAL;
- goto done;
- }
-
- /*
- * Newly received commands from clients go to the tail of the queue.
- */
- if (CMD_NEEDS_QUEUEING(mp, arl)) {
- DTRACE_PROBE3(up_enqueued, queue_t *, q, mblk_t *, mp,
- arl_t *, arl);
- ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_TRUE);
- return (EINPROGRESS);
- }
- mp->b_prev = NULL;
-
- /*
- * The arl is already up. No work to do.
- */
- if (arl->arl_state == ARL_S_UP) {
- err = 0;
- goto done;
- }
-
- /*
- * This command cannot complete in a single shot now itself.
- * It has to be restarted after the receipt of the ack from
- * the driver. So we need to enqueue the command (at the head).
- */
- ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_FALSE);
-
- err = ar_ll_up(arl); /* NOTE(review): err is not examined on this path */
-
- /* Return EINPROGRESS so that ar_rput does not free the 'mp' */
- return (EINPROGRESS);
-
-done:
- /* caller frees 'mp'; report the outcome with an AR_DLPIOP_DONE message */
-
- mp1 = ar_alloc(AR_DLPIOP_DONE, err);
- if (mp1 != NULL) {
- q = WR(q);
- DTRACE_PROBE3(up_send_err, queue_t *, q, mblk_t *, mp1,
- int, err);
- putnext(q, mp1);
- }
- return (err);
-}
-
-/*
- * Resolve the two interfaces named by the arie_t in `mp': the arl named by
- * the common command header is stored in `*arlp', and the arl named by
- * arie_grifname in `*ipmp_arlp'.  Returns B_TRUE only when both are found.
- * `*ipmp_arlp' is not written if the first lookup fails.
- */
-static boolean_t
-ar_ipmp_lookup(arp_stack_t *as, mblk_t *mp, arl_t **arlp, arl_t **ipmp_arlp)
-{
-	arie_t	*req = (arie_t *)mp->b_rptr;
-	arl_t	*under;
-	arl_t	*group;
-
-	under = ar_ll_lookup_from_mp(as, mp);
-	*arlp = under;
-	if (under == NULL) {
-		DTRACE_PROBE1(ipmp_lookup_no_arl, mblk_t *, mp);
-		return (B_FALSE);
-	}
-
-	/* Force termination of the group name before using it as a string. */
-	req->arie_grifname[LIFNAMSIZ - 1] = '\0';
-	group = ar_ll_lookup_by_name(as, req->arie_grifname);
-	*ipmp_arlp = group;
-	if (group == NULL) {
-		DTRACE_PROBE1(ipmp_lookup_no_ipmp_arl, mblk_t *, mp);
-		return (B_FALSE);
-	}
-
-	DTRACE_PROBE2(ipmp_lookup, arl_t *, under, arl_t *, group);
-	return (B_TRUE);
-}
-
-/*
- * Attach an arl_t to an IPMP group arl_t.  Returns EINVAL if either
- * interface named in `mp' cannot be found, EALREADY if the arl is already
- * bound to a group, and 0 on success.
- */
-static int
-ar_ipmp_activate(queue_t *q, mblk_t *mp)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-	arl_t		*under_arl;
-	arl_t		*group_arl;
-	int		rval;
-
-	if (!ar_ipmp_lookup(as, mp, &under_arl, &group_arl))
-		return (EINVAL);
-
-	if (under_arl->arl_ipmp_arl == NULL) {
-		DTRACE_PROBE2(ipmp_activate, arl_t *, under_arl, arl_t *,
-		    group_arl);
-		under_arl->arl_ipmp_arl = group_arl;
-		rval = 0;
-	} else {
-		DTRACE_PROBE1(ipmp_activated_already, arl_t *, under_arl);
-		rval = EALREADY;
-	}
-	return (rval);
-}
-
-/*
- * Detach an arl_t from its IPMP group arl_t.  The cache is walked with
- * ar_ce_ipmp_deactivate so the ace_t's stop treating the arl as part of
- * the group, and then the binding is cleared.  Returns EINVAL if either
- * interface cannot be found or the arl is not bound to the named group.
- */
-static int
-ar_ipmp_deactivate(queue_t *q, mblk_t *mp)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-	arl_t		*under_arl;
-	arl_t		*group_arl;
-
-	if (!ar_ipmp_lookup(as, mp, &under_arl, &group_arl))
-		return (EINVAL);
-
-	if (under_arl->arl_ipmp_arl == group_arl) {
-		DTRACE_PROBE2(ipmp_deactivate, arl_t *, under_arl, arl_t *,
-		    under_arl->arl_ipmp_arl);
-		ar_ce_walk(as, ar_ce_ipmp_deactivate, under_arl);
-		under_arl->arl_ipmp_arl = NULL;
-		return (0);
-	}
-
-	DTRACE_PROBE2(ipmp_deactivate_notactive, arl_t *, under_arl, arl_t *,
-	    group_arl);
-	return (EINVAL);
-}
-
-/*
- * Turn ARP_REQUEST and ARP_RESPONSE processing back on for the interface
- * named in `mp' by clearing ARL_F_NOARP.  Returns EINVAL if the interface
- * is not known, otherwise 0.
- */
-/* ARGSUSED */
-static int
-ar_interface_on(queue_t *q, mblk_t *mp)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-	arl_t		*target = ar_ll_lookup_from_mp(as, mp);
-
-	if (target != NULL) {
-		DTRACE_PROBE3(on_intf, queue_t *, q, mblk_t *, mp,
-		    arl_t *, target);
-		target->arl_flags &= ~ARL_F_NOARP;
-		return (0);
-	}
-
-	DTRACE_PROBE2(on_no_arl, queue_t *, q, mblk_t *, mp);
-	return (EINVAL);
-}
-
-/*
- * Stop the interface named in `mp' from processing ARP_REQUEST and
- * ARP_RESPONSE messages by setting ARL_F_NOARP.  Returns EINVAL if the
- * interface is not known, otherwise 0.
- */
-/* ARGSUSED */
-static int
-ar_interface_off(queue_t *q, mblk_t *mp)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-	arl_t		*target = ar_ll_lookup_from_mp(as, mp);
-
-	if (target != NULL) {
-		DTRACE_PROBE3(off_intf, queue_t *, q, mblk_t *, mp,
-		    arl_t *, target);
-		target->arl_flags |= ARL_F_NOARP;
-		return (0);
-	}
-
-	DTRACE_PROBE2(off_no_arl, queue_t *, q, mblk_t *, mp);
-	return (EINVAL);
-}
-
-/*
- * Called while queue 'q' is closing.  The pending command queue of every
- * arl is scanned, and any message whose b_queue is 'q' or its partner
- * queue is unlinked and freed.  A freed M_PROTO AR_ENTRY_QUERY is counted
- * in the IPv4 ire_stats_freed statistic.
- */
-static void
-ar_ll_cleanup_arl_queue(queue_t *q)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-	ip_stack_t	*ipst = as->as_netstack->netstack_ip;
-	queue_t		*partner = OTHERQ(q);
-	arl_t		*arl;
-
-	for (arl = as->as_arl_head; arl != NULL; arl = arl->arl_next) {
-		mblk_t	*prev = NULL;
-		mblk_t	*cur = arl->arl_queue;
-
-		while (cur != NULL) {
-			mblk_t	*next = cur->b_next;
-
-			if ((void *)cur->b_queue != (void *)q &&
-			    (void *)cur->b_queue != (void *)partner) {
-				/* Queued from some other stream; keep it. */
-				prev = cur;
-				cur = next;
-				continue;
-			}
-
-			/* Unlink 'cur', fixing up head and tail as needed. */
-			if (prev != NULL)
-				prev->b_next = next;
-			else
-				arl->arl_queue = next;
-			if (arl->arl_queue_tail == cur)
-				arl->arl_queue_tail = prev;
-
-			if (DB_TYPE(cur) == M_PROTO &&
-			    *(uint32_t *)cur->b_rptr == AR_ENTRY_QUERY) {
-				BUMP_IRE_STATS(ipst->ips_ire_stats_v4,
-				    ire_stats_freed);
-			}
-			inet_freemsg(cur);
-			cur = next;
-		}
-	}
-}
-
-/*
- * Find the lower level tap whose arl_name equals `name'.  Returns the
- * first match on the stack's arl list, or NULL if there is none.
- */
-static arl_t *
-ar_ll_lookup_by_name(arp_stack_t *as, const char *name)
-{
-	arl_t	*cur = as->as_arl_head;
-
-	while (cur != NULL && strcmp(cur->arl_name, name) != 0)
-		cur = cur->arl_next;
-
-	return (cur);
-}
-
-/*
- * Look up a lower level tap using parameters extracted from the common
- * portion of the ARP command.
- *
- * The interface name is located through arc_name_offset/arc_name_length.
- * It must be non-empty, lie within the message (as judged by
- * mi_offset_param()), and end in a NUL byte; otherwise NULL is returned.
- * NULL is also returned when no arl carries that name.
- */
-static arl_t *
-ar_ll_lookup_from_mp(arp_stack_t *as, mblk_t *mp)
-{
-	arc_t	*arc = (arc_t *)mp->b_rptr;
-	uint8_t	*name;
-	size_t	namelen = arc->arc_name_length;
-
-	/*
-	 * Reject a zero-length name up front so that the terminator check
-	 * below can never index name[-1].
-	 */
-	if (namelen == 0)
-		return (NULL);
-
-	name = mi_offset_param(mp, arc->arc_name_offset, namelen);
-	if (name == NULL || name[namelen - 1] != '\0')
-		return (NULL);
-	return (ar_ll_lookup_by_name(as, (char *)name));
-}
-
-/*
- * Allocate the arl_t for stream instance `ar' and initialize it from the
- * DL_INFO_ACK in `mp'.  If the allocation fails the function returns with
- * ar->ar_arl still NULL.  Each new arl is given an index taken from the
- * per-stack counter as_arp_index_counter.
- */
-static void
-ar_ll_init(arp_stack_t *as, ar_t *ar, mblk_t *mp)
-{
-	arl_t		*arl;
-	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
-
-	ASSERT(ar->ar_arl == NULL);
-
-	if ((arl = (arl_t *)mi_zalloc(sizeof (arl_t))) == NULL)
-		return;
-
-	/* An IPMP arl is its own group arl. */
-	if (dlia->dl_mac_type == SUNW_DL_IPMP) {
-		arl->arl_flags |= ARL_F_IPMP;
-		arl->arl_ipmp_arl = arl;
-	}
-
-	arl->arl_provider_style = dlia->dl_provider_style;
-	arl->arl_rq = ar->ar_rq;
-	arl->arl_wq = ar->ar_wq;
-
-	arl->arl_dlpi_pending = DL_PRIM_INVAL;
-
-	ar->ar_arl = arl;
-
-	/*
-	 * If/when ARP gets pushed into the IP module then this code to make
-	 * a number uniquely identify an ARP instance can be removed and the
-	 * ifindex from IP used.  Rather than try and reinvent or copy the
-	 * code used by IP for the purpose of allocating an index number
-	 * (and trying to keep the number small), just allocate it in an
-	 * ever increasing manner.  This index number isn't ever exposed to
-	 * users directly, its only use is for providing the pfhooks interface
-	 * with a number it can use to uniquely identify an interface in time.
-	 *
-	 * Using a 32bit counter, over 136 plumbs would need to be done every
-	 * second of every day (non-leap year) for it to wrap around and the
-	 * for() loop below to kick in as a performance concern.
-	 */
-	if (as->as_arp_counter_wrapped) {
-		arl_t *arl1;
-
-		/*
-		 * The counter has wrapped at least once, so its current
-		 * value may already be in use.  Advance it (skipping 0)
-		 * until a full pass over the arl list finds no arl using
-		 * that value.
-		 */
-		do {
-			for (arl1 = as->as_arl_head; arl1 != NULL;
-			    arl1 = arl1->arl_next) {
-				if (arl1->arl_index ==
-				    as->as_arp_index_counter) {
-					as->as_arp_index_counter++;
-					if (as->as_arp_index_counter == 0) {
-						as->as_arp_counter_wrapped++;
-						as->as_arp_index_counter = 1;
-					}
-					break;
-				}
-			}
-		} while (arl1 != NULL);
-	}
-
-	/*
-	 * Assign the index in both cases.  Previously the assignment was
-	 * made only when the counter had never wrapped, which left
-	 * arl_index at 0 for every arl created after a wrap.
-	 */
-	arl->arl_index = as->as_arp_index_counter;
-
-	/* Step past the value just handed out; 0 is never used. */
-	as->as_arp_index_counter++;
-	if (as->as_arp_index_counter == 0) {
-		as->as_arp_counter_wrapped++;
-		as->as_arp_index_counter = 1;
-	}
-}
-
-/*
- * This routine is called during module initialization when the DL_INFO_ACK
- * comes back from the device. We set up defaults for all the device dependent
- * doo-dads we are going to need. This will leave us ready to roll if we are
- * attempting auto-configuration. Alternatively, these defaults can be
- * overridden by initialization procedures possessing higher intelligence.
- *
- * Any previous defaults are discarded first. For an IPMP arl nothing more
- * is done. Otherwise a fresh arlphy_t is built and hung off arl->arl_phy.
- * It holds the SAP and hardware address lengths, the ARP hardware type,
- * buffers for the hardware and broadcast addresses, and a template
- * DL_UNITDATA_REQ. If any allocation or the broadcast address lookup fails,
- * ar_ll_clear_defaults() is called again before returning.
- */
-static void
-ar_ll_set_defaults(arl_t *arl, mblk_t *mp)
-{
- ar_m_t *arm;
- dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr;
- dl_unitdata_req_t *dlur;
- uchar_t *up;
- arlphy_t *ap;
-
- ASSERT(arl != NULL);
-
- /*
- * Clear any stale defaults that might exist.
- */
- ar_ll_clear_defaults(arl);
-
- if (arl->arl_flags & ARL_F_IPMP) {
- /*
- * If this is an IPMP arl_t, we have nothing to do,
- * since we will never transmit or receive.
- */
- return;
- }
-
- ap = kmem_zalloc(sizeof (arlphy_t), KM_NOSLEEP);
- if (ap == NULL)
- goto bad;
- arl->arl_phy = ap; /* attached now so the 'bad' path can free it */
-
- if ((arm = ar_m_lookup(dlia->dl_mac_type)) == NULL)
- arm = ar_m_lookup(DL_OTHER); /* fall back to the DL_OTHER entry */
- ASSERT(arm != NULL);
-
- /*
- * We initialize based on parameters in the (currently) not too
- * exhaustive ar_m_tbl. A DL_VERSION_2 provider supplies the lengths
- * itself; otherwise the table values are used.
- */
- if (dlia->dl_version == DL_VERSION_2) {
- /* XXX DLPI spec allows dl_sap_length of 0 before binding. */
- ap->ap_saplen = dlia->dl_sap_length;
- ap->ap_hw_addrlen = dlia->dl_brdcst_addr_length;
- } else {
- ap->ap_saplen = arm->ar_mac_sap_length;
- ap->ap_hw_addrlen = arm->ar_mac_hw_addr_length;
- }
- ap->ap_arp_hw_type = arm->ar_mac_arp_hw_type;
-
- /*
- * Allocate the hardware and ARP addresses; note that the hardware
- * address cannot be filled in until we see the DL_BIND_ACK.
- */
- ap->ap_hw_addr = kmem_zalloc(ap->ap_hw_addrlen, KM_NOSLEEP);
- ap->ap_arp_addr = kmem_alloc(ap->ap_hw_addrlen, KM_NOSLEEP);
- if (ap->ap_hw_addr == NULL || ap->ap_arp_addr == NULL)
- goto bad;
-
- if (dlia->dl_version == DL_VERSION_2) {
- if ((up = mi_offset_param(mp, dlia->dl_brdcst_addr_offset,
- ap->ap_hw_addrlen)) == NULL)
- goto bad;
- bcopy(up, ap->ap_arp_addr, ap->ap_hw_addrlen);
- } else {
- /*
- * No choice but to assume a broadcast address of all ones,
- * known to work on some popular networks.
- */
- (void) memset(ap->ap_arp_addr, ~0, ap->ap_hw_addrlen);
- }
-
- /*
- * Make us a template DL_UNITDATA_REQ message which we will use for
- * broadcasting resolution requests, and which we will clone to hand
- * back as responses to the protocols.
- */
- ap->ap_xmit_mp = ar_dlpi_comm(DL_UNITDATA_REQ, ap->ap_hw_addrlen +
- ABS(ap->ap_saplen) + sizeof (dl_unitdata_req_t));
- if (ap->ap_xmit_mp == NULL)
- goto bad;
-
- dlur = (dl_unitdata_req_t *)ap->ap_xmit_mp->b_rptr;
- dlur->dl_priority.dl_min = 0;
- dlur->dl_priority.dl_max = 0;
- dlur->dl_dest_addr_length = ap->ap_hw_addrlen + ABS(ap->ap_saplen);
- dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);
-
- /*
- * NOTE: the destination address and sap offsets are permanently set.
- * Both start at the destination address offset; the sign of
- * ap_saplen decides which of the two is shifted past the other.
- * ETHERTYPE_ARP is then stored as a 16-bit value at the SAP offset.
- */
- ap->ap_xmit_sapoff = dlur->dl_dest_addr_offset;
- ap->ap_xmit_addroff = dlur->dl_dest_addr_offset;
- if (ap->ap_saplen < 0)
- ap->ap_xmit_sapoff += ap->ap_hw_addrlen; /* sap last */
- else
- ap->ap_xmit_addroff += ap->ap_saplen; /* addr last */
-
- *(uint16_t *)((caddr_t)dlur + ap->ap_xmit_sapoff) = ETHERTYPE_ARP;
- return;
-bad:
- ar_ll_clear_defaults(arl);
-}
-
-/*
- * Release the arlphy_t attached to `arl', if there is one, together with
- * its hardware address, ARP address and transmit template.  arl_phy is
- * set to NULL before anything is freed.
- */
-static void
-ar_ll_clear_defaults(arl_t *arl)
-{
-	arlphy_t	*phy = arl->arl_phy;
-
-	if (phy == NULL)
-		return;
-
-	arl->arl_phy = NULL;
-
-	/* Both address buffers were sized by ap_hw_addrlen. */
-	if (phy->ap_hw_addr != NULL)
-		kmem_free(phy->ap_hw_addr, phy->ap_hw_addrlen);
-	if (phy->ap_arp_addr != NULL)
-		kmem_free(phy->ap_arp_addr, phy->ap_hw_addrlen);
-
-	freemsg(phy->ap_xmit_mp);
-	kmem_free(phy, sizeof (arlphy_t));
-}
-
-/*
- * Start taking an ARL_S_UP link down.  Any association between this
- * arl's stream instance and an IP instance is broken in both directions,
- * the state moves to ARL_S_PENDING, and the saved DL_UNBIND_REQ (plus the
- * saved DL_DETACH_REQ for a style 2 provider) is passed to ar_dlpi_send().
- */
-static void
-ar_ll_down(arl_t *arl)
-{
-	ar_t	*self = (ar_t *)arl->arl_rq->q_ptr;
-	ar_t	*peer;
-	mblk_t	*req;
-
-	ASSERT(arl->arl_state == ARL_S_UP);
-
-	/* Let's break the association between an ARL and IP instance */
-	peer = self->ar_arl_ip_assoc;
-	if (peer != NULL) {
-		ASSERT(peer->ar_arl_ip_assoc != NULL &&
-		    peer->ar_arl_ip_assoc == self);
-		peer->ar_arl_ip_assoc = NULL;
-		self->ar_arl_ip_assoc = NULL;
-	}
-
-	arl->arl_state = ARL_S_PENDING;
-
-	/* The unbind request was saved when the link was brought up. */
-	req = arl->arl_unbind_mp;
-	ASSERT(req != NULL);
-	ar_dlpi_send(arl, req);
-	arl->arl_unbind_mp = NULL;
-
-	if (arl->arl_provider_style != DL_STYLE2)
-		return;
-
-	/* Style 2 providers also need the matching detach. */
-	req = arl->arl_detach_mp;
-	ASSERT(req != NULL);
-	ar_dlpi_send(arl, req);
-	arl->arl_detach_mp = NULL;
-}
-
-/*
- * Start bringing an ARL_S_DOWN link up.  All DLPI requests that will be
- * needed - including the unbind (and, for style 2, detach) used later to
- * take the link down again - are allocated first, so that an allocation
- * failure leaves the arl untouched and returns ENOMEM.  On success the
- * state becomes ARL_S_PENDING, the attach (style 2 only), info, bind and
- * notify requests are passed to ar_dlpi_send() in that order, and 0 is
- * returned.
- */
-static int
-ar_ll_up(arl_t *arl)
-{
-	boolean_t	style2;
-	mblk_t		*attach = NULL;
-	mblk_t		*detach = NULL;
-	mblk_t		*info = NULL;
-	mblk_t		*bind = NULL;
-	mblk_t		*unbind = NULL;
-	mblk_t		*notify = NULL;
-	dl_bind_req_t	*bindreq;
-	dl_notify_req_t	*notifyreq;
-
-	ASSERT(arl->arl_state == ARL_S_DOWN);
-
-	style2 = (arl->arl_provider_style == DL_STYLE2);
-
-	if (style2) {
-		dl_attach_req_t	*attachreq;
-
-		attach = ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t));
-		if (attach == NULL)
-			goto nomem;
-		attachreq = (dl_attach_req_t *)attach->b_rptr;
-		attachreq->dl_ppa = arl->arl_ppa;
-
-		detach = ar_dlpi_comm(DL_DETACH_REQ, sizeof (dl_detach_req_t));
-		if (detach == NULL)
-			goto nomem;
-	}
-
-	if ((info = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t))) == NULL)
-		goto nomem;
-
-	/* The bind request asks for the ARP SAP, connectionless service. */
-	if ((bind = ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t))) == NULL)
-		goto nomem;
-	bindreq = (dl_bind_req_t *)bind->b_rptr;
-	bindreq->dl_sap = ETHERTYPE_ARP;
-	bindreq->dl_service_mode = DL_CLDLS;
-
-	unbind = ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t));
-	if (unbind == NULL)
-		goto nomem;
-
-	notify = ar_dlpi_comm(DL_NOTIFY_REQ, sizeof (dl_notify_req_t));
-	if (notify == NULL)
-		goto nomem;
-	notifyreq = (dl_notify_req_t *)notify->b_rptr;
-	notifyreq->dl_notifications =
-	    DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_REPLUMB;
-
-	/* Everything is in hand; commit to the state change. */
-	arl->arl_state = ARL_S_PENDING;
-	if (style2) {
-		ar_dlpi_send(arl, attach);
-		ASSERT(detach != NULL);
-		arl->arl_detach_mp = detach;
-	}
-	ar_dlpi_send(arl, info);
-	ar_dlpi_send(arl, bind);
-	arl->arl_unbind_mp = unbind;
-	ar_dlpi_send(arl, notify);
-	return (0);
-
-nomem:
-	/* Release whatever had been allocated before the failure. */
-	freemsg(attach);
-	freemsg(bind);
-	freemsg(detach);
-	freemsg(unbind);
-	freemsg(info);
-	freemsg(notify);
-	return (ENOMEM);
-}
-
-/*
- * Process mapping add requests from external messages.
- *
- * The request is either an M_IOCTL (payload in b_cont) or an M_PROTO
- * carrying an arma_t.  Returns EINVAL if the interface is unknown or
- * either mask is missing, EINPROGRESS if the command was queued on the
- * arl, and otherwise the result of ar_ce_create().
- *
- * The request is validated before any existing mapping entry is removed,
- * so that a malformed request leaves the current mapping in place.
- */
-static int
-ar_mapping_add(queue_t *q, mblk_t *mp_orig)
-{
-	arma_t		*arma;
-	mblk_t		*mp = mp_orig;
-	ace_t		*ace;
-	uchar_t		*hw_addr;
-	uint32_t	hw_addr_len;
-	uchar_t		*proto_addr;
-	uint32_t	proto_addr_len;
-	uchar_t		*proto_mask;
-	uchar_t		*proto_extract_mask;
-	uint32_t	hw_extract_start;
-	arl_t		*arl;
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-
-	/* We handle both M_IOCTL and M_PROTO messages. */
-	if (DB_TYPE(mp) == M_IOCTL)
-		mp = mp->b_cont;
-	arl = ar_ll_lookup_from_mp(as, mp);
-	if (arl == NULL)
-		return (EINVAL);
-	/*
-	 * Newly received commands from clients go to the tail of the queue.
-	 */
-	if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
-		DTRACE_PROBE3(madd_enqueued, queue_t *, q, mblk_t *, mp_orig,
-		    arl_t *, arl);
-		ar_cmd_enqueue(arl, mp_orig, q, AR_MAPPING_ADD, B_TRUE);
-		return (EINPROGRESS);
-	}
-	mp_orig->b_prev = NULL;
-
-	/* Extract parameters from the message. */
-	arma = (arma_t *)mp->b_rptr;
-	hw_addr_len = arma->arma_hw_addr_length;
-	hw_addr = mi_offset_paramc(mp, arma->arma_hw_addr_offset, hw_addr_len);
-	proto_addr_len = arma->arma_proto_addr_length;
-	proto_addr = mi_offset_paramc(mp, arma->arma_proto_addr_offset,
-	    proto_addr_len);
-	proto_mask = mi_offset_paramc(mp, arma->arma_proto_mask_offset,
-	    proto_addr_len);
-	proto_extract_mask = mi_offset_paramc(mp,
-	    arma->arma_proto_extract_mask_offset, proto_addr_len);
-	hw_extract_start = arma->arma_hw_mapping_start;
-	if (proto_mask == NULL || proto_extract_mask == NULL) {
-		DTRACE_PROBE2(madd_illegal_mask, arl_t *, arl, arma_t *, arma);
-		return (EINVAL);
-	}
-
-	/* The request is well formed; replace any mapping already present. */
-	ace = ar_ce_lookup_from_area(as, mp, ar_ce_lookup_mapping);
-	if (ace != NULL)
-		ar_ce_delete(ace);
-
-	return (ar_ce_create(
-	    arl,
-	    arma->arma_proto,
-	    hw_addr,
-	    hw_addr_len,
-	    proto_addr,
-	    proto_addr_len,
-	    proto_mask,
-	    proto_extract_mask,
-	    hw_extract_start,
-	    NULL,
-	    arma->arma_flags | ACE_F_MAPPING));
-}
-
-/*
- * Report whether every one of the `mask_len' bytes at `mask' is 0xFF.
- * A NULL mask, like a zero-length one, counts as all ones.
- */
-static boolean_t
-ar_mask_all_ones(uchar_t *mask, uint32_t mask_len)
-{
-	uint32_t	i;
-
-	if (mask == NULL)
-		return (B_TRUE);
-
-	for (i = 0; i < mask_len; i++) {
-		if (mask[i] != 0xFF)
-			return (B_FALSE);
-	}
-	return (B_TRUE);
-}
-
-/*
- * Search ar_m_tbl for the entry describing `mac_type'.  Returns a pointer
- * to the first matching entry, or NULL if the table has none.
- */
-static ar_m_t *
-ar_m_lookup(t_uscalar_t mac_type)
-{
-	ar_m_t	*entry = ar_m_tbl;
-
-	while (entry < A_END(ar_m_tbl)) {
-		if (entry->ar_mac_type == mac_type)
-			return (entry);
-		entry++;
-	}
-	return (NULL);
-}
-
-/*
- * Respond to Named Dispatch requests.  Returns 0 when `mp' is an M_IOCTL
- * that nd_getset() accepts against this stack's parameter table, and
- * ENOENT otherwise.
- */
-static int
-ar_nd_ioctl(queue_t *q, mblk_t *mp)
-{
-	arp_stack_t	*as = ((ar_t *)q->q_ptr)->ar_as;
-
-	if (DB_TYPE(mp) != M_IOCTL)
-		return (ENOENT);
-
-	return (nd_getset(q, as->as_nd, mp) ? 0 : ENOENT);
-}
-
-/*
- * ARP module open routine.
- *
- * A reopen (q_ptr already set) succeeds immediately. Otherwise the
- * netstack is found from the caller's credentials, the per-instance ar_t
- * is allocated by mi_open_comm(), and the instance is initialized.
- *
- * When ARP sits directly above "ip" or "udp", no DLPI probing is done; on
- * an ill stream (and unless arp_no_defense is set) an AR_ARP_EXTEND M_CTL
- * is sent downstream. When "ip" or "udp" is the driver at the bottom of
- * the stream but not directly below, the open fails with ENOTSUP. In the
- * remaining case a DL_INFO_REQ is sent and the open waits until
- * ar->ar_arl has been set, or fails with EINTR if the wait is interrupted.
- *
- * NOTE(review): only the mi_open_comm() failure path releases the
- * netstack hold here; the later failure paths call ar_close(), which
- * presumably drops it - confirm.
- */
-static int
-ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
-{
- ar_t *ar;
- int err;
- queue_t *tmp_q;
- mblk_t *mp;
- netstack_t *ns;
- arp_stack_t *as;
-
- TRACE_1(TR_FAC_ARP, TR_ARP_OPEN,
- "arp_open: q %p", q);
- /* Allow a reopen. */
- if (q->q_ptr != NULL) {
- return (0);
- }
-
- ns = netstack_find_by_cred(credp);
- ASSERT(ns != NULL);
- as = ns->netstack_arp;
- ASSERT(as != NULL);
-
- /* mi_open_comm allocates the instance data structure, etc. */
- err = mi_open_comm(&as->as_head, sizeof (ar_t), q, devp, flag, sflag,
- credp);
- if (err) {
- netstack_rele(as->as_netstack);
- return (err);
- }
-
- /*
- * We are D_MTPERMOD so it is safe to do qprocson before
- * the instance data has been initialized.
- */
- qprocson(q);
-
- ar = (ar_t *)q->q_ptr;
- ar->ar_rq = q;
- q = WR(q); /* from here on 'q' is the write queue */
- ar->ar_wq = q;
- crhold(credp);
- ar->ar_credp = credp;
- ar->ar_as = as;
-
- /*
- * Probe for the DLPI info if we are not pushed on IP or UDP. Wait for
- * the reply. In case of error call ar_close() which will take
- * care of doing everything required to close this instance, such
- * as freeing the arl, restarting the timer on a different queue etc.
- */
- if (strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0 ||
- strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "udp") == 0) {
- arc_t *arc;
-
- /*
- * We are pushed directly on top of IP or UDP. There is no need
- * to send down a DL_INFO_REQ. Return success. This could
- * either be an ill stream (i.e. <arp-IP-Driver> stream)
- * or a stream corresponding to an open of /dev/arp
- * (i.e. <arp-IP> stream). Note that we don't support
- * pushing some module in between arp and IP.
- *
- * Tell IP, though, that we're an extended implementation, so
- * it knows to expect a DAD response after bringing an
- * interface up. Old ATM drivers won't do this, and IP will
- * just bring the interface up immediately.
- */
- ar->ar_on_ill_stream = (q->q_next->q_next != NULL);
- if (!ar->ar_on_ill_stream || arp_no_defense)
- return (0);
- mp = allocb(sizeof (arc_t), BPRI_MED);
- if (mp == NULL) {
- (void) ar_close(RD(q));
- return (ENOMEM);
- }
- DB_TYPE(mp) = M_CTL;
- arc = (arc_t *)mp->b_rptr;
- mp->b_wptr = mp->b_rptr + sizeof (arc_t);
- arc->arc_cmd = AR_ARP_EXTEND; /* only arc_cmd is filled in */
- putnext(q, mp);
- return (0);
- }
- tmp_q = q;
- /* Get the driver's queue */
- while (tmp_q->q_next != NULL)
- tmp_q = tmp_q->q_next;
-
- ASSERT(tmp_q->q_qinfo->qi_minfo != NULL);
-
- if (strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "ip") == 0 ||
- strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "udp") == 0) {
- /*
- * We don't support pushing ARP arbitrarily on an IP or UDP
- * driver stream. ARP has to be pushed directly above IP or
- * UDP.
- */
- (void) ar_close(RD(q));
- return (ENOTSUP);
- } else {
- /*
- * Send down a DL_INFO_REQ so we can find out what we are
- * talking to.
- */
- mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t));
- if (mp == NULL) {
- (void) ar_close(RD(q));
- return (ENOMEM);
- }
- putnext(ar->ar_wq, mp);
- while (ar->ar_arl == NULL) { /* wait until ar_arl has been set */
- if (!qwait_sig(ar->ar_rq)) {
- (void) ar_close(RD(q));
- return (EINTR);
- }
- }
- }
- return (0);
-}
-
-/*
- * Named Dispatch "get" handler.  cp refers to the arpparam_t being queried;
- * its current value is printed in decimal into the reply message mp.
- * Always returns 0.
- */
-/* ARGSUSED */
-static int
-ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
-{
-	arpparam_t *param;
-
-	param = (arpparam_t *)cp;
-	(void) mi_mpprintf(mp, "%d", param->arp_param_value);
-
-	return (0);
-}
-
-/*
- * Register each named element of the arppa array (cnt entries) with the
- * named dispatch handler ndp.  Entries without a name, or with an empty
- * name, are skipped.  If any registration fails the whole ndp table is
- * released with nd_free() and B_FALSE is returned; otherwise B_TRUE.
- */
-static boolean_t
-ar_param_register(IDP *ndp, arpparam_t *arppa, int cnt)
-{
-	int	i;
-
-	for (i = 0; i < cnt; i++) {
-		arpparam_t	*param = &arppa[i];
-
-		/* Unnamed slots are not exported through ndd. */
-		if (param->arp_param_name == NULL ||
-		    param->arp_param_name[0] == '\0')
-			continue;
-
-		if (nd_load(ndp, param->arp_param_name,
-		    ar_param_get, ar_param_set, (caddr_t)param))
-			continue;
-
-		/* Registration failed: tear down everything loaded so far. */
-		nd_free(ndp);
-		return (B_FALSE);
-	}
-	return (B_TRUE);
-}
-
-/*
- * Named Dispatch "set" handler.  Parses value as a base-10 integer and
- * stores it in the arpparam_t referenced by cp, provided that it lies
- * within that parameter's [arp_param_min, arp_param_max] range.
- * Returns 0 on success, or EINVAL if the string does not parse or the
- * value is out of range (in which case the parameter is left unchanged).
- */
-/* ARGSUSED */
-static int
-ar_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
-{
-	arpparam_t	*param = (arpparam_t *)cp;
-	long		parsed;
-
-	if (ddi_strtol(value, NULL, 10, &parsed) != 0)
-		return (EINVAL);
-
-	if (parsed < param->arp_param_min || parsed > param->arp_param_max)
-		return (EINVAL);
-
-	param->arp_param_value = parsed;
-	return (0);
-}
-
-/*
- * Process an I_PLINK ioctl. If the lower stream is an arp device stream,
- * append another mblk to the chain, that will carry the device name,
- * and the muxid. IP uses this info to lookup the corresponding ill, and
- * set the ill_arp_muxid atomically, as part of the I_PLINK, instead of
- * waiting for the SIOCSLIFMUXID. (which may never happen if ifconfig is
- * killed, and this has the bad effect of not being able to unplumb
- * subsequently)
- *
- * q is the queue the ioctl arrived on and mp is the M_IOCTL, whose b_cont
- * must be the (single) linkblk mblk.  Returns ENOMEM, without forwarding
- * the message, if the ipmx_s mblk cannot be allocated; otherwise the
- * extended chain is passed to the next queue and 0 is returned.
- */
-static int
-ar_plink_send(queue_t *q, mblk_t *mp)
-{
-	char		*name;
-	mblk_t		*muxmp;
-	mblk_t		*mp1;
-	ar_t		*ar = (ar_t *)q->q_ptr;
-	arp_stack_t	*as = ar->ar_as;
-	struct linkblk	*li;
-	struct ipmx_s	*ipmxp;
-	queue_t		*arpwq;
-
-	mp1 = mp->b_cont;
-	ASSERT((mp1 != NULL) && (mp1->b_cont == NULL));
-	li = (struct linkblk *)mp1->b_rptr;
-	arpwq = li->l_qbot;
-
-	/*
-	 * Allocate a new mblk which will hold an ipmx_s and chain it to
-	 * the M_IOCTL chain. The final chain will consist of 3 mblks,
-	 * namely the M_IOCTL, followed by the linkblk, followed by the ipmx_s
-	 */
-	muxmp = allocb(sizeof (struct ipmx_s), BPRI_MED);
-	if (muxmp == NULL)
-		return (ENOMEM);
-	ipmxp = (struct ipmx_s *)muxmp->b_wptr;
-	ipmxp->ipmx_arpdev_stream = 0;
-	muxmp->b_wptr += sizeof (struct ipmx_s);
-	mp1->b_cont = muxmp;
-
-	/*
-	 * The l_qbot represents the uppermost write queue of the
-	 * lower stream. Walk down this stream till we hit ARP.
-	 * We can safely walk, since STREAMS has made sure the stream
-	 * cannot close till the IOCACK goes up, and is not interruptible.
-	 */
-	while (arpwq != NULL) {
-		/*
-		 * Beware of broken modules like logsubr.c that
-		 * may not have a q_qinfo or qi_minfo.  The check must be
-		 * made on the queue being visited (arpwq), since that is
-		 * the one dereferenced below -- not on our own queue q.
-		 */
-		if ((arpwq->q_qinfo != NULL) &&
-		    (arpwq->q_qinfo->qi_minfo != NULL)) {
-			name = arpwq->q_qinfo->qi_minfo->mi_idname;
-			if (name != NULL && name[0] != '\0' &&
-			    (strcmp(name, arp_mod_info.mi_idname) == 0))
-				break;
-		}
-		arpwq = arpwq->q_next;
-	}
-
-	/*
-	 * Check if arpwq corresponds to an arp device stream, by walking
-	 * the mi list. If it does, then add the muxid and device name info
-	 * for use by IP. IP will send the M_IOCACK.
-	 */
-	if (arpwq != NULL) {
-		for (ar = (ar_t *)mi_first_ptr(&as->as_head); ar != NULL;
-		    ar = (ar_t *)mi_next_ptr(&as->as_head, (void *)ar)) {
-			if ((ar->ar_wq == arpwq) && (ar->ar_arl != NULL)) {
-				ipmxp->ipmx_arpdev_stream = 1;
-				/* Bounded copy; always NUL-terminated. */
-				(void) strlcpy((char *)ipmxp->ipmx_name,
-				    ar->ar_arl->arl_name,
-				    sizeof (ipmxp->ipmx_name));
-				break;
-			}
-		}
-	}
-
-	putnext(q, mp);
-	return (0);
-}
-
-/*
- * ar_ce_walk callback: arg is an ar_t that is going away.  Every query
- * queued on the ace whose saved response queue (b_prev) is one of that
- * ar's queues is unlinked from ace_query_mp and freed.  For M_PROTO
- * AR_ENTRY_QUERY messages the IPv4 ire_stats_freed counter is bumped
- * before the message is freed.
- */
-static void
-ar_query_delete(ace_t *ace, void *arg)
-{
-	ar_t		*ar = arg;
-	arp_stack_t	*as = ar->ar_as;
-	ip_stack_t	*ipst = as->as_netstack->netstack_ip;
-	mblk_t		**linkp = &ace->ace_query_mp;
-	mblk_t		*query;
-
-	while ((query = *linkp) != NULL) {
-		/* The response queue was stored in the query b_prev. */
-		queue_t	*respq = (queue_t *)query->b_prev;
-
-		if (respq != ar->ar_wq && respq != ar->ar_rq) {
-			/* Someone else's query; leave it and move on. */
-			linkp = &query->b_next;
-			continue;
-		}
-
-		/* Unlink before freeing so the list stays consistent. */
-		*linkp = query->b_next;
-		if (DB_TYPE(query) == M_PROTO &&
-		    *(uint32_t *)query->b_rptr == AR_ENTRY_QUERY) {
-			BUMP_IRE_STATS(ipst->ips_ire_stats_v4,
-			    ire_stats_freed);
-		}
-		inet_freemsg(query);
-	}
-}
-
-/*
- * This routine is called either when an address resolution has just been
- * found, or when it is time to give up, or in some other error situation.
- * If a non-zero ret_val is provided, any outstanding queries for the
- * specified ace will be completed using that error value. Otherwise,
- * the completion status will depend on whether the address has been
- * resolved.
- *
- * Each query queued on ace->ace_query_mp is unlinked and answered on the
- * queue that was saved in its b_prev. M_IOCTL queries are converted to
- * M_IOCACK or M_IOCNAK; any other query is freed on error, or has its
- * leading areq mblk replaced by a copy of the transmit template on
- * success. proto_addr and proto_addr_len are passed to ar_set_address()
- * when the hardware address is written into that template copy.
- *
- * Once all queries are answered, an entry that is neither permanent nor
- * dying is deleted if it is unresolved (or has no arl_phy), and otherwise
- * has its timer set to as_cleanup_interval.
- */
-static void
-ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr,
- uint32_t proto_addr_len)
-{
- mblk_t *areq_mp;
- mblk_t *mp;
- mblk_t *xmit_mp;
- queue_t *arl_wq = ace->ace_arl->arl_wq;
- arp_stack_t *as = ARL_TO_ARPSTACK(ace->ace_arl);
- ip_stack_t *ipst = as->as_netstack->netstack_ip;
- arlphy_t *ap = ace->ace_xmit_arl->arl_phy;
-
- /*
- * On error or completion for a query, we need to shut down the timer.
- * However, the timer must not be stopped for an interface doing
- * Duplicate Address Detection, or it will never finish that phase.
- */
- if (!(ace->ace_flags & (ACE_F_UNVERIFIED | ACE_F_AUTHORITY)))
- mi_timer(arl_wq, ace->ace_mp, -1L);
-
- /* Establish the return value appropriate. */
- if (ret_val == 0) {
- if (!ACE_RESOLVED(ace) || ap == NULL)
- ret_val = ENXIO;
- }
- /* Terminate all outstanding queries. */
- while ((mp = ace->ace_query_mp) != 0) {
- /* The response queue was saved in b_prev. */
- queue_t *q = (queue_t *)mp->b_prev;
- mp->b_prev = NULL;
- ace->ace_query_mp = mp->b_next;
- mp->b_next = NULL;
- /*
- * If we have the answer, attempt to get a copy of the xmit
- * template to prepare for the client.
- */
- if (ret_val == 0 &&
- (xmit_mp = copyb(ap->ap_xmit_mp)) == NULL) {
- /*
- * Too bad, buy more memory. Note that ret_val is not
- * reset afterwards, so every remaining query in the
- * list is also completed with ENOMEM.
- */
- ret_val = ENOMEM;
- }
- /* Complete the response based on how the request arrived. */
- if (DB_TYPE(mp) == M_IOCTL) {
- struct iocblk *ioc = (struct iocblk *)mp->b_rptr;
-
- ioc->ioc_error = ret_val;
- if (ret_val != 0) {
- DB_TYPE(mp) = M_IOCNAK;
- ioc->ioc_count = 0;
- putnext(q, mp);
- continue;
- }
- /*
- * Return the xmit mp out with the successful IOCTL.
- */
- DB_TYPE(mp) = M_IOCACK;
- ioc->ioc_count = MBLKL(xmit_mp);
- /* Remove the areq mblk from the IOCTL. */
- areq_mp = mp->b_cont;
- mp->b_cont = areq_mp->b_cont;
- } else {
- if (ret_val != 0) {
- /* TODO: find some way to let the guy know? */
- inet_freemsg(mp);
- BUMP_IRE_STATS(ipst->ips_ire_stats_v4,
- ire_stats_freed);
- continue;
- }
- /*
- * In the M_PROTO case, the areq message is followed by
- * a message chain to be returned to the protocol. ARP
- * doesn't know (or care) what is in this chain, but in
- * the event that the reader is pondering the
- * relationship between ARP and IP (for example), the
- * areq is followed by an incipient IRE, and then the
- * original outbound packet. Here we detach the areq.
- */
- areq_mp = mp;
- mp = mp->b_cont;
- }
- /* Both error paths above "continue", so only success gets here. */
- ASSERT(ret_val == 0 && ap != NULL);
- if (ap->ap_saplen != 0) {
- /*
- * Copy the SAP type specified in the request into
- * the xmit mp.
- */
- areq_t *areq = (areq_t *)areq_mp->b_rptr;
- bcopy(areq->areq_sap, xmit_mp->b_rptr +
- ap->ap_xmit_sapoff, ABS(ap->ap_saplen));
- }
- /* Done with the areq message. */
- freeb(areq_mp);
- /*
- * Copy the resolved hardware address into the xmit mp
- * or perform the mapping operation.
- */
- ar_set_address(ace, xmit_mp->b_rptr + ap->ap_xmit_addroff,
- proto_addr, proto_addr_len);
- /*
- * Now insert the xmit mp after the response message. In
- * the M_IOCTL case, it will be the returned data block. In
- * the M_PROTO case, (again using IP as an example) it will
- * appear after the IRE and before the outbound packet.
- */
- xmit_mp->b_cont = mp->b_cont;
- mp->b_cont = xmit_mp;
- putnext(q, mp);
- }
-
- /*
- * Unless we are responding from a permanent cache entry, start the
- * cleanup timer or (on error) delete the entry.
- */
- if (!(ace->ace_flags & (ACE_F_PERMANENT | ACE_F_DYING))) {
- if (!ACE_RESOLVED(ace) || ap == NULL) {
- /*
- * No need to notify IP here, because the entry was
- * never resolved, so IP can't have any cached copies
- * of the address.
- */
- ar_ce_delete(ace);
- } else {
- mi_timer(arl_wq, ace->ace_mp, as->as_cleanup_interval);
- }
- }
-}
-
-/*
- * Transmit (or retransmit) the ARP request for the first query queued on
- * the given ace.
- *
- * Returns the number of milliseconds after which the caller should either
- * retransmit or abort; a return of zero means the caller should abort.
- */
-static clock_t
-ar_query_xmit(arp_stack_t *as, ace_t *ace)
-{
-	arl_t		*xmit_arl = ace->ace_xmit_arl;
-	mblk_t		*qmp = ace->ace_query_mp;
-	areq_t		*areq;
-	uchar_t		*target;
-	uchar_t		*sender;
-	ace_t		*src_ace;
-
-	/*
-	 * A null ace_query_mp is not an error: ar_query_delete may have
-	 * discarded the outstanding queries (because the client that sent
-	 * them went away) just before the ace_mp timer fired.  The
-	 * unresolved ace itself, and its timer, will be removed when the
-	 * arl stream goes away.
-	 */
-	if (qmp == NULL)
-		return (0);
-
-	/* For an ioctl, the areq lives in the continuation block. */
-	if (DB_TYPE(qmp) == M_IOCTL)
-		qmp = qmp->b_cont;
-	areq = (areq_t *)qmp->b_rptr;
-
-	/* Out of retries: tell the caller to give up. */
-	if (areq->areq_xmit_count == 0)
-		return (0);
-	areq->areq_xmit_count--;
-
-	target = mi_offset_paramc(qmp, areq->areq_target_addr_offset,
-	    areq->areq_target_addr_length);
-	sender = mi_offset_paramc(qmp, areq->areq_sender_addr_offset,
-	    areq->areq_sender_addr_length);
-
-	/*
-	 * Look up the ace for the sender address; we need one to exist and
-	 * to have completed DAD before its address may be used.
-	 */
-	src_ace = ar_ce_lookup(xmit_arl, areq->areq_proto, sender,
-	    areq->areq_sender_addr_length);
-	if (src_ace == NULL) {
-		DTRACE_PROBE3(xmit_no_source, ace_t *, ace, areq_t *, areq,
-		    uchar_t *, sender);
-		return (0);
-	}
-
-	/*
-	 * While duplicate address checking is still in progress on the
-	 * source address it must *not* appear on the wire, as that would
-	 * corrupt the world's caches.  Give the attempt back and let the
-	 * timer restart instead.  DAD will eventually complete one way or
-	 * the other, so this cannot go on "forever."
-	 */
-	if (src_ace->ace_flags & ACE_F_UNVERIFIED) {
-		DTRACE_PROBE2(xmit_source_unverified, ace_t *, ace,
-		    ace_t *, src_ace);
-		areq->areq_xmit_count++;
-		return (areq->areq_xmit_interval);
-	}
-
-	DTRACE_PROBE3(xmit_send, ace_t *, ace, ace_t *, src_ace,
-	    areq_t *, areq);
-
-	ar_xmit(xmit_arl, ARP_REQUEST, areq->areq_proto,
-	    areq->areq_sender_addr_length,
-	    xmit_arl->arl_phy->ap_hw_addr, sender,
-	    xmit_arl->arl_phy->ap_arp_addr, target,
-	    NULL, as);
-
-	/* Record when this source address was last put on the wire. */
-	src_ace->ace_last_bcast = ddi_get_lbolt();
-
-	return (areq->areq_xmit_interval);
-}
-
-/*
- * Our read side put procedure.
- *
- * M_IOCTL messages are run through ar_cmd_dispatch() and answered with
- * qreply(). M_CTL messages are checked for IP's ack of AR_ARP_CLOSING and
- * then freed. M_PROTO/M_PCPROTO messages that are not DL_UNITDATA_IND are
- * first tried as ARP commands and, if unrecognized, handed to
- * ar_rput_dlpi(). Any other message type is passed along with putnext().
- *
- * A DL_UNITDATA_IND that arrives on a queue whose arl has arl_phy set is
- * treated as an ARP packet from the wire: it is validated, run through the
- * physical-in hook, used to update the cache for the sender, and, when the
- * target is one of our published resolved addresses, answered.
- */
-static void
-ar_rput(queue_t *q, mblk_t *mp)
-{
- arh_t *arh;
- arl_t *arl;
- arl_t *client_arl;
- ace_t *dst_ace;
- uchar_t *dst_paddr;
- int err;
- uint32_t hlen;
- struct iocblk *ioc;
- mblk_t *mp1;
- int op;
- uint32_t plen;
- uint32_t proto;
- uchar_t *src_haddr;
- uchar_t *src_paddr;
- uchar_t *dst_haddr;
- boolean_t is_probe;
- boolean_t is_unicast = B_FALSE;
- dl_unitdata_ind_t *dlindp;
- int i;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- TRACE_1(TR_FAC_ARP, TR_ARP_RPUT_START,
- "arp_rput_start: q %p", q);
-
- /*
- * We handle ARP commands from below both in M_IOCTL and M_PROTO
- * messages. Actual ARP requests and responses will show up as
- * M_PROTO messages containing DL_UNITDATA_IND blocks.
- */
- switch (DB_TYPE(mp)) {
- case M_IOCTL:
- err = ar_cmd_dispatch(q, mp, B_FALSE);
- switch (err) {
- case ENOENT:
- DB_TYPE(mp) = M_IOCNAK;
- if ((mp1 = mp->b_cont) != 0) {
- /*
- * Collapse the data as a note to the
- * originator.
- */
- mp1->b_wptr = mp1->b_rptr;
- }
- break;
- case EINPROGRESS:
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "ioctl/inprogress");
- return;
- default:
- DB_TYPE(mp) = M_IOCACK;
- break;
- }
- ioc = (struct iocblk *)mp->b_rptr;
- ioc->ioc_error = err;
- if ((mp1 = mp->b_cont) != 0)
- ioc->ioc_count = MBLKL(mp1);
- else
- ioc->ioc_count = 0;
- qreply(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "ioctl");
- return;
- case M_CTL:
- /*
- * IP is acking the AR_ARP_CLOSING message that we sent
- * in ar_close.
- */
- if (MBLKL(mp) == sizeof (arc_t)) {
- if (((arc_t *)mp->b_rptr)->arc_cmd == AR_ARP_CLOSING)
- ((ar_t *)q->q_ptr)->ar_ip_acked_close = 1;
- }
- freemsg(mp);
- return;
- case M_PCPROTO:
- case M_PROTO:
- dlindp = (dl_unitdata_ind_t *)mp->b_rptr;
- if (MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&
- dlindp->dl_primitive == DL_UNITDATA_IND) {
- is_unicast = (dlindp->dl_group_address == 0);
- arl = ((ar_t *)q->q_ptr)->ar_arl;
- if (arl != NULL && arl->arl_phy != NULL) {
- /* Real messages from the wire! */
- break;
- }
- putnext(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "default");
- return;
- }
- err = ar_cmd_dispatch(q, mp, B_FALSE);
- switch (err) {
- case ENOENT:
- /* Miscellaneous DLPI messages get shuffled off. */
- ar_rput_dlpi(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "proto/dlpi");
- break;
- case EINPROGRESS:
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "proto");
- break;
- default:
- inet_freemsg(mp);
- break;
- }
- return;
- default:
- putnext(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "default");
- return;
- }
- /*
- * Only the DL_UNITDATA_IND case above breaks out of the switch, so
- * from here on arl and arl->arl_phy are known to be non-NULL.
- *
- * If the IFF_NOARP flag is on, then do not process any
- * incoming ARP_REQUEST or incoming ARP_RESPONSE.
- */
- if (arl->arl_flags & ARL_F_NOARP) {
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "interface has IFF_NOARP set");
- return;
- }
-
- /*
- * What we should have at this point is a DL_UNITDATA_IND message
- * followed by an ARP packet. We do some initial checks and then
- * get to work.
- */
- mp1 = mp->b_cont;
- if (mp1 == NULL) {
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "baddlpi");
- return;
- }
- if (mp1->b_cont != NULL) {
- /* No fooling around with funny messages. */
- if (!pullupmsg(mp1, -1)) {
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "pullupmsgfail");
- return;
- }
- }
- /*
- * NOTE(review): arh_hlen and arh_plen are read before the length of
- * mp1 has been compared against anything -- confirm that a block
- * shorter than ARH_FIXED_LEN cannot reach this point.
- */
- arh = (arh_t *)mp1->b_rptr;
- hlen = arh->arh_hlen;
- plen = arh->arh_plen;
- if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) {
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "short");
- return;
- }
- /*
- * hlen 0 is used for RFC 1868 UnARP.
- *
- * Note that the rest of the code checks that hlen is what we expect
- * for this hardware address type, so might as well discard packets
- * here that don't match.
- */
- if ((hlen > 0 && hlen != arl->arl_phy->ap_hw_addrlen) || plen == 0) {
- DTRACE_PROBE2(rput_bogus, arl_t *, arl, mblk_t *, mp1);
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "hlenzero/plenzero");
- return;
- }
- /*
- * Historically, Solaris has been lenient about hardware type numbers.
- * We should check here, but don't.
- */
- DTRACE_PROBE2(rput_normal, arl_t *, arl, arh_t *, arh);
-
- DTRACE_PROBE3(arp__physical__in__start,
- arl_t *, arl, arh_t *, arh, mblk_t *, mp);
-
- ARP_HOOK_IN(as->as_arp_physical_in_event, as->as_arp_physical_in,
- arl->arl_index, arh, mp, mp1, as);
-
- DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp);
-
- if (mp == NULL) /* presumably the hook consumed the packet */
- return;
-
- /*
- * The four addresses follow the fixed header in the order: sender
- * hardware, sender protocol, target hardware, target protocol.
- */
- proto = (uint32_t)BE16_TO_U16(arh->arh_proto);
- src_haddr = (uchar_t *)arh;
- src_haddr = &src_haddr[ARH_FIXED_LEN];
- src_paddr = &src_haddr[hlen];
- dst_haddr = &src_haddr[hlen + plen];
- dst_paddr = &src_haddr[hlen + plen + hlen];
- op = BE16_TO_U16(arh->arh_operation);
-
- /* Determine if this is just a probe */
- for (i = 0; i < plen; i++)
- if (src_paddr[i] != 0)
- break;
- is_probe = i >= plen; /* sender protocol address is all zeroes */
-
- /*
- * RFC 826: first check if the <protocol, sender protocol address> is
- * in the cache, if there is a sender protocol address. Note that this
- * step also handles resolutions based on source.
- *
- * Note that IP expects that each notification it receives will be
- * tied to the ill it received it on. Thus, we must talk to it over
- * the arl tied to the resolved IP address (if any), hence client_arl.
- */
- if (is_probe)
- err = AR_NOTFOUND;
- else
- err = ar_ce_resolve_all(arl, proto, src_haddr, hlen, src_paddr,
- plen, &client_arl);
-
- switch (err) {
- case AR_BOGON:
- ar_client_notify(client_arl, mp1, AR_CN_BOGON);
- mp1 = NULL;
- break;
- case AR_FAILED:
- ar_client_notify(client_arl, mp1, AR_CN_FAILED);
- mp1 = NULL;
- break;
- case AR_LOOPBACK:
- DTRACE_PROBE2(rput_loopback, arl_t *, arl, arh_t *, arh);
- freemsg(mp1);
- mp1 = NULL;
- break;
- }
- if (mp1 == NULL) {
- freeb(mp); /* mp1 was handed off or freed above; drop the header */
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "unneeded");
- return;
- }
-
- /*
- * Now look up the destination address. By RFC 826, we ignore the
- * packet at this step if the target isn't one of our addresses. This
- * is true even if the target is something we're trying to resolve and
- * the packet is a response. To avoid duplicate responses, we also
- * ignore the packet if it was multicast/broadcast to an arl that's in
- * an IPMP group but was not the designated xmit_arl for the ACE.
- *
- * Note that in order to do this correctly, we need to know when to
- * notify IP of a change implied by the source address of the ARP
- * message. That implies that the local ARP table has entries for all
- * of the resolved entries cached in the client. This is why we must
- * notify IP when we delete a resolved entry and we know that IP may
- * have cached answers.
- */
- dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen);
- if (dst_ace == NULL || !ACE_RESOLVED(dst_ace) ||
- (dst_ace->ace_xmit_arl != arl && !is_unicast) ||
- !(dst_ace->ace_flags & ACE_F_PUBLISH)) {
- /*
- * Let the client know if the source mapping has changed, even
- * if the destination provides no useful information for the
- * client.
- */
- if (err == AR_CHANGED)
- ar_client_notify(client_arl, mp1, AR_CN_ANNOUNCE);
- else
- freemsg(mp1);
- freeb(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "nottarget");
- return;
- }
-
- /*
- * If the target is unverified by DAD, then one of two things is true:
- * either it's someone else claiming this address (on a probe or an
- * announcement) or it's just a regular request. The former is
- * failure, but a regular request is not.
- */
- if (dst_ace->ace_flags & ACE_F_UNVERIFIED) {
- /*
- * Check for a reflection. Some misbehaving bridges will
- * reflect our own transmitted packets back to us.
- */
- if (hlen == dst_ace->ace_hw_addr_length &&
- bcmp(src_haddr, dst_ace->ace_hw_addr, hlen) == 0) {
- DTRACE_PROBE3(rput_probe_reflected, arl_t *, arl,
- arh_t *, arh, ace_t *, dst_ace);
- freeb(mp);
- freemsg(mp1);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "reflection");
- return;
- }
-
- /*
- * Conflicts seen via the wrong interface may be bogus.
- * Multiple interfaces on the same segment imply any conflict
- * will also be seen via the correct interface, so we can ignore
- * anything not matching the arl from the ace.
- */
- if (arl != dst_ace->ace_arl) {
- DTRACE_PROBE3(rput_probe_misdirect, arl_t *, arl,
- arh_t *, arh, ace_t *, dst_ace);
- freeb(mp);
- freemsg(mp1);
- return;
- }
- /*
- * Responses targeting our HW address that are not responses to
- * our DAD probe must be ignored as they are related to requests
- * sent before DAD was restarted. Note: response to our DAD
- * probe will have been handled by ar_ce_resolve_all() above.
- */
- if (op == ARP_RESPONSE &&
- (bcmp(dst_haddr, dst_ace->ace_hw_addr, hlen) == 0)) {
- DTRACE_PROBE3(rput_probe_stale, arl_t *, arl,
- arh_t *, arh, ace_t *, dst_ace);
- freeb(mp);
- freemsg(mp1);
- return;
- }
- /*
- * Responses targeted to HW addresses which are not ours but
- * sent to our unverified proto address are also conflicts.
- * These may be reported by a proxy rather than the interface
- * with the conflicting address, dst_paddr is in conflict
- * rather than src_paddr. To ensure IP can locate the correct
- * ipif to take down, it is necessary to copy dst_paddr to
- * the src_paddr field before sending it to IP. The same is
- * required for probes, where src_paddr will be INADDR_ANY.
- */
- if (is_probe) {
- /*
- * In this case, client_arl will be invalid (e.g.,
- * since probes don't have a valid sender address).
- * But dst_ace has the appropriate arl.
- */
- bcopy(dst_paddr, src_paddr, plen);
- ar_client_notify(dst_ace->ace_arl, mp1, AR_CN_FAILED);
- ar_ce_delete(dst_ace);
- } else if (op == ARP_RESPONSE) {
- bcopy(dst_paddr, src_paddr, plen);
- ar_client_notify(client_arl, mp1, AR_CN_FAILED);
- ar_ce_delete(dst_ace);
- } else if (err == AR_CHANGED) {
- ar_client_notify(client_arl, mp1, AR_CN_ANNOUNCE);
- } else {
- DTRACE_PROBE3(rput_request_unverified, arl_t *, arl,
- arh_t *, arh, ace_t *, dst_ace);
- freemsg(mp1);
- }
- freeb(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "unverified");
- return;
- }
-
- /*
- * If it's a request, then we reply to this, and if we think the
- * sender's unknown, then we create an entry to avoid unnecessary ARPs.
- * The design assumption is that someone ARPing us is likely to send us
- * a packet soon, and that we'll want to reply to it.
- */
- if (op == ARP_REQUEST) {
- const uchar_t *dstaddr = src_haddr; /* unicast reply by default */
- clock_t now;
-
- /*
- * This implements periodic address defense based on a modified
- * version of the RFC 3927 requirements. Instead of sending a
- * broadcasted reply every time, as demanded by the RFC, we
- * send at most one broadcast reply per arp_broadcast_interval.
- */
- now = ddi_get_lbolt();
- if ((now - dst_ace->ace_last_bcast) >
- MSEC_TO_TICK(as->as_broadcast_interval)) {
- DTRACE_PROBE3(rput_bcast_reply, arl_t *, arl,
- arh_t *, arh, ace_t *, dst_ace);
- dst_ace->ace_last_bcast = now;
- dstaddr = arl->arl_phy->ap_arp_addr;
- /*
- * If this is one of the long-suffering entries, then
- * pull it out now. It no longer needs separate
- * defense, because we're doing now that with this
- * broadcasted reply.
- */
- dst_ace->ace_flags &= ~ACE_F_DELAYED;
- }
-
- ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen,
- dst_ace->ace_hw_addr, dst_ace->ace_proto_addr,
- src_haddr, src_paddr, dstaddr, as);
- if (!is_probe && err == AR_NOTFOUND &&
- ar_ce_create(OWNING_ARL(arl), proto, src_haddr, hlen,
- src_paddr, plen, NULL, NULL, 0, NULL, 0) == 0) {
- ace_t *ace;
-
- ace = ar_ce_lookup(arl, proto, src_paddr, plen);
- ASSERT(ace != NULL);
- mi_timer(ace->ace_arl->arl_wq, ace->ace_mp,
- as->as_cleanup_interval);
- }
- }
- if (err == AR_CHANGED) {
- freeb(mp); /* header only; mp1 is passed to the client below */
- ar_client_notify(client_arl, mp1, AR_CN_ANNOUNCE);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "reqchange");
- } else {
- freemsg(mp); /* frees the header and the ARP packet (mp1) */
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "end");
- }
-}
-
-/*
- * ar_ce_walk callback; arl_arg is the arl of interest.  For an ace that
- * transmits on that arl and has both ACE_F_UNVERIFIED and
- * ACE_F_DAD_ABORTED set, reload the probe count and interval (the "fast"
- * variants when ACE_F_FAST is set), clear ACE_F_DAD_ABORTED and restart
- * the ace timer.  All other entries are left untouched.
- */
-static void
-ar_ce_restart_dad(ace_t *ace, void *arl_arg)
-{
-	arl_t		*arl = arl_arg;
-	arp_stack_t	*as = ARL_TO_ARPSTACK(arl);
-
-	if (ace->ace_xmit_arl != arl)
-		return;
-
-	/* Only entries whose DAD run was aborted need restarting. */
-	if ((ace->ace_flags & (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) !=
-	    (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED))
-		return;
-
-	/*
-	 * Slight cheat here: we don't use the initial probe delay
-	 * in this obscure case.
-	 */
-	if (!(ace->ace_flags & ACE_F_FAST)) {
-		ace->ace_xmit_count = as->as_probe_count;
-		ace->ace_xmit_interval = as->as_probe_interval;
-	} else {
-		ace->ace_xmit_count = as->as_fastprobe_count;
-		ace->ace_xmit_interval = as->as_fastprobe_interval;
-	}
-
-	ace->ace_flags &= ~ACE_F_DAD_ABORTED;
-	ace_set_timer(ace, B_FALSE);
-}
-
-/*
- * DLPI messages, other than DL_UNITDATA_IND are handled here.
- *
- * Acknowledgements and indications that this routine recognizes are
- * processed and then freed at the bottom of the function. Messages too
- * short to hold a primitive, unknown primitives, DL_UDERROR_IND, and
- * DL_OK_ACK/DL_ERROR_ACK for requests not listed in the inner switch
- * statements are instead passed along with putnext().
- *
- * NOTE(review): several cases (the DL_UNBIND_REQ/DL_DETACH_REQ/DL_BIND_REQ
- * acks, DL_BIND_ACK and DL_NOTIFY_IND) dereference arl without the NULL
- * check that DL_INFO_ACK performs -- confirm these cannot arrive before
- * ar->ar_arl has been set (presumably by ar_ll_init()).
- * NOTE(review): only sizeof (dl_primitive) is length-checked before the
- * larger members of the DL_primitives union are read -- confirm the
- * driver below is trusted to send well-formed acks.
- */
-static void
-ar_rput_dlpi(queue_t *q, mblk_t *mp)
-{
- ar_t *ar = q->q_ptr;
- arl_t *arl = ar->ar_arl;
- arlphy_t *ap = NULL;
- union DL_primitives *dlp;
- const char *err_str;
- arp_stack_t *as = ar->ar_as;
-
- if (arl != NULL)
- ap = arl->arl_phy;
-
- if (MBLKL(mp) < sizeof (dlp->dl_primitive)) {
- putnext(q, mp);
- return;
- }
- dlp = (union DL_primitives *)mp->b_rptr;
- switch (dlp->dl_primitive) {
- case DL_ERROR_ACK:
- /*
- * ce is confused about how DLPI works, so we have to interpret
- * an "error" on DL_NOTIFY_ACK (which we never could have sent)
- * as really meaning an error on DL_NOTIFY_REQ.
- *
- * Note that supporting DL_NOTIFY_REQ is optional, so printing
- * out an error message on the console isn't warranted except
- * for debug.
- */
- if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK ||
- dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) {
- ar_dlpi_done(arl, DL_NOTIFY_REQ);
- freemsg(mp);
- return;
- }
- err_str = dl_primstr(dlp->error_ack.dl_error_primitive);
- DTRACE_PROBE2(rput_dl_error, arl_t *, arl,
- dl_error_ack_t *, &dlp->error_ack);
- switch (dlp->error_ack.dl_error_primitive) {
- case DL_UNBIND_REQ:
- if (arl->arl_provider_style == DL_STYLE1)
- arl->arl_state = ARL_S_DOWN;
- break;
- case DL_DETACH_REQ:
- case DL_BIND_REQ:
- arl->arl_state = ARL_S_DOWN;
- break;
- case DL_ATTACH_REQ:
- break;
- default:
- /* If it's anything else, we didn't send it. */
- putnext(q, mp);
- return;
- }
- ar_dlpi_done(arl, dlp->error_ack.dl_error_primitive);
- (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
- "ar_rput_dlpi: %s failed, dl_errno %d, dl_unix_errno %d",
- err_str, dlp->error_ack.dl_errno,
- dlp->error_ack.dl_unix_errno);
- break;
- case DL_INFO_ACK:
- DTRACE_PROBE2(rput_dl_info, arl_t *, arl,
- dl_info_ack_t *, &dlp->info_ack);
- if (arl != NULL && arl->arl_dlpi_pending == DL_INFO_REQ) {
- /*
- * We have a response back from the driver. Go set up
- * transmit defaults.
- */
- ar_ll_set_defaults(arl, mp);
- ar_dlpi_done(arl, DL_INFO_REQ);
- } else if (arl == NULL) {
- ar_ll_init(as, ar, mp); /* first DL_INFO_ACK on this stream */
- }
- /* Kick off any awaiting messages */
- qenable(WR(q));
- break;
- case DL_OK_ACK:
- DTRACE_PROBE2(rput_dl_ok, arl_t *, arl,
- dl_ok_ack_t *, &dlp->ok_ack);
- switch (dlp->ok_ack.dl_correct_primitive) {
- case DL_UNBIND_REQ:
- if (arl->arl_provider_style == DL_STYLE1)
- arl->arl_state = ARL_S_DOWN;
- break;
- case DL_DETACH_REQ:
- arl->arl_state = ARL_S_DOWN;
- break;
- case DL_ATTACH_REQ:
- break;
- default:
- putnext(q, mp);
- return;
- }
- ar_dlpi_done(arl, dlp->ok_ack.dl_correct_primitive);
- break;
- case DL_NOTIFY_ACK:
- DTRACE_PROBE2(rput_dl_notify, arl_t *, arl,
- dl_notify_ack_t *, &dlp->notify_ack);
- /*
- * We mostly care about interface-up transitions, as this is
- * when we need to redo duplicate address detection.
- */
- if (ap != NULL) {
- ap->ap_notifies = (dlp->notify_ack.dl_notifications &
- DL_NOTE_LINK_UP) != 0;
- }
- ar_dlpi_done(arl, DL_NOTIFY_REQ);
- break;
- case DL_BIND_ACK:
- DTRACE_PROBE2(rput_dl_bind, arl_t *, arl,
- dl_bind_ack_t *, &dlp->bind_ack);
- if (ap != NULL) {
- caddr_t hw_addr;
-
- hw_addr = (caddr_t)dlp + dlp->bind_ack.dl_addr_offset;
- if (ap->ap_saplen > 0) /* positive saplen: skip over the SAP */
- hw_addr += ap->ap_saplen;
- bcopy(hw_addr, ap->ap_hw_addr, ap->ap_hw_addrlen);
- }
- arl->arl_state = ARL_S_UP;
- ar_dlpi_done(arl, DL_BIND_REQ);
- break;
- case DL_NOTIFY_IND:
- DTRACE_PROBE2(rput_dl_notify_ind, arl_t *, arl,
- dl_notify_ind_t *, &dlp->notify_ind);
-
- if (dlp->notify_ind.dl_notification == DL_NOTE_REPLUMB) {
- arl->arl_replumbing = B_TRUE;
- if (arl->arl_state == ARL_S_DOWN) {
- arp_replumb_done(arl, mp); /* mp is passed on, not freed here */
- return;
- }
- break;
- }
-
- if (ap != NULL) {
- switch (dlp->notify_ind.dl_notification) {
- case DL_NOTE_LINK_UP:
- ap->ap_link_down = B_FALSE;
- ar_ce_walk(as, ar_ce_restart_dad, arl);
- break;
- case DL_NOTE_LINK_DOWN:
- ap->ap_link_down = B_TRUE;
- break;
- }
- }
- break;
- case DL_UDERROR_IND:
- DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl,
- dl_uderror_ind_t *, &dlp->uderror_ind);
- (void) mi_strlog(q, 1, SL_ERROR | SL_TRACE,
- "ar_rput_dlpi: "
- "DL_UDERROR_IND, dl_dest_addr_length %d dl_errno %d",
- dlp->uderror_ind.dl_dest_addr_length,
- dlp->uderror_ind.dl_errno);
- putnext(q, mp);
- return;
- default:
- DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
- union DL_primitives *, dlp);
- putnext(q, mp);
- return;
- }
- freemsg(mp); /* every case that breaks out of the switch ends up here */
-}
-
-/*
- * Write the hardware address for ace at addrpos.
- *
- * The ace's hardware address is always copied first.  For a mapping entry
- * (ACE_F_MAPPING) that has an extract mask, and when proto_addr is
- * supplied, the masked bytes of proto_addr are then OR-ed in starting at
- * ace_hw_extract_start, for at most proto_addr_len bytes and never beyond
- * the end of the hardware address.
- */
-static void
-ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr,
-    uint32_t proto_addr_len)
-{
-	uchar_t	*mask;
-	uchar_t	*to;
-	int	len;
-	int	i;
-
-	ASSERT(ace->ace_hw_addr != NULL);
-
-	bcopy(ace->ace_hw_addr, addrpos, ace->ace_hw_addr_length);
-
-	/* Nothing more to do unless this is a usable mapping entry. */
-	if (!(ace->ace_flags & ACE_F_MAPPING))
-		return;
-	if (proto_addr == NULL)
-		return;
-	if (!ace->ace_proto_extract_mask)	/* careful */
-		return;
-
-	len = MIN((int)ace->ace_hw_addr_length
-	    - ace->ace_hw_extract_start,
-	    proto_addr_len);
-	mask = ace->ace_proto_extract_mask;
-	to = addrpos + ace->ace_hw_extract_start;
-
-	for (i = 0; i < len; i++)
-		to[i] |= mask[i] & proto_addr[i];
-}
-
-/*
- * Handle SIOCSLIFNAME on an arp device stream: record the interface name
- * and ppa carried in the lifreq on this stream's arl, fire the NE_PLUMB
- * hook, link the arl onto the stack's arl list and send an M_CTL copy of
- * the request downstream.
- *
- * mp_orig is either an M_IOCTL (lifreq in b_cont) or a message that holds
- * the lifreq directly.
- *
- * Returns:
- *	0		name set.
- *	ENOENT		stream is the <arp-IP-driver> stream; the command is
- *			for IP and the caller should pass it down.
- *	EINPROGRESS	no arl yet; mp_orig has been queued on q for later.
- *	EINVAL		message is missing or too short to hold a lifreq.
- *	EALREADY	the arl already has a name.
- *	ENXIO		the supplied name is not NUL-terminated within
- *			LIFNAMSIZ bytes.
- *	EEXIST		another arl already uses the name.
- *	ENOMEM		the downstream copy could not be allocated.
- */
-static int
-ar_slifname(queue_t *q, mblk_t *mp_orig)
-{
-	ar_t		*ar = q->q_ptr;
-	arl_t		*arl = ar->ar_arl;
-	struct lifreq	*lifr;
-	mblk_t		*mp = mp_orig;
-	arl_t		*old_arl;
-	mblk_t		*ioccpy;
-	struct iocblk	*iocp;
-	hook_nic_event_t info;
-	arp_stack_t	*as = ar->ar_as;
-
-	if (ar->ar_on_ill_stream) {
-		/*
-		 * This command is for IP, since it is coming down
-		 * the <arp-IP-driver> stream. Return ENOENT so that
-		 * it will be sent downstream by the caller
-		 */
-		return (ENOENT);
-	}
-	/* We handle both M_IOCTL and M_PROTO messages */
-	if (DB_TYPE(mp) == M_IOCTL)
-		mp = mp->b_cont;
-	if (q->q_next == NULL || arl == NULL) {
-		/*
-		 * If the interface was just opened and
-		 * the info ack has not yet come back from the driver
-		 */
-		DTRACE_PROBE2(slifname_no_arl, queue_t *, q,
-		    mblk_t *, mp_orig);
-		(void) putq(q, mp_orig);
-		return (EINPROGRESS);
-	}
-
-	/*
-	 * Reject a missing or truncated lifreq rather than merely tracing
-	 * it: everything below reads lifr_name and lifr_ppa out of the
-	 * message, which would otherwise run past the end of the data.
-	 */
-	if (mp == NULL || MBLKL(mp) < sizeof (struct lifreq)) {
-		DTRACE_PROBE2(slifname_malformed, queue_t *, q,
-		    mblk_t *, mp);
-		return (EINVAL);
-	}
-
-	if (arl->arl_name[0] != '\0') {
-		DTRACE_PROBE1(slifname_already, arl_t *, arl);
-		return (EALREADY);
-	}
-
-	lifr = (struct lifreq *)mp->b_rptr;
-
-	if (strlen(lifr->lifr_name) >= LIFNAMSIZ) {
-		DTRACE_PROBE2(slifname_bad_name, arl_t *, arl,
-		    struct lifreq *, lifr);
-		return (ENXIO);
-	}
-
-	/* Check whether the name is already in use. */
-
-	old_arl = ar_ll_lookup_by_name(as, lifr->lifr_name);
-	if (old_arl != NULL) {
-		DTRACE_PROBE2(slifname_exists, arl_t *, arl, arl_t *, old_arl);
-		return (EEXIST);
-	}
-
-	/* Make a copy of the message so we can send it downstream. */
-	if ((ioccpy = allocb(sizeof (struct iocblk), BPRI_MED)) == NULL ||
-	    (ioccpy->b_cont = copymsg(mp)) == NULL) {
-		if (ioccpy != NULL)
-			freeb(ioccpy);
-		return (ENOMEM);
-	}
-
-	(void) strlcpy(arl->arl_name, lifr->lifr_name, sizeof (arl->arl_name));
-
-	/* The ppa is sent down by ifconfig */
-	arl->arl_ppa = lifr->lifr_ppa;
-
-	/*
-	 * A network device is not considered to be fully plumb'd until
-	 * its name has been set using SIOCSLIFNAME. Once it has
-	 * been set, it cannot be set again (see code above), so there
-	 * is currently no danger in this function causing two NE_PLUMB
-	 * events without an intervening NE_UNPLUMB.
-	 */
-	info.hne_nic = arl->arl_index;
-	info.hne_lif = 0;
-	info.hne_event = NE_PLUMB;
-	info.hne_data = arl->arl_name;
-	info.hne_datalen = strlen(arl->arl_name);
-	(void) hook_run(as->as_net_data->netd_hooks, as->as_arpnicevents,
-	    (hook_data_t)&info);
-
-	/* Chain in the new arl. */
-	rw_enter(&as->as_arl_lock, RW_WRITER);
-	arl->arl_next = as->as_arl_head;
-	as->as_arl_head = arl;
-	rw_exit(&as->as_arl_lock);
-	DTRACE_PROBE1(slifname_set, arl_t *, arl);
-
-	/*
-	 * Send along a copy of the ioctl; this is just for hitbox. Use
-	 * M_CTL to avoid confusing anyone else who might be listening.
-	 */
-	DB_TYPE(ioccpy) = M_CTL;
-	iocp = (struct iocblk *)ioccpy->b_rptr;
-	bzero(iocp, sizeof (*iocp));
-	iocp->ioc_cmd = SIOCSLIFNAME;
-	iocp->ioc_count = msgsize(ioccpy->b_cont);
-	ioccpy->b_wptr = (uchar_t *)(iocp + 1);
-	putnext(arl->arl_wq, ioccpy);
-
-	return (0);
-}
-
-static int
-ar_set_ppa(queue_t *q, mblk_t *mp_orig)
-{
- ar_t *ar = (ar_t *)q->q_ptr;
- arl_t *arl = ar->ar_arl;
- int ppa;
- char *cp;
- mblk_t *mp = mp_orig;
- arl_t *old_arl;
- arp_stack_t *as = ar->ar_as;
-
- if (ar->ar_on_ill_stream) {
- /*
- * This command is for IP, since it is coming down
- * the <arp-IP-driver> stream. Return ENOENT so that
- * it will be sent downstream by the caller
- */
- return (ENOENT);
- }
-
- /* We handle both M_IOCTL and M_PROTO messages. */
- if (DB_TYPE(mp) == M_IOCTL)
- mp = mp->b_cont;
- if (q->q_next == NULL || arl == NULL) {
- /*
- * If the interface was just opened and
- * the info ack has not yet come back from the driver.
- */
- DTRACE_PROBE2(setppa_no_arl, queue_t *, q,
- mblk_t *, mp_orig);
- (void) putq(q, mp_orig);
- return (EINPROGRESS);
- }
-
- if (arl->arl_name[0] != '\0') {
- DTRACE_PROBE1(setppa_already, arl_t *, arl);
- return (EALREADY);
- }
-
- do {
- q = q->q_next;
- } while (q->q_next != NULL);
- cp = q->q_qinfo->qi_minfo->mi_idname;
-
- ppa = *(int *)(mp->b_rptr);
- (void) snprintf(arl->arl_name, sizeof (arl->arl_name), "%s%d", cp, ppa);
-
- old_arl = ar_ll_lookup_by_name(as, arl->arl_name);
- if (old_arl != NULL) {
- DTRACE_PROBE2(setppa_exists, arl_t *, arl, arl_t *, old_arl);
- /* Make it a null string again */
- arl->arl_name[0] = '\0';
- return (EBUSY);
- }
-
- arl->arl_ppa = ppa;
- DTRACE_PROBE1(setppa_done, arl_t *, arl);
- /* Chain in the new arl. */
- rw_enter(&as->as_arl_lock, RW_WRITER);
- arl->arl_next = as->as_arl_head;
- as->as_arl_head = arl;
- rw_exit(&as->as_arl_lock);
-
- return (0);
-}
-
-static int
-ar_snmp_msg(queue_t *q, mblk_t *mp_orig)
-{
- mblk_t *mpdata, *mp = mp_orig;
- struct opthdr *optp;
- msg2_args_t args;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- if (mp == NULL)
- return (0);
- /*
- * ar_cmd_dispatch() already checked for us that "mp->b_cont" is valid
- * in case of an M_IOCTL message.
- */
- if (DB_TYPE(mp) == M_IOCTL)
- mp = mp->b_cont;
-
- optp = (struct opthdr *)(&mp->b_rptr[sizeof (struct T_optmgmt_ack)]);
- if (optp->level == MIB2_IP && optp->name == MIB2_IP_MEDIA) {
- /*
- * Put our ARP cache entries in the ipNetToMediaTable mp from
- * IP. Due to a historical side effect of IP's MIB code, it
- * always passes us a b_cont, but the b_cont should be empty.
- */
- if ((mpdata = mp->b_cont) == NULL || MBLKL(mpdata) != 0)
- return (EINVAL);
-
- args.m2a_mpdata = mpdata;
- args.m2a_mptail = NULL;
- ar_ce_walk(as, ar_snmp_msg2, &args);
- optp->len = msgdsize(mpdata);
- }
- putnext(q, mp_orig);
- return (EINPROGRESS); /* so that rput() exits doing nothing... */
-}
-
-static void
-ar_snmp_msg2(ace_t *ace, void *arg)
-{
- const char *name = "unknown";
- mib2_ipNetToMediaEntry_t ntme;
- msg2_args_t *m2ap = arg;
-
- ASSERT(ace != NULL && ace->ace_arl != NULL);
- if (ace->ace_arl != NULL)
- name = ace->ace_arl->arl_name;
-
- /*
- * Fill in ntme using the information in the ACE.
- */
- ntme.ipNetToMediaType = (ace->ace_flags & ACE_F_PERMANENT) ? 4 : 3;
- ntme.ipNetToMediaIfIndex.o_length = MIN(OCTET_LENGTH, strlen(name));
- bcopy(name, ntme.ipNetToMediaIfIndex.o_bytes,
- ntme.ipNetToMediaIfIndex.o_length);
-
- bcopy(ace->ace_proto_addr, &ntme.ipNetToMediaNetAddress,
- MIN(sizeof (uint32_t), ace->ace_proto_addr_length));
-
- ntme.ipNetToMediaInfo.ntm_mask.o_length =
- MIN(OCTET_LENGTH, ace->ace_proto_addr_length);
- bcopy(ace->ace_proto_mask, ntme.ipNetToMediaInfo.ntm_mask.o_bytes,
- ntme.ipNetToMediaInfo.ntm_mask.o_length);
- ntme.ipNetToMediaInfo.ntm_flags = ace->ace_flags;
-
- ntme.ipNetToMediaPhysAddress.o_length =
- MIN(OCTET_LENGTH, ace->ace_hw_addr_length);
- if ((ace->ace_flags & ACE_F_RESOLVED) == 0)
- ntme.ipNetToMediaPhysAddress.o_length = 0;
- bcopy(ace->ace_hw_addr, ntme.ipNetToMediaPhysAddress.o_bytes,
- ntme.ipNetToMediaPhysAddress.o_length);
-
- /*
- * All entries within the ARP cache are unique, and there are no
- * preexisting entries in the ipNetToMediaTable mp, so just add 'em.
- */
- (void) snmp_append_data2(m2ap->m2a_mpdata, &m2ap->m2a_mptail,
- (char *)&ntme, sizeof (ntme));
-}
-
-/* Write side put procedure. */
-static void
-ar_wput(queue_t *q, mblk_t *mp)
-{
- int err;
- struct iocblk *ioc;
- mblk_t *mp1;
-
- TRACE_1(TR_FAC_ARP, TR_ARP_WPUT_START,
- "arp_wput_start: q %p", q);
-
- /*
- * Here we handle ARP commands coming from controlling processes
- * either in the form of M_IOCTL messages, or M_PROTO messages.
- */
- switch (DB_TYPE(mp)) {
- case M_IOCTL:
- switch (err = ar_cmd_dispatch(q, mp, B_TRUE)) {
- case ENOENT:
- /*
- * If it is an I_PLINK, process it. Otherwise
- * we don't recognize it, so pass it down.
- * Since ARP is a module there is always someone
- * below.
- */
- ASSERT(q->q_next != NULL);
- ioc = (struct iocblk *)mp->b_rptr;
- if ((ioc->ioc_cmd != I_PLINK) &&
- (ioc->ioc_cmd != I_PUNLINK)) {
- putnext(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)",
- q, "ioctl/enoent");
- return;
- }
- err = ar_plink_send(q, mp);
- if (err == 0) {
- return;
- }
- if ((mp1 = mp->b_cont) != 0)
- mp1->b_wptr = mp1->b_rptr;
- break;
- case EINPROGRESS:
- /*
- * If the request resulted in an attempt to resolve
- * an address, we return out here. The IOCTL will
- * be completed in ar_rput if something comes back,
- * or as a result of the timer expiring.
- */
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)", q, "inprog");
- return;
- default:
- DB_TYPE(mp) = M_IOCACK;
- break;
- }
- ioc = (struct iocblk *)mp->b_rptr;
- if (err != 0)
- ioc->ioc_error = err;
- if (ioc->ioc_error != 0) {
- /*
- * Don't free b_cont as IP/IB needs
- * it to identify the request.
- */
- DB_TYPE(mp) = M_IOCNAK;
- }
- ioc->ioc_count = msgdsize(mp->b_cont);
- qreply(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)", q, "ioctl");
- return;
- case M_FLUSH:
- if (*mp->b_rptr & FLUSHW)
- flushq(q, FLUSHDATA);
- if (*mp->b_rptr & FLUSHR) {
- flushq(RD(q), FLUSHDATA);
- *mp->b_rptr &= ~FLUSHW;
- qreply(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)", q, "flush");
- return;
- }
- /*
- * The normal behavior of a STREAMS module should be
- * to pass down M_FLUSH messages. However there is a
- * complex sequence of events during plumb/unplumb that
- * can cause DLPI messages in the driver's queue to be
- * flushed. So we don't send down M_FLUSH. This has been
- * reported for some drivers (Eg. le) that send up an M_FLUSH
- * in response to unbind request which will eventually be
- * looped back at the mux head and sent down. Since IP
- * does not queue messages in a module instance queue
- * of IP, nothing is lost by not sending down the flush.
- */
- freemsg(mp);
- return;
- case M_PROTO:
- case M_PCPROTO:
- /*
- * Commands in the form of PROTO messages are handled very
- * much the same as IOCTLs, but no response is returned.
- */
- switch (err = ar_cmd_dispatch(q, mp, B_TRUE)) {
- case ENOENT:
- if (q->q_next) {
- putnext(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)", q,
- "proto/enoent");
- return;
- }
- break;
- case EINPROGRESS:
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)", q, "proto/einprog");
- return;
- default:
- break;
- }
- break;
- case M_IOCDATA:
- /*
- * We pass M_IOCDATA downstream because it could be as a
- * result of a previous M_COPYIN/M_COPYOUT message sent
- * upstream.
- */
- /* FALLTHRU */
- case M_CTL:
- /*
- * We also send any M_CTL downstream as it could
- * contain control information for a module downstream.
- */
- putnext(q, mp);
- return;
- default:
- break;
- }
- /* Free any message we don't understand */
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
- "arp_wput_end: q %p (%S)", q, "end");
-}
-
-static boolean_t
-arp_say_ready(ace_t *ace)
-{
- mblk_t *mp;
- arl_t *arl = ace->ace_arl;
- arlphy_t *ap = ace->ace_xmit_arl->arl_phy;
- arh_t *arh;
- uchar_t *cp;
-
- mp = allocb(sizeof (*arh) + 2 * (ace->ace_hw_addr_length +
- ace->ace_proto_addr_length), BPRI_MED);
- if (mp == NULL) {
- /* skip a beat on allocation trouble */
- ace->ace_xmit_count = 1;
- ace_set_timer(ace, B_FALSE);
- return (B_FALSE);
- }
- /* Tell IP address is now usable */
- arh = (arh_t *)mp->b_rptr;
- U16_TO_BE16(ap->ap_arp_hw_type, arh->arh_hardware);
- U16_TO_BE16(ace->ace_proto, arh->arh_proto);
- arh->arh_hlen = ace->ace_hw_addr_length;
- arh->arh_plen = ace->ace_proto_addr_length;
- U16_TO_BE16(ARP_REQUEST, arh->arh_operation);
- cp = (uchar_t *)(arh + 1);
- bcopy(ace->ace_hw_addr, cp, ace->ace_hw_addr_length);
- cp += ace->ace_hw_addr_length;
- bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length);
- cp += ace->ace_proto_addr_length;
- bcopy(ace->ace_hw_addr, cp, ace->ace_hw_addr_length);
- cp += ace->ace_hw_addr_length;
- bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length);
- cp += ace->ace_proto_addr_length;
- mp->b_wptr = cp;
- ar_client_notify(arl, mp, AR_CN_READY);
- DTRACE_PROBE1(ready, ace_t *, ace);
- return (B_TRUE);
-}
-
-/*
- * Pick the longest-waiting aces for defense.
- */
-static void
-ace_reschedule(ace_t *ace, void *arg)
-{
- ace_resched_t *art = arg;
- ace_t **aces;
- ace_t **acemax;
- ace_t *atemp;
-
- if (ace->ace_xmit_arl != art->art_arl)
- return;
- /*
- * Only published entries that are ready for announcement are eligible.
- */
- if ((ace->ace_flags & (ACE_F_PUBLISH | ACE_F_UNVERIFIED | ACE_F_DYING |
- ACE_F_DELAYED)) != ACE_F_PUBLISH) {
- return;
- }
- if (art->art_naces < ACE_RESCHED_LIST_LEN) {
- art->art_aces[art->art_naces++] = ace;
- } else {
- aces = art->art_aces;
- acemax = aces + ACE_RESCHED_LIST_LEN;
- for (; aces < acemax; aces++) {
- if ((*aces)->ace_last_bcast > ace->ace_last_bcast) {
- atemp = *aces;
- *aces = ace;
- ace = atemp;
- }
- }
- }
-}
-
-/*
- * Reschedule the ARP defense of any long-waiting ACEs. It's assumed that this
- * doesn't happen very often (if at all), and thus it needn't be highly
- * optimized. (Note, though, that it's actually O(N) complexity, because the
- * outer loop is bounded by a constant rather than by the length of the list.)
- */
-static void
-arl_reschedule(arl_t *arl)
-{
- arlphy_t *ap = arl->arl_phy;
- ace_resched_t art;
- int i;
- ace_t *ace;
- arp_stack_t *as = ARL_TO_ARPSTACK(arl);
-
- i = ap->ap_defend_count;
- ap->ap_defend_count = 0;
- /* If none could be sitting around, then don't reschedule */
- if (i < as->as_defend_rate) {
- DTRACE_PROBE1(reschedule_none, arl_t *, arl);
- return;
- }
- art.art_arl = arl;
- while (ap->ap_defend_count < as->as_defend_rate) {
- art.art_naces = 0;
- ar_ce_walk(as, ace_reschedule, &art);
- for (i = 0; i < art.art_naces; i++) {
- ace = art.art_aces[i];
- ace->ace_flags |= ACE_F_DELAYED;
- ace_set_timer(ace, B_FALSE);
- if (++ap->ap_defend_count >= as->as_defend_rate)
- break;
- }
- if (art.art_naces < ACE_RESCHED_LIST_LEN)
- break;
- }
- DTRACE_PROBE1(reschedule, arl_t *, arl);
-}
-
-/*
- * Write side service routine. The only action here is delivery of transmit
- * timer events and delayed messages while waiting for the info_ack (ar_arl
- * not yet set).
- */
-static void
-ar_wsrv(queue_t *q)
-{
- ace_t *ace;
- arlphy_t *ap;
- mblk_t *mp;
- clock_t ms;
- arp_stack_t *as = ((ar_t *)q->q_ptr)->ar_as;
-
- TRACE_1(TR_FAC_ARP, TR_ARP_WSRV_START,
- "arp_wsrv_start: q %p", q);
-
- while ((mp = getq(q)) != NULL) {
- switch (DB_TYPE(mp)) {
- case M_PCSIG:
- if (!mi_timer_valid(mp))
- continue;
- ace = (ace_t *)mp->b_rptr;
- if (ace->ace_flags & ACE_F_DYING)
- continue;
- ap = ace->ace_xmit_arl->arl_phy;
- if (ace->ace_flags & ACE_F_UNVERIFIED) {
- ASSERT(ace->ace_flags & ACE_F_PUBLISH);
- ASSERT(ace->ace_query_mp == NULL);
- /*
- * If the link is down, give up for now. IP
- * will give us the go-ahead to try again when
- * the link restarts.
- */
- if (ap->ap_link_down) {
- DTRACE_PROBE1(timer_link_down,
- ace_t *, ace);
- ace->ace_flags |= ACE_F_DAD_ABORTED;
- continue;
- }
- if (ace->ace_xmit_count > 0) {
- DTRACE_PROBE1(timer_probe,
- ace_t *, ace);
- ace->ace_xmit_count--;
- ar_xmit(ace->ace_xmit_arl, ARP_REQUEST,
- ace->ace_proto,
- ace->ace_proto_addr_length,
- ace->ace_hw_addr, NULL, NULL,
- ace->ace_proto_addr, NULL, as);
- ace_set_timer(ace, B_FALSE);
- continue;
- }
- if (!arp_say_ready(ace))
- continue;
- DTRACE_PROBE1(timer_ready, ace_t *, ace);
- ace->ace_xmit_interval =
- as->as_publish_interval;
- ace->ace_xmit_count = as->as_publish_count;
- if (ace->ace_xmit_count == 0)
- ace->ace_xmit_count++;
- ace->ace_flags &= ~ACE_F_UNVERIFIED;
- }
- if (ace->ace_flags & ACE_F_PUBLISH) {
- clock_t now;
-
- /*
- * If an hour has passed, then free up the
- * entries that need defense by rescheduling
- * them.
- */
- now = ddi_get_lbolt();
- if (as->as_defend_rate > 0 &&
- now - ap->ap_defend_start >
- SEC_TO_TICK(as->as_defend_period)) {
- ap->ap_defend_start = now;
- arl_reschedule(ace->ace_xmit_arl);
- }
- /*
- * Finish the job that we started in
- * ar_entry_add. When we get to zero
- * announcement retransmits left, switch to
- * address defense.
- */
- ASSERT(ace->ace_query_mp == NULL);
- if (ace->ace_xmit_count > 0) {
- ace->ace_xmit_count--;
- DTRACE_PROBE1(timer_announce,
- ace_t *, ace);
- } else if (ace->ace_flags & ACE_F_DELAYED) {
- /*
- * This guy was rescheduled as one of
- * the really old entries needing
- * on-going defense. Let him through
- * now.
- */
- DTRACE_PROBE1(timer_send_delayed,
- ace_t *, ace);
- ace->ace_flags &= ~ACE_F_DELAYED;
- } else if (as->as_defend_rate > 0 &&
- (ap->ap_defend_count >=
- as->as_defend_rate ||
- ++ap->ap_defend_count >=
- as->as_defend_rate)) {
- /*
- * If we're no longer allowed to send
- * unbidden defense messages, then just
- * wait for rescheduling.
- */
- DTRACE_PROBE1(timer_excess_defense,
- ace_t *, ace);
- ace_set_timer(ace, B_FALSE);
- continue;
- } else {
- DTRACE_PROBE1(timer_defend,
- ace_t *, ace);
- }
- ar_xmit(ace->ace_xmit_arl, ARP_REQUEST,
- ace->ace_proto,
- ace->ace_proto_addr_length,
- ace->ace_hw_addr,
- ace->ace_proto_addr,
- ace->ace_xmit_arl->arl_phy->ap_arp_addr,
- ace->ace_proto_addr, NULL, as);
- ace->ace_last_bcast = now;
- if (ace->ace_xmit_count == 0)
- ace->ace_xmit_interval =
- as->as_defend_interval;
- if (ace->ace_xmit_interval != 0)
- ace_set_timer(ace, B_FALSE);
- continue;
- }
-
- /*
- * If this is a non-permanent (regular) resolved ARP
- * entry, then it's now time to check if it can be
- * retired. As an optimization, we check with IP
- * first, and just restart the timer if the address is
- * still in use.
- */
- if (ACE_NONPERM(ace)) {
- if (ace->ace_proto == IP_ARP_PROTO_TYPE &&
- ndp_lookup_ipaddr(*(ipaddr_t *)
- ace->ace_proto_addr, as->as_netstack)) {
- ace->ace_flags |= ACE_F_OLD;
- mi_timer(ace->ace_arl->arl_wq,
- ace->ace_mp,
- as->as_cleanup_interval);
- } else {
- ar_delete_notify(ace);
- ar_ce_delete(ace);
- }
- continue;
- }
-
- /*
- * ar_query_xmit returns the number of milliseconds to
- * wait following this transmit. If the number of
- * allowed transmissions has been exhausted, it will
- * return zero without transmitting. If that happens
- * we complete the operation with a failure indication.
- * Otherwise, we restart the timer.
- */
- ms = ar_query_xmit(as, ace);
- if (ms == 0)
- ar_query_reply(ace, ENXIO, NULL, (uint32_t)0);
- else
- mi_timer(q, mp, ms);
- continue;
- default:
- put(q, mp);
- continue;
- }
- }
- TRACE_1(TR_FAC_ARP, TR_ARP_WSRV_END,
- "arp_wsrv_end: q %p", q);
-}
-
-/* ar_xmit is called to transmit an ARP Request or Response. */
-static void
-ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen,
- const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2,
- const uchar_t *paddr2, const uchar_t *dstaddr, arp_stack_t *as)
-{
- arh_t *arh;
- uint8_t *cp;
- uint_t hlen;
- mblk_t *mp;
- arlphy_t *ap = arl->arl_phy;
-
- ASSERT(!(arl->arl_flags & ARL_F_IPMP));
-
- if (ap == NULL) {
- DTRACE_PROBE1(xmit_no_arl_phy, arl_t *, arl);
- return;
- }
-
- /* IFF_NOARP flag is set or link down: do not send arp messages */
- if ((arl->arl_flags & ARL_F_NOARP) || ap->ap_link_down)
- return;
-
- hlen = ap->ap_hw_addrlen;
- if ((mp = copyb(ap->ap_xmit_mp)) == NULL)
- return;
-
- mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) +
- plen + plen, BPRI_MED);
- if (mp->b_cont == NULL) {
- freeb(mp);
- return;
- }
-
- /* Get the L2 destination address for the message */
- if (haddr2 == NULL)
- dstaddr = ap->ap_arp_addr;
- else if (dstaddr == NULL)
- dstaddr = haddr2;
-
- /*
- * Figure out where the target hardware address goes in the
- * DL_UNITDATA_REQ header, and copy it in.
- */
- cp = mi_offset_param(mp, ap->ap_xmit_addroff, hlen);
- ASSERT(cp != NULL);
- if (cp == NULL) {
- freemsg(mp);
- return;
- }
- bcopy(dstaddr, cp, hlen);
-
- /* Fill in the ARP header. */
- cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
- mp->b_cont->b_rptr = cp;
- arh = (arh_t *)cp;
- U16_TO_BE16(ap->ap_arp_hw_type, arh->arh_hardware);
- U16_TO_BE16(proto, arh->arh_proto);
- arh->arh_hlen = (uint8_t)hlen;
- arh->arh_plen = (uint8_t)plen;
- U16_TO_BE16(operation, arh->arh_operation);
- cp += ARH_FIXED_LEN;
- bcopy(haddr1, cp, hlen);
- cp += hlen;
- if (paddr1 == NULL)
- bzero(cp, plen);
- else
- bcopy(paddr1, cp, plen);
- cp += plen;
- if (haddr2 == NULL)
- bzero(cp, hlen);
- else
- bcopy(haddr2, cp, hlen);
- cp += hlen;
- bcopy(paddr2, cp, plen);
- cp += plen;
- mp->b_cont->b_wptr = cp;
-
- DTRACE_PROBE3(arp__physical__out__start,
- arl_t *, arl, arh_t *, arh, mblk_t *, mp);
-
- ARP_HOOK_OUT(as->as_arp_physical_out_event, as->as_arp_physical_out,
- arl->arl_index, arh, mp, mp->b_cont, as);
-
- DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp);
-
- if (mp == NULL)
- return;
-
- /* Ship it out. */
- if (canputnext(arl->arl_wq))
- putnext(arl->arl_wq, mp);
- else
- freemsg(mp);
-}
-
-static mblk_t *
-ar_alloc(uint32_t cmd, int err)
-{
- uint32_t len;
- mblk_t *mp;
- mblk_t *mp1;
- char *cp;
- arc_t *arc;
-
- /* For now only one type of command is accepted */
- if (cmd != AR_DLPIOP_DONE)
- return (NULL);
- len = sizeof (arc_t);
- mp = allocb(len, BPRI_HI);
- if (!mp)
- return (NULL);
-
- DB_TYPE(mp) = M_CTL;
- cp = (char *)mp->b_rptr;
- arc = (arc_t *)(mp->b_rptr);
- arc->arc_cmd = cmd;
- mp->b_wptr = (uchar_t *)&cp[len];
- len = sizeof (int);
- mp1 = allocb(len, BPRI_HI);
- if (!mp1) {
- freeb(mp);
- return (NULL);
- }
- cp = (char *)mp->b_rptr;
- /* Initialize the error code */
- *((int *)mp1->b_rptr) = err;
- mp1->b_wptr = (uchar_t *)&cp[len];
- linkb(mp, mp1);
- return (mp);
-}
-
-void
-arp_ddi_init(void)
-{
- /*
- * We want to be informed each time a stack is created or
- * destroyed in the kernel, so we can maintain the
- * set of arp_stack_t's.
- */
- netstack_register(NS_ARP, arp_stack_init, arp_stack_shutdown,
- arp_stack_fini);
-}
-
-void
-arp_ddi_destroy(void)
-{
- netstack_unregister(NS_ARP);
-}
-
-/*
- * Initialize the ARP stack instance.
- */
-/* ARGSUSED */
-static void *
-arp_stack_init(netstackid_t stackid, netstack_t *ns)
-{
- arp_stack_t *as;
- arpparam_t *pa;
-
- as = (arp_stack_t *)kmem_zalloc(sizeof (*as), KM_SLEEP);
- as->as_netstack = ns;
-
- pa = (arpparam_t *)kmem_alloc(sizeof (arp_param_arr), KM_SLEEP);
- as->as_param_arr = pa;
- bcopy(arp_param_arr, as->as_param_arr, sizeof (arp_param_arr));
-
- (void) ar_param_register(&as->as_nd,
- as->as_param_arr, A_CNT(arp_param_arr));
-
- as->as_arp_index_counter = 1;
- as->as_arp_counter_wrapped = 0;
-
- rw_init(&as->as_arl_lock, NULL, RW_DRIVER, NULL);
- arp_net_init(as, stackid);
- arp_hook_init(as);
-
- return (as);
-}
-
-/* ARGSUSED */
-static void
-arp_stack_shutdown(netstackid_t stackid, void *arg)
-{
- arp_stack_t *as = (arp_stack_t *)arg;
-
- arp_net_shutdown(as);
-}
-
-/*
- * Free the ARP stack instance.
- */
-/* ARGSUSED */
-static void
-arp_stack_fini(netstackid_t stackid, void *arg)
-{
- arp_stack_t *as = (arp_stack_t *)arg;
-
- arp_hook_destroy(as);
- arp_net_destroy(as);
- rw_destroy(&as->as_arl_lock);
- nd_free(&as->as_nd);
- kmem_free(as->as_param_arr, sizeof (arp_param_arr));
- as->as_param_arr = NULL;
- kmem_free(as, sizeof (*as));
-}