Diffstat (limited to 'usr/src/uts/common/inet/ip/ip_ndp.c')
-rw-r--r--  usr/src/uts/common/inet/ip/ip_ndp.c  5399
1 file changed, 3278 insertions, 2121 deletions
diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c
index 35f9d541e8..97096bea99 100644
--- a/usr/src/uts/common/inet/ip/ip_ndp.c
+++ b/usr/src/uts/common/inet/ip/ip_ndp.c
@@ -40,6 +40,7 @@
#include <sys/zone.h>
#include <sys/ethernet.h>
#include <sys/sdt.h>
+#include <sys/mac.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -61,53 +62,93 @@
#include <inet/ip_rts.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
-#include <inet/ipsec_impl.h>
-#include <inet/ipsec_info.h>
#include <inet/sctp_ip.h>
+#include <inet/ip_arp.h>
#include <inet/ip2mac_impl.h>
+#define ANNOUNCE_INTERVAL(isv6) \
+ (isv6 ? ipst->ips_ip_ndp_unsolicit_interval : \
+ ipst->ips_ip_arp_publish_interval)
+
+#define DEFENSE_INTERVAL(isv6) \
+ (isv6 ? ipst->ips_ndp_defend_interval : \
+ ipst->ips_arp_defend_interval)
+
+/* Non-tunable probe interval, based on link capabilities */
+#define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500)
+
+/*
+ * The IPv4 Link Local address space is special; we do extra duplicate checking
+ * there, as the entire assignment mechanism rests on random numbers.
+ */
+#define IS_IPV4_LL_SPACE(ptr) (((uchar_t *)ptr)[0] == 169 && \
+ ((uchar_t *)ptr)[1] == 254)
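/*
 * Editorial illustration (not part of this patch): a minimal userland
 * sketch of how IS_IPV4_LL_SPACE() classifies an address. The EX_ macro
 * below is a local restatement using uint8_t in place of uchar_t.
 */
#include <stdio.h>
#include <stdint.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define	EX_IS_IPV4_LL_SPACE(ptr) (((const uint8_t *)(ptr))[0] == 169 && \
	((const uint8_t *)(ptr))[1] == 254)

int
main(void)
{
	struct in_addr ll, other;

	(void) inet_pton(AF_INET, "169.254.12.34", &ll);
	(void) inet_pton(AF_INET, "192.168.1.1", &other);
	/* prints "1 0": only the first address is in 169.254/16 */
	(void) printf("%d %d\n", EX_IS_IPV4_LL_SPACE(&ll),
	    EX_IS_IPV4_LL_SPACE(&other));
	return (0);
}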
+
+/*
+ * NCE_EXTERNAL_FLAGS_MASK defines the set of ncec_flags that may be passed
+ * in to the ncec*add* functions.
+ *
+ * NCE_F_AUTHORITY means that we ignore any incoming adverts for that
+ * mapping (though DAD is performed for the mapping). NCE_F_PUBLISH means
+ * that we will respond to requests for the protocol address.
+ */
+#define NCE_EXTERNAL_FLAGS_MASK \
+ (NCE_F_MYADDR | NCE_F_ISROUTER | NCE_F_NONUD | \
+ NCE_F_ANYCAST | NCE_F_UNSOL_ADV | NCE_F_BCAST | NCE_F_MCAST | \
+ NCE_F_AUTHORITY | NCE_F_PUBLISH | NCE_F_STATIC)
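/*
 * Editorial illustration (not part of this patch): the mask above is used
 * to reject caller-supplied flag bits that only the NDP code itself may
 * set; the removed ndp_add_v6() later in this diff performs a check of the
 * form (flags & ~NCE_EXTERNAL_FLAGS_MASK). The standalone sketch below
 * uses hypothetical EX_F_* bit values, not the real NCE_F_* definitions
 * from <inet/ip_ndp.h>.
 */
#include <errno.h>
#include <stdint.h>

#define	EX_F_MYADDR		0x0001	/* hypothetical external flag */
#define	EX_F_ISROUTER		0x0002	/* hypothetical external flag */
#define	EX_F_CONDEMNED		0x8000	/* hypothetical internal-only bit */

#define	EX_EXTERNAL_FLAGS_MASK	(EX_F_MYADDR | EX_F_ISROUTER)

static int
ex_check_flags(uint16_t flags)
{
	/* Any bit outside the external mask means the caller overstepped. */
	if (flags & ~EX_EXTERNAL_FLAGS_MASK)
		return (EINVAL);
	return (0);
}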
+
/*
* Function names with nce_ prefix are static while function
* names with ndp_ prefix are used by rest of the IP.
*
* Lock ordering:
*
- * ndp_g_lock -> ill_lock -> nce_lock
+ * ndp_g_lock -> ill_lock -> ncec_lock
*
* The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
- * nce_next. Nce_lock protects the contents of the NCE (particularly
- * nce_refcnt).
- */
-
-static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr,
- uint32_t ll_addr_len);
-static void nce_ire_delete(nce_t *nce);
-static void nce_ire_delete1(ire_t *ire, char *nce_arg);
-static void nce_set_ll(nce_t *nce, uchar_t *ll_addr);
-static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *,
- nce_t *);
-static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *);
-static void nce_make_mapping(nce_t *nce, uchar_t *addrpos,
- uchar_t *addr);
-static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr);
-static void nce_queue_mp(nce_t *nce, mblk_t *mp);
-static mblk_t *nce_udreq_alloc(ill_t *ill);
-static void nce_update(nce_t *nce, uint16_t new_state,
- uchar_t *new_ll_addr);
-static uint32_t nce_solicit(nce_t *nce, in6_addr_t src);
-static boolean_t nce_xmit(ill_t *ill, uint8_t type,
- boolean_t use_lla_addr, const in6_addr_t *sender,
+ * ncec_next. ncec_lock protects the contents of the NCE (particularly
+ * ncec_refcnt).
+ */
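/*
 * Editorial illustration (not part of this patch): the documented lock
 * ordering sketched with pthread mutexes standing in for the kernel
 * kmutex_t locks. The point is only that all three locks are always
 * acquired in the order ndp_g_lock -> ill_lock -> ncec_lock and released
 * in reverse, so threads taking more than one of them cannot deadlock.
 */
#include <pthread.h>

static pthread_mutex_t ex_ndp_g_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ex_ill_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ex_ncec_lock = PTHREAD_MUTEX_INITIALIZER;

static void
ex_ordered_update(void)
{
	(void) pthread_mutex_lock(&ex_ndp_g_lock);
	(void) pthread_mutex_lock(&ex_ill_lock);
	(void) pthread_mutex_lock(&ex_ncec_lock);

	/* ... update hash bucket, ill counters, and ncec contents here ... */

	(void) pthread_mutex_unlock(&ex_ncec_lock);
	(void) pthread_mutex_unlock(&ex_ill_lock);
	(void) pthread_mutex_unlock(&ex_ndp_g_lock);
}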
+
+static void nce_cleanup_list(ncec_t *ncec);
+static void nce_set_ll(ncec_t *ncec, uchar_t *ll_addr);
+static ncec_t *ncec_lookup_illgrp(ill_t *, const in6_addr_t *,
+ ncec_t *);
+static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *);
+static int nce_set_multicast_v6(ill_t *ill, const in6_addr_t *addr,
+ uint16_t ncec_flags, nce_t **newnce);
+static int nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst,
+ uint16_t ncec_flags, nce_t **newnce);
+static boolean_t ndp_xmit(ill_t *ill, uint32_t operation,
+ uint8_t *hwaddr, uint_t hwaddr_len, const in6_addr_t *sender,
const in6_addr_t *target, int flag);
-static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla,
- const in6_addr_t *target, uint_t flags);
-static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla,
- const in6_addr_t *src, uint_t flags);
-static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t,
- nce_t **, nce_t *);
-static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill);
+static void ncec_refhold_locked(ncec_t *);
+static boolean_t ill_defend_rate_limit(ill_t *, ncec_t *);
+static void nce_queue_mp_common(ncec_t *, mblk_t *, boolean_t);
+static int nce_add_common(ill_t *, uchar_t *, uint_t, const in6_addr_t *,
+ uint16_t, uint16_t, nce_t **);
+static nce_t *nce_add_impl(ill_t *, ncec_t *, nce_t *, mblk_t *);
+static nce_t *nce_add(ill_t *, ncec_t *);
+static void nce_inactive(nce_t *);
+extern nce_t *nce_lookup(ill_t *, const in6_addr_t *);
+static nce_t *nce_ill_lookup_then_add(ill_t *, ncec_t *);
+static int nce_add_v6(ill_t *, uchar_t *, uint_t, const in6_addr_t *,
+ uint16_t, uint16_t, nce_t **);
+static int nce_add_v4(ill_t *, uchar_t *, uint_t, const in_addr_t *,
+ uint16_t, uint16_t, nce_t **);
+static int nce_add_v6_postprocess(nce_t *);
+static int nce_add_v4_postprocess(nce_t *);
+static ill_t *nce_resolve_src(ncec_t *, in6_addr_t *);
+static clock_t nce_fuzz_interval(clock_t, boolean_t);
+static void nce_resolv_ipmp_ok(ncec_t *);
+static void nce_walk_common(ill_t *, pfi_t, void *);
+static void nce_start_timer(ncec_t *, uint_t);
+static nce_t *nce_fastpath_create(ill_t *, ncec_t *);
+static void nce_fastpath_trigger(nce_t *);
+static nce_t *nce_fastpath(ncec_t *, boolean_t, nce_t *);
#ifdef DEBUG
-static void nce_trace_cleanup(const nce_t *);
+static void ncec_trace_cleanup(const ncec_t *);
#endif
#define NCE_HASH_PTR_V4(ipst, addr) \
@@ -117,233 +158,245 @@ static void nce_trace_cleanup(const nce_t *);
(&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
NCE_TABLE_SIZE)]))
-/* Non-tunable probe interval, based on link capabilities */
-#define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500)
+extern kmem_cache_t *ncec_cache;
+extern kmem_cache_t *nce_cache;
+
+/*
+ * Send out an IPv6 (unicast) or IPv4 (broadcast) DAD probe.
+ * If src_ill is not null, the ncec_addr is bound to src_ill. The
+ * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where
+ * the probe is sent on the ncec_ill (in the non-IPMP case) or the
+ * IPMP cast_ill (in the IPMP case).
+ *
+ * Note that the probe interval is based on ncec->ncec_ill which
+ * may be the ipmp_ill.
+ */
+static void
+nce_dad(ncec_t *ncec, ill_t *src_ill, boolean_t send_probe)
+{
+ boolean_t dropped;
+ uint32_t probe_interval;
+
+ ASSERT(!(ncec->ncec_flags & NCE_F_MCAST));
+ ASSERT(!(ncec->ncec_flags & NCE_F_BCAST));
+ if (ncec->ncec_ipversion == IPV6_VERSION) {
+ dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT,
+ ncec->ncec_lladdr, ncec->ncec_lladdr_length,
+ &ipv6_all_zeros, &ncec->ncec_addr, NDP_PROBE);
+ probe_interval = ILL_PROBE_INTERVAL(ncec->ncec_ill);
+ } else {
+ /* IPv4 DAD delay the initial probe. */
+ if (send_probe)
+ dropped = arp_probe(ncec);
+ else
+ dropped = B_TRUE;
+ probe_interval = nce_fuzz_interval(ncec->ncec_xmit_interval,
+ !send_probe);
+ }
+ if (!dropped) {
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_pcnt--;
+ mutex_exit(&ncec->ncec_lock);
+ }
+ nce_restart_timer(ncec, probe_interval);
+}
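/*
 * Editorial illustration (not part of this patch): a rough standalone
 * restatement of the interval selection in nce_dad() above. IPv6 probes
 * use the fixed per-link ILL_PROBE_INTERVAL (150 ms when the driver posts
 * link-state notes, 1500 ms otherwise); IPv4 probes use the configured
 * transmit interval, randomized by nce_fuzz_interval(). The jitter shown
 * here is only an assumption about the general idea, not the kernel's
 * actual nce_fuzz_interval() arithmetic.
 */
#include <stdbool.h>
#include <stdlib.h>

static unsigned int
ex_probe_interval(bool isv6, bool note_link, unsigned int xmit_interval)
{
	if (isv6)
		return (note_link ? 150 : 1500);	/* ILL_PROBE_INTERVAL */
	/* IPv4: jitter the configured interval so hosts don't synchronize. */
	return (xmit_interval / 2 +
	    (unsigned int)rand() % (xmit_interval + 1));
}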
+
+/*
+ * Compute default flags to use for an advertisement of this ncec's address.
+ */
+static int
+nce_advert_flags(const ncec_t *ncec)
+{
+ int flag = 0;
+
+ if (ncec->ncec_flags & NCE_F_ISROUTER)
+ flag |= NDP_ISROUTER;
+ if (!(ncec->ncec_flags & NCE_F_ANYCAST))
+ flag |= NDP_ORIDE;
+
+ return (flag);
+}
/*
* NDP Cache Entry creation routine.
* Mapped entries will never do NUD.
* This routine must always be called with ndp6->ndp_g_lock held.
- * Prior to return, nce_refcnt is incremented.
*/
int
-ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
- const in6_addr_t *mask, const in6_addr_t *extract_mask,
- uint32_t hw_extract_start, uint16_t flags, uint16_t state,
- nce_t **newnce)
+nce_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
+ const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
{
- static nce_t nce_nil;
- nce_t *nce;
- mblk_t *mp;
- mblk_t *template;
- nce_t **ncep;
int err;
- boolean_t dropped = B_FALSE;
- ip_stack_t *ipst = ill->ill_ipst;
+ nce_t *nce;
- ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
+ ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock));
ASSERT(ill != NULL && ill->ill_isv6);
- if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
- ip0dbg(("ndp_add_v6: no addr\n"));
- return (EINVAL);
- }
- if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
- ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags));
- return (EINVAL);
- }
- if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
- (flags & NCE_F_MAPPING)) {
- ip0dbg(("ndp_add_v6: extract mask zero for mapping"));
- return (EINVAL);
- }
- /*
- * Allocate the mblk to hold the nce.
- *
- * XXX This can come out of a separate cache - nce_cache.
- * We don't need the mp anymore as there are no more
- * "qwriter"s
- */
- mp = allocb(sizeof (nce_t), BPRI_MED);
- if (mp == NULL)
- return (ENOMEM);
- nce = (nce_t *)mp->b_rptr;
- mp->b_wptr = (uchar_t *)&nce[1];
- *nce = nce_nil;
-
- /*
- * This one holds link layer address
- */
- if (ill->ill_net_type == IRE_IF_RESOLVER) {
- template = nce_udreq_alloc(ill);
- } else {
- if (ill->ill_phys_addr_length == IPV6_ADDR_LEN &&
- ill->ill_mactype != DL_IPV6) {
- /*
- * We create a nce_res_mp with the IP nexthop address
- * as the destination address if the physical length
- * is exactly 16 bytes for point-to-multipoint links
- * that do their own resolution from IP to link-layer
- * address.
- */
- template = ill_dlur_gen((uchar_t *)addr,
- ill->ill_phys_addr_length, ill->ill_sap,
- ill->ill_sap_length);
- } else {
- if (ill->ill_resolver_mp == NULL) {
- freeb(mp);
- return (EINVAL);
- }
- ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER));
- template = copyb(ill->ill_resolver_mp);
- }
- }
- if (template == NULL) {
- freeb(mp);
- return (ENOMEM);
- }
- nce->nce_ill = ill;
- nce->nce_ipversion = IPV6_VERSION;
- nce->nce_flags = flags;
- nce->nce_state = state;
- nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
- nce->nce_rcnt = ill->ill_xmit_count;
- nce->nce_addr = *addr;
- nce->nce_mask = *mask;
- nce->nce_extract_mask = *extract_mask;
- nce->nce_ll_extract_start = hw_extract_start;
- nce->nce_fp_mp = NULL;
- nce->nce_res_mp = template;
- if (state == ND_REACHABLE)
- nce->nce_last = TICK_TO_MSEC(lbolt64);
- else
- nce->nce_last = 0;
- nce->nce_qd_mp = NULL;
- nce->nce_mp = mp;
- if (hw_addr != NULL)
- nce_set_ll(nce, hw_addr);
- /* This one is for nce getting created */
- nce->nce_refcnt = 1;
- mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
- if (nce->nce_flags & NCE_F_MAPPING) {
- ASSERT(IN6_IS_ADDR_MULTICAST(addr));
- ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask));
- ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
- ncep = &ipst->ips_ndp6->nce_mask_entries;
- } else {
- ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
- }
+ err = nce_add_common(ill, hw_addr, hw_addr_len, addr, flags, state,
+ &nce);
+ if (err != 0)
+ return (err);
+ ASSERT(newnce != NULL);
+ *newnce = nce;
+ return (err);
+}
- nce->nce_trace_disable = B_FALSE;
+/*
+ * Post-processing routine to be executed after nce_add_v6(). This function
+ * triggers fastpath (if appropriate) and DAD on the newly added nce entry
+ * and must be called without any locks held.
+ */
+int
+nce_add_v6_postprocess(nce_t *nce)
+{
+ ncec_t *ncec = nce->nce_common;
+ boolean_t dropped = B_FALSE;
+ uchar_t *hw_addr = ncec->ncec_lladdr;
+ uint_t hw_addr_len = ncec->ncec_lladdr_length;
+ ill_t *ill = ncec->ncec_ill;
+ int err = 0;
+ uint16_t flags = ncec->ncec_flags;
+ ip_stack_t *ipst = ill->ill_ipst;
+ boolean_t trigger_fastpath = B_TRUE;
- list_create(&nce->nce_cb, sizeof (nce_cb_t),
- offsetof(nce_cb_t, nce_cb_node));
/*
- * Atomically ensure that the ill is not CONDEMNED, before
- * adding the NCE.
+ * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
+ * we call nce_fastpath as soon as the ncec is resolved in nce_process.
+ * We call nce_fastpath from nce_update if the link layer address of
+ * the peer changes from nce_update
*/
- mutex_enter(&ill->ill_lock);
- if (ill->ill_state_flags & ILL_CONDEMNED) {
- mutex_exit(&ill->ill_lock);
- freeb(mp);
- freeb(template);
- return (EINVAL);
- }
- if ((nce->nce_next = *ncep) != NULL)
- nce->nce_next->nce_ptpn = &nce->nce_next;
- *ncep = nce;
- nce->nce_ptpn = ncep;
- *newnce = nce;
- /* This one is for nce being used by an active thread */
- NCE_REFHOLD(*newnce);
-
- /* Bump up the number of nce's referencing this ill */
- DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
- (char *), "nce", (void *), nce);
- ill->ill_nce_cnt++;
- mutex_exit(&ill->ill_lock);
-
- err = 0;
- if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) {
- mutex_enter(&nce->nce_lock);
- mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
- if (dropped) {
- mutex_enter(&nce->nce_lock);
- nce->nce_pcnt++;
- mutex_exit(&nce->nce_lock);
+ if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) ||
+ (hw_addr == NULL && ill->ill_net_type != IRE_IF_NORESOLVER))
+ trigger_fastpath = B_FALSE;
+
+ if (trigger_fastpath)
+ nce_fastpath_trigger(nce);
+ if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) {
+ ill_t *hwaddr_ill;
+ /*
+ * Unicast entry that needs DAD.
+ */
+ if (IS_IPMP(ill)) {
+ hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp,
+ hw_addr, hw_addr_len);
+ } else {
+ hwaddr_ill = ill;
}
- NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
- mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
+ nce_dad(ncec, hwaddr_ill, B_TRUE);
err = EINPROGRESS;
} else if (flags & NCE_F_UNSOL_ADV) {
/*
* We account for the transmit below by assigning one
* less than the ndd variable. Subsequent decrements
- * are done in ndp_timer.
+ * are done in nce_timer.
*/
- mutex_enter(&nce->nce_lock);
- mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1;
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast,
- 0);
- mutex_enter(&nce->nce_lock);
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_unsolicit_count =
+ ipst->ips_ip_ndp_unsolicit_count - 1;
+ mutex_exit(&ncec->ncec_lock);
+ dropped = ndp_xmit(ill,
+ ND_NEIGHBOR_ADVERT,
+ hw_addr,
+ hw_addr_len,
+ &ncec->ncec_addr, /* Source and target of the adv */
+ &ipv6_all_hosts_mcast, /* Destination of the packet */
+ nce_advert_flags(ncec));
+ mutex_enter(&ncec->ncec_lock);
if (dropped)
- nce->nce_unsolicit_count++;
- if (nce->nce_unsolicit_count != 0) {
- ASSERT(nce->nce_timeout_id == 0);
- nce->nce_timeout_id = timeout(ndp_timer, nce,
- MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval));
+ ncec->ncec_unsolicit_count++;
+ else
+ ncec->ncec_last_time_defended = ddi_get_lbolt();
+ if (ncec->ncec_unsolicit_count != 0) {
+ nce_start_timer(ncec,
+ ipst->ips_ip_ndp_unsolicit_interval);
}
- mutex_exit(&nce->nce_lock);
- mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
+ mutex_exit(&ncec->ncec_lock);
}
-
- /*
- * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
- * we call nce_fastpath as soon as the nce is resolved in ndp_process.
- * We call nce_fastpath from nce_update if the link layer address of
- * the peer changes from nce_update
- */
- if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER)
- nce_fastpath(nce);
return (err);
}
+/*
+ * Atomically lookup and add (if needed) Neighbor Cache information for
+ * an address.
+ *
+ * IPMP notes: the ncecs for non-local (i.e., !NCE_MYADDR(ncec)) addresses
+ * are always added pointing at the ipmp_ill. Thus, when the ill passed
+ * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
+ * entries will be created, both pointing at the same ncec_t. The nce_t
+ * entries will have their nce_ill set to the ipmp_ill and the under_ill
+ * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
+ * Local addresses are always created on the ill passed to nce_add_v6.
+ */
int
-ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr,
- const in6_addr_t *addr, const in6_addr_t *mask,
- const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags,
- uint16_t state, nce_t **newnce)
+nce_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
+ const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
{
- int err = 0;
- nce_t *nce;
+ int err = 0;
ip_stack_t *ipst = ill->ill_ipst;
+ nce_t *nce, *upper_nce = NULL;
+ ill_t *in_ill = ill;
+ boolean_t need_ill_refrele = B_FALSE;
+ if (flags & NCE_F_MCAST) {
+ /*
+ * hw_addr will be figured out in nce_set_multicast_v6;
+ * caller has to select the cast_ill
+ */
+ ASSERT(hw_addr == NULL);
+ ASSERT(!IS_IPMP(ill));
+ err = nce_set_multicast_v6(ill, addr, flags, newnce);
+ return (err);
+ }
ASSERT(ill->ill_isv6);
- mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
+ if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) {
+ ill = ipmp_ill_hold_ipmp_ill(ill);
+ if (ill == NULL)
+ return (ENXIO);
+ need_ill_refrele = B_TRUE;
+ }
- /* Get head of v6 hash table */
- nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
- nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
+ mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
+ nce = nce_lookup_addr(ill, addr);
if (nce == NULL) {
- err = ndp_add_v6(ill,
- hw_addr,
- addr,
- mask,
- extract_mask,
- hw_extract_start,
- flags,
- state,
- newnce);
+ err = nce_add_v6(ill, hw_addr, hw_addr_len, addr, flags, state,
+ &nce);
} else {
- *newnce = nce;
err = EEXIST;
}
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
+ if (err == 0)
+ err = nce_add_v6_postprocess(nce);
+ if (in_ill != ill && nce != NULL) {
+ nce_t *under_nce;
+
+ /*
+ * in_ill was the under_ill. Try to create the under_nce.
+ * Hold the ill_g_lock to prevent changes to group membership
+ * until we are done.
+ */
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ if (IS_IN_SAME_ILLGRP(in_ill, ill)) {
+ under_nce = nce_fastpath_create(in_ill,
+ nce->nce_common);
+ upper_nce = nce;
+ if ((nce = under_nce) == NULL)
+ err = EINVAL;
+ }
+ rw_exit(&ipst->ips_ill_g_lock);
+ if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common))
+ nce_fastpath_trigger(under_nce);
+ }
+ if (nce != NULL) {
+ if (newnce != NULL)
+ *newnce = nce;
+ else
+ nce_refrele(nce);
+ }
+ /* nce_refrele is deferred until the lock is dropped */
+ if (upper_nce != NULL)
+ nce_refrele(upper_nce);
+ if (need_ill_refrele)
+ ill_refrele(ill);
return (err);
}
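/*
 * Editorial illustration (not part of this patch): the nce_t/ncec_t split
 * described in the IPMP notes above, with the structures trimmed to the
 * fields that matter here (these are not the real definitions from
 * <inet/ip_ndp.h>). For a non-local address added through an under_ill,
 * two nce_t entries end up sharing one ncec_t:
 *
 *	upper_nce: nce_ill == ipmp_ill  --\
 *	                                   +--> one ncec, ncec_ill == ipmp_ill
 *	under_nce: nce_ill == under_ill --/
 */
typedef struct ex_ill ex_ill_t;

typedef struct ex_ncec {
	ex_ill_t	*ncec_ill;	/* the ipmp_ill for !NCE_MYADDR entries */
} ex_ncec_t;

typedef struct ex_nce {
	ex_ill_t	*nce_ill;	/* per-interface: ipmp_ill or under_ill */
	ex_ncec_t	*nce_common;	/* shared neighbor state */
} ex_nce_t;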
@@ -351,53 +404,51 @@ ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr,
* Remove all the CONDEMNED nces from the appropriate hash table.
* We create a private list of NCEs, these may have ires pointing
* to them, so the list will be passed through to clean up dependent
- * ires and only then we can do NCE_REFRELE which can make NCE inactive.
+ * ires and only then we can do ncec_refrele() which can make NCE inactive.
*/
static void
-nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list)
+nce_remove(ndp_g_t *ndp, ncec_t *ncec, ncec_t **free_nce_list)
{
- nce_t *nce1;
- nce_t **ptpn;
+ ncec_t *ncec1;
+ ncec_t **ptpn;
ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
ASSERT(ndp->ndp_g_walker == 0);
- for (; nce; nce = nce1) {
- nce1 = nce->nce_next;
- mutex_enter(&nce->nce_lock);
- if (nce->nce_flags & NCE_F_CONDEMNED) {
- ptpn = nce->nce_ptpn;
- nce1 = nce->nce_next;
- if (nce1 != NULL)
- nce1->nce_ptpn = ptpn;
- *ptpn = nce1;
- nce->nce_ptpn = NULL;
- nce->nce_next = NULL;
- nce->nce_next = *free_nce_list;
- *free_nce_list = nce;
+ for (; ncec; ncec = ncec1) {
+ ncec1 = ncec->ncec_next;
+ mutex_enter(&ncec->ncec_lock);
+ if (NCE_ISCONDEMNED(ncec)) {
+ ptpn = ncec->ncec_ptpn;
+ ncec1 = ncec->ncec_next;
+ if (ncec1 != NULL)
+ ncec1->ncec_ptpn = ptpn;
+ *ptpn = ncec1;
+ ncec->ncec_ptpn = NULL;
+ ncec->ncec_next = NULL;
+ ncec->ncec_next = *free_nce_list;
+ *free_nce_list = ncec;
}
- mutex_exit(&nce->nce_lock);
+ mutex_exit(&ncec->ncec_lock);
}
}
/*
- * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
- * will return this NCE. Also no new IREs will be created that
- * point to this NCE (See ire_add_v6). Also no new timeouts will
- * be started (See NDP_RESTART_TIMER).
+ * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup()
+ * will return this NCE. Also no new timeouts will
+ * be started (See nce_restart_timer).
* 2. Cancel any currently running timeouts.
* 3. If there is an ndp walker, return. The walker will do the cleanup.
* This ensures that walkers see a consistent list of NCEs while walking.
* 4. Otherwise remove the NCE from the list of NCEs
- * 5. Delete all IREs pointing to this NCE.
*/
void
-ndp_delete(nce_t *nce)
+ncec_delete(ncec_t *ncec)
{
- nce_t **ptpn;
- nce_t *nce1;
- int ipversion = nce->nce_ipversion;
+ ncec_t **ptpn;
+ ncec_t *ncec1;
+ int ipversion = ncec->ncec_ipversion;
ndp_g_t *ndp;
- ip_stack_t *ipst = nce->nce_ill->ill_ipst;
+ ip_stack_t *ipst = ncec->ncec_ipst;
if (ipversion == IPV4_VERSION)
ndp = ipst->ips_ndp4;
@@ -405,40 +456,42 @@ ndp_delete(nce_t *nce)
ndp = ipst->ips_ndp6;
/* Serialize deletes */
- mutex_enter(&nce->nce_lock);
- if (nce->nce_flags & NCE_F_CONDEMNED) {
+ mutex_enter(&ncec->ncec_lock);
+ if (NCE_ISCONDEMNED(ncec)) {
/* Some other thread is doing the delete */
- mutex_exit(&nce->nce_lock);
+ mutex_exit(&ncec->ncec_lock);
return;
}
/*
* Caller has a refhold. Also 1 ref for being in the list. Thus
* refcnt has to be >= 2
*/
- ASSERT(nce->nce_refcnt >= 2);
- nce->nce_flags |= NCE_F_CONDEMNED;
- mutex_exit(&nce->nce_lock);
+ ASSERT(ncec->ncec_refcnt >= 2);
+ ncec->ncec_flags |= NCE_F_CONDEMNED;
+ mutex_exit(&ncec->ncec_lock);
- nce_fastpath_list_delete(nce);
+ /* Count how many condemned nces for kmem_cache callback */
+ atomic_add_32(&ipst->ips_num_nce_condemned, 1);
+ nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
/* Complete any waiting callbacks */
- nce_cb_dispatch(nce);
+ ncec_cb_dispatch(ncec);
/*
* Cancel any running timer. Timeout can't be restarted
- * since CONDEMNED is set. Can't hold nce_lock across untimeout.
+ * since CONDEMNED is set. Can't hold ncec_lock across untimeout.
* Passing invalid timeout id is fine.
*/
- if (nce->nce_timeout_id != 0) {
- (void) untimeout(nce->nce_timeout_id);
- nce->nce_timeout_id = 0;
+ if (ncec->ncec_timeout_id != 0) {
+ (void) untimeout(ncec->ncec_timeout_id);
+ ncec->ncec_timeout_id = 0;
}
mutex_enter(&ndp->ndp_g_lock);
- if (nce->nce_ptpn == NULL) {
+ if (ncec->ncec_ptpn == NULL) {
/*
- * The last ndp walker has already removed this nce from
- * the list after we marked the nce CONDEMNED and before
+ * The last ndp walker has already removed this ncec from
+ * the list after we marked the ncec CONDEMNED and before
* we grabbed the global lock.
*/
mutex_exit(&ndp->ndp_g_lock);
@@ -454,62 +507,68 @@ ndp_delete(nce_t *nce)
}
/*
- * Now remove the nce from the list. NDP_RESTART_TIMER won't restart
+ * Now remove the ncec from the list. nce_restart_timer won't restart
* the timer since it is marked CONDEMNED.
*/
- ptpn = nce->nce_ptpn;
- nce1 = nce->nce_next;
- if (nce1 != NULL)
- nce1->nce_ptpn = ptpn;
- *ptpn = nce1;
- nce->nce_ptpn = NULL;
- nce->nce_next = NULL;
+ ptpn = ncec->ncec_ptpn;
+ ncec1 = ncec->ncec_next;
+ if (ncec1 != NULL)
+ ncec1->ncec_ptpn = ptpn;
+ *ptpn = ncec1;
+ ncec->ncec_ptpn = NULL;
+ ncec->ncec_next = NULL;
mutex_exit(&ndp->ndp_g_lock);
- nce_ire_delete(nce);
+ /* Removed from ncec_ptpn/ncec_next list */
+ ncec_refrele_notr(ncec);
}
void
-ndp_inactive(nce_t *nce)
+ncec_inactive(ncec_t *ncec)
{
mblk_t **mpp;
- ill_t *ill;
+ ill_t *ill = ncec->ncec_ill;
+ ip_stack_t *ipst = ncec->ncec_ipst;
- ASSERT(nce->nce_refcnt == 0);
- ASSERT(MUTEX_HELD(&nce->nce_lock));
- ASSERT(nce->nce_fastpath == NULL);
+ ASSERT(ncec->ncec_refcnt == 0);
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
- /* Free all nce allocated messages */
- mpp = &nce->nce_first_mp_to_free;
- do {
- while (*mpp != NULL) {
- mblk_t *mp;
+ /* Count how many condemned nces for kmem_cache callback */
+ if (NCE_ISCONDEMNED(ncec))
+ atomic_add_32(&ipst->ips_num_nce_condemned, -1);
- mp = *mpp;
- *mpp = mp->b_next;
+ /* Free all allocated messages */
+ mpp = &ncec->ncec_qd_mp;
+ while (*mpp != NULL) {
+ mblk_t *mp;
- inet_freemsg(mp);
- }
- } while (mpp++ != &nce->nce_last_mp_to_free);
+ mp = *mpp;
+ *mpp = mp->b_next;
- if (nce->nce_ipversion == IPV6_VERSION) {
- /*
- * must have been cleaned up in nce_delete
- */
- ASSERT(list_is_empty(&nce->nce_cb));
- list_destroy(&nce->nce_cb);
+ inet_freemsg(mp);
}
+ /*
+ * must have been cleaned up in ncec_delete
+ */
+ ASSERT(list_is_empty(&ncec->ncec_cb));
+ list_destroy(&ncec->ncec_cb);
+ /*
+ * free the ncec_lladdr if one was allocated in nce_add_common()
+ */
+ if (ncec->ncec_lladdr_length > 0)
+ kmem_free(ncec->ncec_lladdr, ncec->ncec_lladdr_length);
+
#ifdef DEBUG
- nce_trace_cleanup(nce);
+ ncec_trace_cleanup(ncec);
#endif
- ill = nce->nce_ill;
mutex_enter(&ill->ill_lock);
DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
- (char *), "nce", (void *), nce);
- ill->ill_nce_cnt--;
+ (char *), "ncec", (void *), ncec);
+ ill->ill_ncec_cnt--;
+ ncec->ncec_ill = NULL;
/*
- * If the number of nce's associated with this ill have dropped
+ * If the number of ncec's associated with this ill have dropped
* to zero, check whether we need to restart any operation that
* is waiting for this to happen.
*/
@@ -519,104 +578,59 @@ ndp_inactive(nce_t *nce)
} else {
mutex_exit(&ill->ill_lock);
}
- mutex_destroy(&nce->nce_lock);
- if (nce->nce_mp != NULL)
- inet_freemsg(nce->nce_mp);
+
+ mutex_destroy(&ncec->ncec_lock);
+ kmem_cache_free(ncec_cache, ncec);
}
/*
- * ndp_walk routine. Delete the nce if it is associated with the ill
+ * ncec_walk routine. Delete the ncec if it is associated with the ill
* that is going away. Always called as a writer.
*/
void
-ndp_delete_per_ill(nce_t *nce, uchar_t *arg)
+ncec_delete_per_ill(ncec_t *ncec, uchar_t *arg)
{
- if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) {
- ndp_delete(nce);
+ if ((ncec != NULL) && ncec->ncec_ill == (ill_t *)arg) {
+ ncec_delete(ncec);
}
}
/*
- * Walk a list of to be inactive NCEs and blow away all the ires.
+ * Neighbor Cache cleanup logic for a list of ncec_t entries.
*/
static void
-nce_ire_delete_list(nce_t *nce)
+nce_cleanup_list(ncec_t *ncec)
{
- nce_t *nce_next;
+ ncec_t *ncec_next;
- ASSERT(nce != NULL);
- while (nce != NULL) {
- nce_next = nce->nce_next;
- nce->nce_next = NULL;
+ ASSERT(ncec != NULL);
+ while (ncec != NULL) {
+ ncec_next = ncec->ncec_next;
+ ncec->ncec_next = NULL;
/*
* It is possible for the last ndp walker (this thread)
- * to come here after ndp_delete has marked the nce CONDEMNED
- * and before it has removed the nce from the fastpath list
+ * to come here after ncec_delete has marked the ncec CONDEMNED
+ * and before it has removed the ncec from the fastpath list
* or called untimeout. So we need to do it here. It is safe
- * for both ndp_delete and this thread to do it twice or
+ * for both ncec_delete and this thread to do it twice or
* even simultaneously since each of the threads has a
- * reference on the nce.
+ * reference on the ncec.
*/
- nce_fastpath_list_delete(nce);
+ nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
/*
* Cancel any running timer. Timeout can't be restarted
- * since CONDEMNED is set. Can't hold nce_lock across untimeout.
- * Passing invalid timeout id is fine.
+ * since CONDEMNED is set. The ncec_lock can't be
+ * held across untimeout though passing invalid timeout
+ * id is fine.
*/
- if (nce->nce_timeout_id != 0) {
- (void) untimeout(nce->nce_timeout_id);
- nce->nce_timeout_id = 0;
+ if (ncec->ncec_timeout_id != 0) {
+ (void) untimeout(ncec->ncec_timeout_id);
+ ncec->ncec_timeout_id = 0;
}
- /*
- * We might hit this func thus in the v4 case:
- * ipif_down->ipif_ndp_down->ndp_walk
- */
-
- if (nce->nce_ipversion == IPV4_VERSION) {
- ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE,
- IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
- } else {
- ASSERT(nce->nce_ipversion == IPV6_VERSION);
- ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE,
- IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
- }
- NCE_REFRELE_NOTR(nce);
- nce = nce_next;
- }
-}
-
-/*
- * Delete an ire when the nce goes away.
- */
-/* ARGSUSED */
-static void
-nce_ire_delete(nce_t *nce)
-{
- if (nce->nce_ipversion == IPV6_VERSION) {
- ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
- nce_ire_delete1, (char *)nce, nce->nce_ill);
- NCE_REFRELE_NOTR(nce);
- } else {
- ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
- nce_ire_delete1, (char *)nce, nce->nce_ill);
- NCE_REFRELE_NOTR(nce);
- }
-}
-
-/*
- * ire_walk routine used to delete every IRE that shares this nce
- */
-static void
-nce_ire_delete1(ire_t *ire, char *nce_arg)
-{
- nce_t *nce = (nce_t *)nce_arg;
-
- ASSERT(ire->ire_type == IRE_CACHE);
-
- if (ire->ire_nce == nce) {
- ASSERT(ire->ire_ipversion == nce->nce_ipversion);
- ire_delete(ire);
+ /* Removed from ncec_ptpn/ncec_next list */
+ ncec_refrele_notr(ncec);
+ ncec = ncec_next;
}
}
@@ -624,100 +638,97 @@ nce_ire_delete1(ire_t *ire, char *nce_arg)
* Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
*/
boolean_t
-ndp_restart_dad(nce_t *nce)
+nce_restart_dad(ncec_t *ncec)
{
boolean_t started;
- boolean_t dropped;
+ ill_t *ill, *hwaddr_ill;
- if (nce == NULL)
+ if (ncec == NULL)
return (B_FALSE);
- mutex_enter(&nce->nce_lock);
- if (nce->nce_state == ND_PROBE) {
- mutex_exit(&nce->nce_lock);
+ ill = ncec->ncec_ill;
+ mutex_enter(&ncec->ncec_lock);
+ if (ncec->ncec_state == ND_PROBE) {
+ mutex_exit(&ncec->ncec_lock);
started = B_TRUE;
- } else if (nce->nce_state == ND_REACHABLE) {
- nce->nce_state = ND_PROBE;
- nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1;
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
- if (dropped) {
- mutex_enter(&nce->nce_lock);
- nce->nce_pcnt++;
- mutex_exit(&nce->nce_lock);
+ } else if (ncec->ncec_state == ND_REACHABLE) {
+ ASSERT(ncec->ncec_lladdr != NULL);
+ ncec->ncec_state = ND_PROBE;
+ ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
+ /*
+ * Slight cheat here: we don't use the initial probe delay
+ * for IPv4 in this obscure case.
+ */
+ mutex_exit(&ncec->ncec_lock);
+ if (IS_IPMP(ill)) {
+ hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp,
+ ncec->ncec_lladdr, ncec->ncec_lladdr_length);
+ } else {
+ hwaddr_ill = ill;
}
- NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill));
+ nce_dad(ncec, hwaddr_ill, B_TRUE);
started = B_TRUE;
} else {
- mutex_exit(&nce->nce_lock);
+ mutex_exit(&ncec->ncec_lock);
started = B_FALSE;
}
return (started);
}
/*
- * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed.
- * If one is found, the refcnt on the nce will be incremented.
+ * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed.
+ * If one is found, the refcnt on the ncec will be incremented.
*/
-nce_t *
-ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
- boolean_t caller_holds_lock)
+ncec_t *
+ncec_lookup_illgrp_v6(ill_t *ill, const in6_addr_t *addr)
{
- nce_t *nce;
- ip_stack_t *ipst = ill->ill_ipst;
+ ncec_t *ncec;
+ ip_stack_t *ipst = ill->ill_ipst;
- ASSERT(ill->ill_isv6);
- if (!caller_holds_lock)
- mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
/* Get head of v6 hash table */
- nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
- nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
- if (nce == NULL)
- nce = nce_lookup_mapping(ill, addr);
- if (!caller_holds_lock)
- mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- return (nce);
+ ncec = *((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr));
+ ncec = ncec_lookup_illgrp(ill, addr, ncec);
+ mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
+ rw_exit(&ipst->ips_ill_g_lock);
+ return (ncec);
}
/*
- * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed.
- * If one is found, the refcnt on the nce will be incremented.
- * Since multicast mappings are handled in arp, there are no nce_mcast_entries
- * so we skip the nce_lookup_mapping call.
- * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL
+ * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed.
+ * If one is found, the refcnt on the ncec will be incremented.
*/
-nce_t *
-ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock)
+ncec_t *
+ncec_lookup_illgrp_v4(ill_t *ill, const in_addr_t *addr)
{
- nce_t *nce;
+ ncec_t *ncec = NULL;
in6_addr_t addr6;
ip_stack_t *ipst = ill->ill_ipst;
- if (!caller_holds_lock)
- mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
/* Get head of v4 hash table */
- nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
+ ncec = *((ncec_t **)NCE_HASH_PTR_V4(ipst, *addr));
IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
- /*
- * NOTE: IPv4 never matches across the illgrp since the NCE's we're
- * looking up have fastpath headers that are inherently per-ill.
- */
- nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
- if (!caller_holds_lock)
- mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
- return (nce);
+ ncec = ncec_lookup_illgrp(ill, &addr6, ncec);
+ mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
+ rw_exit(&ipst->ips_ill_g_lock);
+ return (ncec);
}
/*
- * Cache entry lookup. Try to find an nce matching the parameters passed.
- * Look only for exact entries (no mappings). If an nce is found, increment
- * the hold count on that nce. The caller passes in the start of the
- * appropriate hash table, and must be holding the appropriate global
- * lock (ndp_g_lock).
+ * Cache entry lookup. Try to find an ncec matching the parameters passed.
+ * If an ncec is found, increment the hold count on that ncec.
+ * The caller passes in the start of the appropriate hash table, and must
+ * be holding the appropriate global lock (ndp_g_lock). In addition, since
+ * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock
+ * must be held as reader.
+ *
+ * This function always matches across the ipmp group.
*/
-static nce_t *
-nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
- nce_t *nce)
+ncec_t *
+ncec_lookup_illgrp(ill_t *ill, const in6_addr_t *addr, ncec_t *ncec)
{
ndp_g_t *ndp;
ip_stack_t *ipst = ill->ill_ipst;
@@ -727,348 +738,246 @@ nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
else
ndp = ipst->ips_ndp4;
+ ASSERT(ill != NULL);
ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
if (IN6_IS_ADDR_UNSPECIFIED(addr))
return (NULL);
- for (; nce != NULL; nce = nce->nce_next) {
- if (nce->nce_ill == ill ||
- match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) {
- if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) &&
- IN6_ARE_ADDR_EQUAL(&nce->nce_mask,
- &ipv6_all_ones)) {
- mutex_enter(&nce->nce_lock);
- if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
- NCE_REFHOLD_LOCKED(nce);
- mutex_exit(&nce->nce_lock);
+ for (; ncec != NULL; ncec = ncec->ncec_next) {
+ if (ncec->ncec_ill == ill ||
+ IS_IN_SAME_ILLGRP(ill, ncec->ncec_ill)) {
+ if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) {
+ mutex_enter(&ncec->ncec_lock);
+ if (!NCE_ISCONDEMNED(ncec)) {
+ ncec_refhold_locked(ncec);
+ mutex_exit(&ncec->ncec_lock);
break;
}
- mutex_exit(&nce->nce_lock);
+ mutex_exit(&ncec->ncec_lock);
}
}
}
+ return (ncec);
+}
+
+/*
+ * Find an nce_t on ill with nce_addr == addr. Look up the nce_t
+ * entries for ill only, i.e., when ill is part of an ipmp group,
+ * nce_lookup_v4 will never try to match across the group.
+ */
+nce_t *
+nce_lookup_v4(ill_t *ill, const in_addr_t *addr)
+{
+ nce_t *nce;
+ in6_addr_t addr6;
+ ip_stack_t *ipst = ill->ill_ipst;
+
+ mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
+ IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
+ nce = nce_lookup_addr(ill, &addr6);
+ mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
return (nce);
}
/*
- * Cache entry lookup. Try to find an nce matching the parameters passed.
- * Look only for mappings.
+ * Find an nce_t on ill with nce_addr == addr. Look up the nce_t
+ * entries for ill only, i.e., when ill is part of an ipmp group,
+ * nce_lookup_v6 will never try to match across the group.
*/
+nce_t *
+nce_lookup_v6(ill_t *ill, const in6_addr_t *addr6)
+{
+ nce_t *nce;
+ ip_stack_t *ipst = ill->ill_ipst;
+
+ mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
+ nce = nce_lookup_addr(ill, addr6);
+ mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
+ return (nce);
+}
+
static nce_t *
-nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr)
+nce_lookup_addr(ill_t *ill, const in6_addr_t *addr)
{
- nce_t *nce;
- ip_stack_t *ipst = ill->ill_ipst;
+ nce_t *nce;
- ASSERT(ill != NULL && ill->ill_isv6);
- ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
- if (!IN6_IS_ADDR_MULTICAST(addr))
- return (NULL);
- nce = ipst->ips_ndp6->nce_mask_entries;
- for (; nce != NULL; nce = nce->nce_next)
- if (nce->nce_ill == ill &&
- (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) {
- mutex_enter(&nce->nce_lock);
- if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
- NCE_REFHOLD_LOCKED(nce);
- mutex_exit(&nce->nce_lock);
- break;
- }
- mutex_exit(&nce->nce_lock);
- }
+ ASSERT(ill != NULL);
+#ifdef DEBUG
+ if (ill->ill_isv6)
+ ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock));
+ else
+ ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock));
+#endif
+ mutex_enter(&ill->ill_lock);
+ nce = nce_lookup(ill, addr);
+ mutex_exit(&ill->ill_lock);
return (nce);
}
+
+/*
+ * Router turned to host. We need to make sure that cached copies of the ncec
+ * are not used for forwarding packets if they were derived from the default
+ * route, and that the default route itself is removed, as required by
+ * section 7.2.5 of RFC 2461.
+ *
+ * Note that the ncec itself probably has valid link-layer information for the
+ * nexthop, so that there is no reason to delete the ncec, as long as the
+ * ISROUTER flag is turned off.
+ */
+static void
+ncec_router_to_host(ncec_t *ncec)
+{
+ ire_t *ire;
+ ip_stack_t *ipst = ncec->ncec_ipst;
+
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_flags &= ~NCE_F_ISROUTER;
+ mutex_exit(&ncec->ncec_lock);
+
+ ire = ire_ftable_lookup_v6(&ipv6_all_zeros, &ipv6_all_zeros,
+ &ncec->ncec_addr, IRE_DEFAULT, ncec->ncec_ill, ALL_ZONES, NULL,
+ MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW, 0, ipst, NULL);
+ if (ire != NULL) {
+ ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
+ ire_delete(ire);
+ ire_refrele(ire);
+ }
+}
+
/*
* Process passed in parameters either from an incoming packet or via
* user ioctl.
*/
-static void
-nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
+void
+nce_process(ncec_t *ncec, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
{
- ill_t *ill = nce->nce_ill;
- uint32_t hw_addr_len = ill->ill_nd_lla_len;
- mblk_t *mp;
+ ill_t *ill = ncec->ncec_ill;
+ uint32_t hw_addr_len = ill->ill_phys_addr_length;
boolean_t ll_updated = B_FALSE;
boolean_t ll_changed;
- ip_stack_t *ipst = ill->ill_ipst;
+ nce_t *nce;
- ASSERT(nce->nce_ipversion == IPV6_VERSION);
+ ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
/*
* No updates of link layer address or the neighbor state is
* allowed, when the cache is in NONUD state. This still
* allows for responding to reachability solicitation.
*/
- mutex_enter(&nce->nce_lock);
- if (nce->nce_state == ND_INCOMPLETE) {
+ mutex_enter(&ncec->ncec_lock);
+ if (ncec->ncec_state == ND_INCOMPLETE) {
if (hw_addr == NULL) {
- mutex_exit(&nce->nce_lock);
+ mutex_exit(&ncec->ncec_lock);
return;
}
- nce_set_ll(nce, hw_addr);
+ nce_set_ll(ncec, hw_addr);
/*
- * Update nce state and send the queued packets
+ * Update ncec state and send the queued packets
* back to ip this time ire will be added.
*/
if (flag & ND_NA_FLAG_SOLICITED) {
- nce_update(nce, ND_REACHABLE, NULL);
+ nce_update(ncec, ND_REACHABLE, NULL);
} else {
- nce_update(nce, ND_STALE, NULL);
- }
- mutex_exit(&nce->nce_lock);
- nce_fastpath(nce);
- nce_cb_dispatch(nce); /* complete callbacks */
- mutex_enter(&nce->nce_lock);
- mp = nce->nce_qd_mp;
- nce->nce_qd_mp = NULL;
- mutex_exit(&nce->nce_lock);
- while (mp != NULL) {
- mblk_t *nxt_mp, *data_mp;
-
- nxt_mp = mp->b_next;
- mp->b_next = NULL;
-
- if (mp->b_datap->db_type == M_CTL)
- data_mp = mp->b_cont;
- else
- data_mp = mp;
- if (data_mp->b_prev != NULL) {
- ill_t *inbound_ill;
- queue_t *fwdq = NULL;
- uint_t ifindex;
-
- ifindex = (uint_t)(uintptr_t)data_mp->b_prev;
- inbound_ill = ill_lookup_on_ifindex(ifindex,
- B_TRUE, NULL, NULL, NULL, NULL, ipst);
- if (inbound_ill == NULL) {
- data_mp->b_prev = NULL;
- freemsg(mp);
- return;
- } else {
- fwdq = inbound_ill->ill_rq;
- }
- data_mp->b_prev = NULL;
- /*
- * Send a forwarded packet back into ip_rput_v6
- * just as in ire_send_v6().
- * Extract the queue from b_prev (set in
- * ip_rput_data_v6).
- */
- if (fwdq != NULL) {
- /*
- * Forwarded packets hop count will
- * get decremented in ip_rput_data_v6
- */
- if (data_mp != mp)
- freeb(mp);
- put(fwdq, data_mp);
- } else {
- /*
- * Send locally originated packets back
- * into ip_wput_v6.
- */
- put(ill->ill_wq, mp);
- }
- ill_refrele(inbound_ill);
- } else {
- put(ill->ill_wq, mp);
- }
- mp = nxt_mp;
+ nce_update(ncec, ND_STALE, NULL);
}
+ mutex_exit(&ncec->ncec_lock);
+ nce = nce_fastpath(ncec, B_TRUE, NULL);
+ nce_resolv_ok(ncec);
+ if (nce != NULL)
+ nce_refrele(nce);
return;
}
- ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len);
+ ll_changed = nce_cmp_ll_addr(ncec, hw_addr, hw_addr_len);
if (!is_adv) {
/* If this is a SOLICITATION request only */
if (ll_changed)
- nce_update(nce, ND_STALE, hw_addr);
- mutex_exit(&nce->nce_lock);
- nce_cb_dispatch(nce);
+ nce_update(ncec, ND_STALE, hw_addr);
+ mutex_exit(&ncec->ncec_lock);
+ ncec_cb_dispatch(ncec);
return;
}
if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
/* If in any other state than REACHABLE, ignore */
- if (nce->nce_state == ND_REACHABLE) {
- nce_update(nce, ND_STALE, NULL);
+ if (ncec->ncec_state == ND_REACHABLE) {
+ nce_update(ncec, ND_STALE, NULL);
}
- mutex_exit(&nce->nce_lock);
- nce_cb_dispatch(nce);
+ mutex_exit(&ncec->ncec_lock);
+ ncec_cb_dispatch(ncec);
return;
} else {
if (ll_changed) {
- nce_update(nce, ND_UNCHANGED, hw_addr);
+ nce_update(ncec, ND_UNCHANGED, hw_addr);
ll_updated = B_TRUE;
}
if (flag & ND_NA_FLAG_SOLICITED) {
- nce_update(nce, ND_REACHABLE, NULL);
+ nce_update(ncec, ND_REACHABLE, NULL);
} else {
if (ll_updated) {
- nce_update(nce, ND_STALE, NULL);
+ nce_update(ncec, ND_STALE, NULL);
}
}
- mutex_exit(&nce->nce_lock);
- if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags &
+ mutex_exit(&ncec->ncec_lock);
+ if (!(flag & ND_NA_FLAG_ROUTER) && (ncec->ncec_flags &
NCE_F_ISROUTER)) {
- ire_t *ire;
-
- /*
- * Router turned to host. We need to remove the
- * entry as well as any default route that may be
- * using this as a next hop. This is required by
- * section 7.2.5 of RFC 2461.
- */
- ire = ire_ftable_lookup_v6(&ipv6_all_zeros,
- &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT,
- nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL,
- MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW |
- MATCH_IRE_DEFAULT, ipst);
- if (ire != NULL) {
- ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
- ire_delete(ire);
- ire_refrele(ire);
- }
- ndp_delete(nce); /* will do nce_cb_dispatch */
+ ncec_router_to_host(ncec);
} else {
- nce_cb_dispatch(nce);
+ ncec_cb_dispatch(ncec);
}
}
}
/*
- * Walker state structure used by ndp_process() / ndp_process_entry().
- */
-typedef struct ndp_process_data {
- ill_t *np_ill; /* ill/illgrp to match against */
- const in6_addr_t *np_addr; /* IPv6 address to match */
- uchar_t *np_hw_addr; /* passed to nce_process() */
- uint32_t np_flag; /* passed to nce_process() */
- boolean_t np_is_adv; /* passed to nce_process() */
-} ndp_process_data_t;
-
-/*
- * Walker callback used by ndp_process() for IPMP groups: calls nce_process()
- * for each NCE with a matching address that's in the same IPMP group.
- */
-static void
-ndp_process_entry(nce_t *nce, void *arg)
-{
- ndp_process_data_t *npp = arg;
-
- if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) &&
- IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) &&
- IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
- nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv);
- }
-}
-
-/*
- * Wrapper around nce_process() that handles IPMP. In particular, for IPMP,
- * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have
- * more than one NCE for a given IPv6 address to tend to. In that case, we
- * need to walk all NCEs and callback nce_process() for each one. Since this
- * is expensive, in the non-IPMP case we just directly call nce_process().
- * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP
- * interfaces in an IPMP group share the same NCEs -- at which point this
- * function can be removed entirely.
- */
-void
-ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
-{
- ill_t *ill = nce->nce_ill;
- struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6;
- ndp_process_data_t np;
-
- if (ill->ill_grp == NULL) {
- nce_process(nce, hw_addr, flag, is_adv);
- return;
- }
-
- /* IPMP case: walk all NCEs */
- np.np_ill = ill;
- np.np_addr = &nce->nce_addr;
- np.np_flag = flag;
- np.np_is_adv = is_adv;
- np.np_hw_addr = hw_addr;
-
- ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES);
-}
-
-/*
- * Pass arg1 to the pfi supplied, along with each nce in existence.
- * ndp_walk() places a REFHOLD on the nce and drops the lock when
+ * Pass arg1 to the pfi supplied, along with each ncec in existence.
+ * ncec_walk() places a REFHOLD on the ncec and drops the lock when
* walking the hash list.
*/
void
-ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
+ncec_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
boolean_t trace)
{
- nce_t *nce;
- nce_t *nce1;
- nce_t **ncep;
- nce_t *free_nce_list = NULL;
+ ncec_t *ncec;
+ ncec_t *ncec1;
+ ncec_t **ncep;
+ ncec_t *free_nce_list = NULL;
mutex_enter(&ndp->ndp_g_lock);
- /* Prevent ndp_delete from unlink and free of NCE */
+ /* Prevent ncec_delete from unlink and free of NCE */
ndp->ndp_g_walker++;
mutex_exit(&ndp->ndp_g_lock);
for (ncep = ndp->nce_hash_tbl;
ncep < A_END(ndp->nce_hash_tbl); ncep++) {
- for (nce = *ncep; nce != NULL; nce = nce1) {
- nce1 = nce->nce_next;
- if (ill == NULL || nce->nce_ill == ill) {
+ for (ncec = *ncep; ncec != NULL; ncec = ncec1) {
+ ncec1 = ncec->ncec_next;
+ if (ill == NULL || ncec->ncec_ill == ill) {
if (trace) {
- NCE_REFHOLD(nce);
- (*pfi)(nce, arg1);
- NCE_REFRELE(nce);
+ ncec_refhold(ncec);
+ (*pfi)(ncec, arg1);
+ ncec_refrele(ncec);
} else {
- NCE_REFHOLD_NOTR(nce);
- (*pfi)(nce, arg1);
- NCE_REFRELE_NOTR(nce);
+ ncec_refhold_notr(ncec);
+ (*pfi)(ncec, arg1);
+ ncec_refrele_notr(ncec);
}
}
}
}
- for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) {
- nce1 = nce->nce_next;
- if (ill == NULL || nce->nce_ill == ill) {
- if (trace) {
- NCE_REFHOLD(nce);
- (*pfi)(nce, arg1);
- NCE_REFRELE(nce);
- } else {
- NCE_REFHOLD_NOTR(nce);
- (*pfi)(nce, arg1);
- NCE_REFRELE_NOTR(nce);
- }
- }
- }
mutex_enter(&ndp->ndp_g_lock);
ndp->ndp_g_walker--;
- /*
- * While NCE's are removed from global list they are placed
- * in a private list, to be passed to nce_ire_delete_list().
- * The reason is, there may be ires pointing to this nce
- * which needs to cleaned up.
- */
if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
/* Time to delete condemned entries */
for (ncep = ndp->nce_hash_tbl;
ncep < A_END(ndp->nce_hash_tbl); ncep++) {
- nce = *ncep;
- if (nce != NULL) {
- nce_remove(ndp, nce, &free_nce_list);
+ ncec = *ncep;
+ if (ncec != NULL) {
+ nce_remove(ndp, ncec, &free_nce_list);
}
}
- nce = ndp->nce_mask_entries;
- if (nce != NULL) {
- nce_remove(ndp, nce, &free_nce_list);
- }
ndp->ndp_g_walker_cleanup = B_FALSE;
}
mutex_exit(&ndp->ndp_g_lock);
if (free_nce_list != NULL) {
- nce_ire_delete_list(free_nce_list);
+ nce_cleanup_list(free_nce_list);
}
}
@@ -1077,198 +986,10 @@ ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
* Note that ill can be NULL hence can't derive the ipst from it.
*/
void
-ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
-{
- ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
- ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
-}
-
-/*
- * Process resolve requests. Handles both mapped entries
- * as well as cases that needs to be send out on the wire.
- * Lookup a NCE for a given IRE. Regardless of whether one exists
- * or one is created, we defer making ire point to nce until the
- * ire is actually added at which point the nce_refcnt on the nce is
- * incremented. This is done primarily to have symmetry between ire_add()
- * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
- */
-int
-ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
-{
- nce_t *nce, *hw_nce = NULL;
- int err;
- ill_t *ipmp_ill;
- uint16_t nce_flags;
- mblk_t *mp_nce = NULL;
- ip_stack_t *ipst = ill->ill_ipst;
- uchar_t *hwaddr = NULL;
-
- ASSERT(ill->ill_isv6);
-
- if (IN6_IS_ADDR_MULTICAST(dst))
- return (nce_set_multicast(ill, dst));
-
- nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0;
-
- /*
- * If `ill' is under IPMP, then first check to see if there's an NCE
- * for `dst' on the IPMP meta-interface (e.g., because an application
- * explicitly did an SIOCLIFSETND to tie a hardware address to `dst').
- * If so, we use that hardware address when creating the NCE below.
- * Note that we don't yet have a mechanism to remove these NCEs if the
- * NCE for `dst' on the IPMP meta-interface is subsequently removed --
- * but rather than build such a beast, we should fix NCEs so that they
- * can be properly shared across an IPMP group.
- */
- if (IS_UNDER_IPMP(ill)) {
- if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
- hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE);
- if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) {
- hwaddr = hw_nce->nce_res_mp->b_rptr +
- NCE_LL_ADDR_OFFSET(ipmp_ill);
- nce_flags |= hw_nce->nce_flags;
- }
- ill_refrele(ipmp_ill);
- }
- }
-
- err = ndp_lookup_then_add_v6(ill,
- B_FALSE, /* NCE fastpath is per ill; don't match across group */
- hwaddr,
- dst,
- &ipv6_all_ones,
- &ipv6_all_zeros,
- 0,
- nce_flags,
- hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE,
- &nce);
-
- if (hw_nce != NULL)
- NCE_REFRELE(hw_nce);
-
- switch (err) {
- case 0:
- /*
- * New cache entry was created. Make sure that the state
- * is not ND_INCOMPLETE. It can be in some other state
- * even before we send out the solicitation as we could
- * get un-solicited advertisements.
- *
- * If this is an XRESOLV interface, simply return 0,
- * since we don't want to solicit just yet.
- */
- if (ill->ill_flags & ILLF_XRESOLV) {
- NCE_REFRELE(nce);
- return (0);
- }
-
- mutex_enter(&nce->nce_lock);
- if (nce->nce_state != ND_INCOMPLETE) {
- mutex_exit(&nce->nce_lock);
- NCE_REFRELE(nce);
- return (0);
- }
- if (nce->nce_rcnt == 0) {
- /* The caller will free mp */
- mutex_exit(&nce->nce_lock);
- ndp_delete(nce);
- NCE_REFRELE(nce);
- return (ESRCH);
- }
- mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
- if (mp_nce == NULL) {
- /* The caller will free mp */
- mutex_exit(&nce->nce_lock);
- ndp_delete(nce);
- NCE_REFRELE(nce);
- return (ENOMEM);
- }
- nce_queue_mp(nce, mp_nce);
- ip_ndp_resolve(nce);
- mutex_exit(&nce->nce_lock);
- NCE_REFRELE(nce);
- return (EINPROGRESS);
- case EEXIST:
- /* Resolution in progress just queue the packet */
- mutex_enter(&nce->nce_lock);
- if (nce->nce_state == ND_INCOMPLETE) {
- mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
- if (mp_nce == NULL) {
- err = ENOMEM;
- } else {
- nce_queue_mp(nce, mp_nce);
- err = EINPROGRESS;
- }
- } else {
- /*
- * Any other state implies we have
- * a nce but IRE needs to be added ...
- * ire_add_v6() will take care of the
- * the case when the nce becomes CONDEMNED
- * before the ire is added to the table.
- */
- err = 0;
- }
- mutex_exit(&nce->nce_lock);
- NCE_REFRELE(nce);
- break;
- default:
- ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
- break;
- }
- return (err);
-}
-
-/*
- * When there is no resolver, the link layer template is passed in
- * the IRE.
- * Lookup a NCE for a given IRE. Regardless of whether one exists
- * or one is created, we defer making ire point to nce until the
- * ire is actually added at which point the nce_refcnt on the nce is
- * incremented. This is done primarily to have symmetry between ire_add()
- * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
- */
-int
-ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
+ncec_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
- nce_t *nce;
- int err = 0;
-
- ASSERT(ill != NULL);
- ASSERT(ill->ill_isv6);
- if (IN6_IS_ADDR_MULTICAST(dst)) {
- err = nce_set_multicast(ill, dst);
- return (err);
- }
-
- err = ndp_lookup_then_add_v6(ill,
- B_FALSE, /* NCE fastpath is per ill; don't match across group */
- ill->ill_dest_addr, /* hardware address is NULL in most cases */
- dst,
- &ipv6_all_ones,
- &ipv6_all_zeros,
- 0,
- (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
- ND_REACHABLE,
- &nce);
-
- switch (err) {
- case 0:
- /*
- * Cache entry with a proper resolver cookie was
- * created.
- */
- NCE_REFRELE(nce);
- break;
- case EEXIST:
- err = 0;
- NCE_REFRELE(nce);
- break;
- default:
- ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
- break;
- }
- return (err);
+ ncec_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
+ ncec_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}
/*
@@ -1277,83 +998,73 @@ ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
* multicast destination.
*/
static int
-nce_set_multicast(ill_t *ill, const in6_addr_t *dst)
+nce_set_multicast_v6(ill_t *ill, const in6_addr_t *dst,
+ uint16_t flags, nce_t **newnce)
{
- nce_t *mnce; /* Multicast mapping entry */
- nce_t *nce;
- uchar_t *hw_addr = NULL;
+ uchar_t *hw_addr;
int err = 0;
ip_stack_t *ipst = ill->ill_ipst;
+ nce_t *nce;
ASSERT(ill != NULL);
ASSERT(ill->ill_isv6);
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst)));
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
- nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst));
- nce = nce_lookup_addr(ill, B_FALSE, dst, nce);
+ nce = nce_lookup_addr(ill, dst);
if (nce != NULL) {
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- NCE_REFRELE(nce);
- return (0);
- }
- /* No entry, now lookup for a mapping this should never fail */
- mnce = nce_lookup_mapping(ill, dst);
- if (mnce == NULL) {
- /* Something broken for the interface. */
- mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- return (ESRCH);
+ goto done;
}
- ASSERT(mnce->nce_flags & NCE_F_MAPPING);
if (ill->ill_net_type == IRE_IF_RESOLVER) {
/*
* For IRE_IF_RESOLVER a hardware mapping can be
- * generated, for IRE_IF_NORESOLVER, resolution cookie
- * in the ill is copied in ndp_add_v6().
+ * generated.
*/
hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
if (hw_addr == NULL) {
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- NCE_REFRELE(mnce);
return (ENOMEM);
}
- nce_make_mapping(mnce, hw_addr, (uchar_t *)dst);
+ ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr);
+ } else {
+ /*
+ * So no hw_addr is needed for IRE_IF_NORESOLVER.
+ */
+ hw_addr = NULL;
}
- NCE_REFRELE(mnce);
- /*
- * IRE_IF_NORESOLVER type simply copies the resolution
- * cookie passed in. So no hw_addr is needed.
- */
- err = ndp_add_v6(ill,
- hw_addr,
- dst,
- &ipv6_all_ones,
- &ipv6_all_zeros,
- 0,
- NCE_F_NONUD,
- ND_REACHABLE,
- &nce);
+ ASSERT((flags & NCE_F_MCAST) != 0);
+ ASSERT((flags & NCE_F_NONUD) != 0);
+ /* nce_state will be computed by nce_add_common() */
+ err = nce_add_v6(ill, hw_addr, ill->ill_phys_addr_length, dst, flags,
+ ND_UNCHANGED, &nce);
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
+ if (err == 0)
+ err = nce_add_v6_postprocess(nce);
if (hw_addr != NULL)
kmem_free(hw_addr, ill->ill_nd_lla_len);
if (err != 0) {
- ip1dbg(("nce_set_multicast: create failed" "%d\n", err));
+ ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err));
return (err);
}
- NCE_REFRELE(nce);
+done:
+ ASSERT(nce->nce_common->ncec_state == ND_REACHABLE);
+ if (newnce != NULL)
+ *newnce = nce;
+ else
+ nce_refrele(nce);
return (0);
}
/*
- * Return the link layer address, and any flags of a nce.
+ * Return the link layer address, and any flags of a ncec.
*/
int
ndp_query(ill_t *ill, struct lif_nd_req *lnr)
{
- nce_t *nce;
+ ncec_t *ncec;
in6_addr_t *addr;
sin6_t *sin6;
- dl_unitdata_req_t *dl;
ASSERT(ill != NULL && ill->ill_isv6);
sin6 = (sin6_t *)&lnr->lnr_addr;
@@ -1363,158 +1074,135 @@ ndp_query(ill_t *ill, struct lif_nd_req *lnr)
* NOTE: if the ill is an IPMP interface, then match against the whole
* illgrp. This e.g. allows in.ndpd to retrieve the link layer
* addresses for the data addresses on an IPMP interface even though
- * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill.
+ * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill.
*/
- nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE);
- if (nce == NULL)
+ ncec = ncec_lookup_illgrp_v6(ill, addr);
+ if (ncec == NULL)
return (ESRCH);
- /* If in INCOMPLETE state, no link layer address is available yet */
- if (!NCE_ISREACHABLE(nce)) {
- NCE_REFRELE(nce);
+ /* If no link layer address is available yet, return ESRCH */
+ if (!NCE_ISREACHABLE(ncec)) {
+ ncec_refrele(ncec);
return (ESRCH);
}
- dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
- if (ill->ill_flags & ILLF_XRESOLV)
- lnr->lnr_hdw_len = dl->dl_dest_addr_length;
- else
- lnr->lnr_hdw_len = ill->ill_nd_lla_len;
- ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <=
- sizeof (lnr->lnr_hdw_addr));
- bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
- (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len);
- if (nce->nce_flags & NCE_F_ISROUTER)
+ lnr->lnr_hdw_len = ill->ill_phys_addr_length;
+ bcopy(ncec->ncec_lladdr, (uchar_t *)&lnr->lnr_hdw_addr,
+ lnr->lnr_hdw_len);
+ if (ncec->ncec_flags & NCE_F_ISROUTER)
lnr->lnr_flags = NDF_ISROUTER_ON;
- if (nce->nce_flags & NCE_F_ANYCAST)
+ if (ncec->ncec_flags & NCE_F_ANYCAST)
lnr->lnr_flags |= NDF_ANYCAST_ON;
- NCE_REFRELE(nce);
+ ncec_refrele(ncec);
return (0);
}
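ndp_query() is what answers the neighbor-lookup ioctl path used, per the comment above, by in.ndpd to retrieve link-layer addresses. A rough, untested user-space sketch of driving it follows; the lnr_addr, lnr_hdw_len, and lnr_hdw_addr members appear in this file, but the SIOCLIFGETND request code and the lifr_nd member of struct lifreq are assumed from the illumos <net/if.h> and <sys/sockio.h> headers:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int
main(int argc, char **argv)
{
	struct lifreq lifr;
	struct sockaddr_in6 *sin6;
	int s, i;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <ifname> <ipv6-neighbor>\n",
		    argv[0]);
		return (1);
	}
	if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1) {
		perror("socket");
		return (1);
	}
	memset(&lifr, 0, sizeof (lifr));
	(void) strlcpy(lifr.lifr_name, argv[1], sizeof (lifr.lifr_name));
	sin6 = (struct sockaddr_in6 *)&lifr.lifr_nd.lnr_addr;
	sin6->sin6_family = AF_INET6;
	if (inet_pton(AF_INET6, argv[2], &sin6->sin6_addr) != 1) {
		fprintf(stderr, "bad address: %s\n", argv[2]);
		return (1);
	}
	/* ndp_query() returns ESRCH until the entry is REACHABLE */
	if (ioctl(s, SIOCLIFGETND, &lifr) == -1) {
		perror("SIOCLIFGETND");
		return (1);
	}
	for (i = 0; i < lifr.lifr_nd.lnr_hdw_len; i++)
		printf("%s%02x", i == 0 ? "" : ":",
		    (unsigned char)lifr.lifr_nd.lnr_hdw_addr[i]);
	printf("\n");
	(void) close(s);
	return (0);
}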
/*
- * Send Enable/Disable multicast reqs to driver.
+ * Finish setting up the Enable/Disable multicast for the driver.
*/
-int
-ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len,
+mblk_t *
+ndp_mcastreq(ill_t *ill, const in6_addr_t *v6group, uint32_t hw_addr_len,
uint32_t hw_addr_offset, mblk_t *mp)
{
- nce_t *nce;
uchar_t *hw_addr;
- ip_stack_t *ipst = ill->ill_ipst;
+ ipaddr_t v4group;
+ uchar_t *addr;
- ASSERT(ill != NULL && ill->ill_isv6);
ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
- hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
- if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) {
- freemsg(mp);
- return (EINVAL);
+ if (IN6_IS_ADDR_V4MAPPED(v6group)) {
+ IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
+
+ ASSERT(CLASSD(v4group));
+ ASSERT(!(ill->ill_isv6));
+
+ addr = (uchar_t *)&v4group;
+ } else {
+ ASSERT(IN6_IS_ADDR_MULTICAST(v6group));
+ ASSERT(ill->ill_isv6);
+
+ addr = (uchar_t *)v6group;
}
- mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
- nce = nce_lookup_mapping(ill, addr);
- if (nce == NULL) {
- mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
+ hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
+ if (hw_addr == NULL) {
+ ip0dbg(("ndp_mcastreq NULL hw_addr\n"));
freemsg(mp);
- return (ESRCH);
- }
- mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- /*
- * Update dl_addr_length and dl_addr_offset for primitives that
- * have physical addresses as opposed to full saps
- */
- switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
- case DL_ENABMULTI_REQ:
- /* Track the state if this is the first enabmulti */
- if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
- ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
- ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
- break;
- case DL_DISABMULTI_REQ:
- ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
- break;
- default:
- NCE_REFRELE(nce);
- ip1dbg(("ndp_mcastreq: default\n"));
- return (EINVAL);
+ return (NULL);
}
- nce_make_mapping(nce, hw_addr, (uchar_t *)addr);
- NCE_REFRELE(nce);
- ill_dlpi_send(ill, mp);
- return (0);
-}
+ ip_mcast_mapping(ill, addr, hw_addr);
+ return (mp);
+}
-/*
- * Send out a NS for resolving the ip address in nce.
- */
void
-ip_ndp_resolve(nce_t *nce)
+ip_ndp_resolve(ncec_t *ncec)
{
+ in_addr_t sender4 = INADDR_ANY;
in6_addr_t sender6 = ipv6_all_zeros;
+ ill_t *src_ill;
uint32_t ms;
- mblk_t *mp;
- ip6_t *ip6h;
- ASSERT(MUTEX_HELD(&nce->nce_lock));
- /*
- * Pick the src from outgoing packet, if one is available.
- * Otherwise let nce_xmit figure out the src.
- */
- if ((mp = nce->nce_qd_mp) != NULL) {
- /* Handle ip_newroute_v6 giving us IPSEC packets */
- if (mp->b_datap->db_type == M_CTL)
- mp = mp->b_cont;
- ip6h = (ip6_t *)mp->b_rptr;
- if (ip6h->ip6_nxt == IPPROTO_RAW) {
- /*
- * This message should have been pulled up already in
- * ip_wput_v6. We can't do pullups here because
- * the message could be from the nce_qd_mp which could
- * have b_next/b_prev non-NULL.
- */
- ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
- ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
- }
- sender6 = ip6h->ip6_src;
+ src_ill = nce_resolve_src(ncec, &sender6);
+ if (src_ill == NULL) {
+ /* Make sure we try again later */
+ ms = ncec->ncec_ill->ill_reachable_retrans_time;
+ nce_restart_timer(ncec, (clock_t)ms);
+ return;
}
- ms = nce_solicit(nce, sender6);
- mutex_exit(&nce->nce_lock);
+ if (ncec->ncec_ipversion == IPV4_VERSION)
+ IN6_V4MAPPED_TO_IPADDR(&sender6, sender4);
+ mutex_enter(&ncec->ncec_lock);
+ if (ncec->ncec_ipversion == IPV6_VERSION)
+ ms = ndp_solicit(ncec, sender6, src_ill);
+ else
+ ms = arp_request(ncec, sender4, src_ill);
+ mutex_exit(&ncec->ncec_lock);
if (ms == 0) {
- if (nce->nce_state != ND_REACHABLE) {
- nce_resolv_failed(nce);
- ndp_delete(nce);
+ if (ncec->ncec_state != ND_REACHABLE) {
+ if (ncec->ncec_ipversion == IPV6_VERSION)
+ ndp_resolv_failed(ncec);
+ else
+ arp_resolv_failed(ncec);
+ ASSERT((ncec->ncec_flags & NCE_F_STATIC) == 0);
+ nce_make_unreachable(ncec);
+ ncec_delete(ncec);
}
} else {
- NDP_RESTART_TIMER(nce, (clock_t)ms);
+ nce_restart_timer(ncec, (clock_t)ms);
}
- mutex_enter(&nce->nce_lock);
+done:
+ ill_refrele(src_ill);
}
/*
- * Send a neighbor solicitation.
+ * Send an IPv6 neighbor solicitation.
* Returns number of milliseconds after which we should either rexmit or abort.
* Return of zero means we should abort.
- * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt.
+ * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt.
+ * The optional source address is used as a hint to ndp_solicit for
+ * which source to use in the packet.
*
- * NOTE: This routine drops nce_lock (and later reacquires it) when sending
+ * NOTE: This routine drops ncec_lock (and later reacquires it) when sending
* the packet.
*/
uint32_t
-nce_solicit(nce_t *nce, in6_addr_t sender)
+ndp_solicit(ncec_t *ncec, in6_addr_t src, ill_t *ill)
{
- boolean_t dropped;
+ in6_addr_t dst;
+ boolean_t dropped = B_FALSE;
- ASSERT(nce->nce_ipversion == IPV6_VERSION);
- ASSERT(MUTEX_HELD(&nce->nce_lock));
+ ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
- if (nce->nce_rcnt == 0)
+ if (ncec->ncec_rcnt == 0)
return (0);
- nce->nce_rcnt--;
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0);
- mutex_enter(&nce->nce_lock);
+ dst = ncec->ncec_addr;
+ ncec->ncec_rcnt--;
+ mutex_exit(&ncec->ncec_lock);
+ dropped = ndp_xmit(ill, ND_NEIGHBOR_SOLICIT, ill->ill_phys_addr,
+ ill->ill_phys_addr_length, &src, &dst, 0);
+ mutex_enter(&ncec->ncec_lock);
if (dropped)
- nce->nce_rcnt++;
- return (nce->nce_ill->ill_reachable_retrans_time);
+ ncec->ncec_rcnt++;
+ return (ncec->ncec_ill->ill_reachable_retrans_time);
}
/*
@@ -1528,23 +1216,30 @@ nce_solicit(nce_t *nce, in6_addr_t sender)
* ip_ndp_excl.
*/
/* ARGSUSED */
-static void
-ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
+void
+ip_addr_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
ill_t *ill = rq->q_ptr;
ipif_t *ipif;
- in6_addr_t *addr = (in6_addr_t *)mp->b_rptr;
+ in6_addr_t *addr6 = (in6_addr_t *)mp->b_rptr;
+ in_addr_t *addr4 = (in_addr_t *)mp->b_rptr;
+ boolean_t addr_equal;
for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
/*
* We do not support recovery of proxy ARP'd interfaces,
* because the system lacks a complete proxy ARP mechanism.
*/
- if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
- !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) {
- continue;
+ if (ill->ill_isv6) {
+ addr_equal = IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
+ addr6);
+ } else {
+ addr_equal = (ipif->ipif_lcl_addr == *addr4);
}
+ if ((ipif->ipif_flags & IPIF_POINTOPOINT) || !addr_equal)
+ continue;
+
/*
* If we have already recovered or if the interface is going
* away, then ignore.
@@ -1561,13 +1256,20 @@ ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
mutex_exit(&ill->ill_lock);
ipif->ipif_was_dup = B_TRUE;
- VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
- (void) ipif_up_done_v6(ipif);
+ if (ill->ill_isv6) {
+ VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
+ (void) ipif_up_done_v6(ipif);
+ } else {
+ VERIFY(ipif_arp_up(ipif, Res_act_initial, B_TRUE) !=
+ EINPROGRESS);
+ (void) ipif_up_done(ipif);
+ }
}
freeb(mp);
}
/*
 * Attempt to recover an interface that's been shut down as a duplicate.
* As long as someone else holds the address, the interface will stay down.
* When that conflict goes away, the interface is brought back up. This is
@@ -1579,8 +1281,8 @@ ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
*
* This function is entered on a timer expiry; the ID is in ipif_recovery_id.
*/
-static void
-ipif6_dup_recovery(void *arg)
+void
+ipif_dup_recovery(void *arg)
{
ipif_t *ipif = arg;
@@ -1598,7 +1300,7 @@ ipif6_dup_recovery(void *arg)
if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
return;
- ndp_do_recovery(ipif);
+ ipif_do_recovery(ipif);
}
/*
@@ -1608,18 +1310,24 @@ ipif6_dup_recovery(void *arg)
* Called both by recovery timer expiry and link-up notification.
*/
void
-ndp_do_recovery(ipif_t *ipif)
+ipif_do_recovery(ipif_t *ipif)
{
ill_t *ill = ipif->ipif_ill;
mblk_t *mp;
ip_stack_t *ipst = ill->ill_ipst;
+ size_t mp_size;
- mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED);
+ if (ipif->ipif_isv6)
+ mp_size = sizeof (ipif->ipif_v6lcl_addr);
+ else
+ mp_size = sizeof (ipif->ipif_lcl_addr);
+ mp = allocb(mp_size, BPRI_MED);
if (mp == NULL) {
mutex_enter(&ill->ill_lock);
- if (ipif->ipif_recovery_id == 0 &&
+ if (ipst->ips_ip_dup_recovery > 0 &&
+ ipif->ipif_recovery_id == 0 &&
!(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
- ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
+ ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
}
mutex_exit(&ill->ill_lock);
@@ -1632,10 +1340,15 @@ ndp_do_recovery(ipif_t *ipif)
(void) untimeout(ipif->ipif_recovery_id);
ipif->ipif_recovery_id = 0;
- bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
- sizeof (ipif->ipif_v6lcl_addr));
+ if (ipif->ipif_isv6) {
+ bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
+ sizeof (ipif->ipif_v6lcl_addr));
+ } else {
+ bcopy(&ipif->ipif_lcl_addr, mp->b_rptr,
+ sizeof (ipif->ipif_lcl_addr));
+ }
ill_refhold(ill);
- qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP,
+ qwriter_ip(ill, ill->ill_rq, mp, ip_addr_recover, NEW_OP,
B_FALSE);
}
}
@@ -1644,80 +1357,19 @@ ndp_do_recovery(ipif_t *ipif)
* Find the MAC and IP addresses in an NA/NS message.
*/
static void
-ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp,
- uchar_t **haddr, uint_t *haddrlenp)
+ip_ndp_find_addresses(mblk_t *mp, ip_recv_attr_t *ira, ill_t *ill,
+ in6_addr_t *targp, uchar_t **haddr, uint_t *haddrlenp)
{
- ip6_t *ip6h = (ip6_t *)mp->b_rptr;
icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
- nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
uchar_t *addr;
- int alen = 0;
+ int alen;
- if (dl_mp == NULL) {
- nd_opt_hdr_t *opt = NULL;
- int len;
-
- /*
- * If it's from the fast-path, then it can't be a probe
- * message, and thus must include a linkaddr option.
- * Extract that here.
- */
- switch (icmp6->icmp6_type) {
- case ND_NEIGHBOR_SOLICIT:
- len = mp->b_wptr - (uchar_t *)ns;
- if ((len -= sizeof (*ns)) > 0) {
- opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1),
- len, ND_OPT_SOURCE_LINKADDR);
- }
- break;
- case ND_NEIGHBOR_ADVERT:
- len = mp->b_wptr - (uchar_t *)na;
- if ((len -= sizeof (*na)) > 0) {
- opt = ndp_get_option((nd_opt_hdr_t *)(na + 1),
- len, ND_OPT_TARGET_LINKADDR);
- }
- break;
- }
-
- if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >=
- ill->ill_nd_lla_len) {
- addr = (uchar_t *)(opt + 1);
- alen = ill->ill_nd_lla_len;
- }
-
- /*
- * We cheat a bit here for the sake of printing usable log
- * messages in the rare case where the reply we got was unicast
- * without a source linkaddr option, and the interface is in
- * fastpath mode. (Sigh.)
- */
- if (alen == 0 && ill->ill_type == IFT_ETHER &&
- MBLKHEAD(mp) >= sizeof (struct ether_header)) {
- struct ether_header *pether;
-
- pether = (struct ether_header *)((char *)ip6h -
- sizeof (*pether));
- addr = pether->ether_shost.ether_addr_octet;
- alen = ETHERADDRL;
- }
- } else {
- dl_unitdata_ind_t *dlu;
-
- dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr;
- alen = dlu->dl_src_addr_length;
- if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) &&
- dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) {
- addr = dl_mp->b_rptr + dlu->dl_src_addr_offset;
- if (ill->ill_sap_length < 0) {
- alen += ill->ill_sap_length;
- } else {
- addr += ill->ill_sap_length;
- alen -= ill->ill_sap_length;
- }
- }
- }
+ /* icmp_inbound_v6 ensures this */
+ ASSERT(ira->ira_flags & IRAF_L2SRC_SET);
+ addr = ira->ira_l2src;
+ alen = ill->ill_phys_addr_length;
if (alen > 0) {
*haddr = addr;
*haddrlenp = alen;
@@ -1740,35 +1392,58 @@ ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
ill_t *ill = rq->q_ptr;
ipif_t *ipif;
- mblk_t *dl_mp = NULL;
uchar_t *haddr;
uint_t haddrlen;
ip_stack_t *ipst = ill->ill_ipst;
in6_addr_t targ;
-
- if (DB_TYPE(mp) != M_DATA) {
- dl_mp = mp;
- mp = mp->b_cont;
+ ip_recv_attr_t iras;
+ mblk_t *attrmp;
+
+ attrmp = mp;
+ mp = mp->b_cont;
+ attrmp->b_cont = NULL;
+ if (!ip_recv_attr_from_mblk(attrmp, &iras)) {
+ /* The ill or ip_stack_t disappeared on us */
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ ip_drop_input("ip_recv_attr_from_mblk", mp, ill);
+ freemsg(mp);
+ ira_cleanup(&iras, B_TRUE);
+ return;
}
- ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
+ ASSERT(ill == iras.ira_rill);
+
+ ip_ndp_find_addresses(mp, &iras, ill, &targ, &haddr, &haddrlen);
if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) {
/*
* Ignore conflicts generated by misbehaving switches that
* just reflect our own messages back to us. For IPMP, we may
* see reflections across any ill in the illgrp.
+ *
+		 * RFC 2462 and its successors try to detect both the case
+		 * where a statically configured IPv6 address is a duplicate
+		 * and the case where the L2 address itself is a duplicate.
+		 * The latter is important because, with stateless address
+		 * autoconfiguration, a duplicate L2 address would make the
+		 * derived IPv6 address(es) duplicates as well. We rely on
+		 * DAD of the IPv6 address itself to detect that case.
*/
+		/* For an under ill, ill_grp can change under ips_ill_g_lock */
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
IS_UNDER_IPMP(ill) &&
- ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL)
+ ipmp_illgrp_find_ill(ill->ill_grp, haddr,
+ haddrlen) != NULL) {
+ rw_exit(&ipst->ips_ill_g_lock);
goto ignore_conflict;
+ }
+ rw_exit(&ipst->ips_ill_g_lock);
}
/*
* Look up the appropriate ipif.
*/
- ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL,
- NULL, ipst);
+ ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, ipst);
if (ipif == NULL)
goto ignore_conflict;
@@ -1802,43 +1477,64 @@ ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
ill->ill_ipif_dup_count++;
mutex_exit(&ill->ill_lock);
(void) ipif_down(ipif, NULL, NULL);
- ipif_down_tail(ipif);
+ (void) ipif_down_tail(ipif);
mutex_enter(&ill->ill_lock);
if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
ill->ill_net_type == IRE_IF_RESOLVER &&
!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
ipst->ips_ip_dup_recovery > 0) {
ASSERT(ipif->ipif_recovery_id == 0);
- ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
+ ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
}
mutex_exit(&ill->ill_lock);
ipif_refrele(ipif);
+
ignore_conflict:
- if (dl_mp != NULL)
- freeb(dl_mp);
freemsg(mp);
+ ira_cleanup(&iras, B_TRUE);
}
/*
* Handle failure by tearing down the ipifs with the specified address. Note
- * that tearing down the ipif also means deleting the nce through ipif_down, so
- * it's not possible to do recovery by just restarting the nce timer. Instead,
+ * that tearing down the ipif also means deleting the ncec through ipif_down, so
+ * it's not possible to do recovery by just restarting the ncec timer. Instead,
* we start a timer on the ipif.
+ * The caller has to free mp.
*/
static void
-ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
+ndp_failure(mblk_t *mp, ip_recv_attr_t *ira)
{
+ const uchar_t *haddr;
+ ill_t *ill = ira->ira_rill;
+
+ /*
+ * Ignore conflicts generated by misbehaving switches that just
+ * reflect our own messages back to us.
+ */
+
+ /* icmp_inbound_v6 ensures this */
+ ASSERT(ira->ira_flags & IRAF_L2SRC_SET);
+ haddr = ira->ira_l2src;
+ if (haddr != NULL &&
+ bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
+ return;
+ }
+
if ((mp = copymsg(mp)) != NULL) {
- if (dl_mp == NULL)
- dl_mp = mp;
- else if ((dl_mp = copyb(dl_mp)) != NULL)
- dl_mp->b_cont = mp;
- if (dl_mp == NULL) {
+ mblk_t *attrmp;
+
+ attrmp = ip_recv_attr_to_mblk(ira);
+ if (attrmp == NULL) {
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ ip_drop_input("ipIfStatsInDiscards", mp, ill);
freemsg(mp);
} else {
+ ASSERT(attrmp->b_cont == NULL);
+ attrmp->b_cont = mp;
+ mp = attrmp;
ill_refhold(ill);
- qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
+ qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_excl, NEW_OP,
B_FALSE);
}
}
@@ -1848,20 +1544,39 @@ ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
* Handle a discovered conflict: some other system is advertising that it owns
* one of our IP addresses. We need to defend ourselves, or just shut down the
* interface.
+ *
+ * Handles both IPv4 and IPv6
*/
-static void
-ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
+boolean_t
+ip_nce_conflict(mblk_t *mp, ip_recv_attr_t *ira, ncec_t *ncec)
{
- ipif_t *ipif;
- uint32_t now;
- uint_t maxdefense;
- uint_t defs;
- ip_stack_t *ipst = ill->ill_ipst;
+ ipif_t *ipif;
+ clock_t now;
+ uint_t maxdefense;
+ uint_t defs;
+ ill_t *ill = ira->ira_ill;
+ ip_stack_t *ipst = ill->ill_ipst;
+ uint32_t elapsed;
+ boolean_t isv6 = ill->ill_isv6;
+ ipaddr_t ncec_addr;
- ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
- NULL, NULL, ipst);
+ if (isv6) {
+ ipif = ipif_lookup_addr_v6(&ncec->ncec_addr, ill, ALL_ZONES,
+ ipst);
+ } else {
+ if (arp_no_defense) {
+ /*
+ * Yes, there is a conflict, but no, we do not
+ * defend ourself.
+ */
+ return (B_TRUE);
+ }
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr);
+ ipif = ipif_lookup_addr(ncec_addr, ill, ALL_ZONES,
+ ipst);
+ }
if (ipif == NULL)
- return;
+ return (B_FALSE);
/*
* First, figure out if this address is disposable.
@@ -1875,50 +1590,51 @@ ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
* Now figure out how many times we've defended ourselves. Ignore
* defenses that happened long in the past.
*/
- now = gethrestime_sec();
- mutex_enter(&nce->nce_lock);
- if ((defs = nce->nce_defense_count) > 0 &&
- now - nce->nce_defense_time > ipst->ips_ip_defend_interval) {
- nce->nce_defense_count = defs = 0;
+ now = ddi_get_lbolt();
+ elapsed = (drv_hztousec(now - ncec->ncec_last_time_defended))/1000000;
+ mutex_enter(&ncec->ncec_lock);
+ if ((defs = ncec->ncec_defense_count) > 0 &&
+ elapsed > ipst->ips_ip_defend_interval) {
+ /*
+		 * ip_defend_interval has elapsed; reset the defense count.
+ */
+ ncec->ncec_defense_count = defs = 0;
}
- nce->nce_defense_count++;
- nce->nce_defense_time = now;
- mutex_exit(&nce->nce_lock);
+ ncec->ncec_defense_count++;
+ ncec->ncec_last_time_defended = now;
+ mutex_exit(&ncec->ncec_lock);
ipif_refrele(ipif);
/*
* If we've defended ourselves too many times already, then give up and
- * tear down the interface(s) using this address. Otherwise, defend by
- * sending out an unsolicited Neighbor Advertisement.
+ * tear down the interface(s) using this address.
+ * Otherwise, caller has to defend by sending out an announce.
*/
if (defs >= maxdefense) {
- ip_ndp_failure(ill, mp, dl_mp);
+ if (isv6)
+ ndp_failure(mp, ira);
+ else
+ arp_failure(mp, ira);
} else {
- char hbuf[MAC_STR_LEN];
- char sbuf[INET6_ADDRSTRLEN];
- uchar_t *haddr;
- uint_t haddrlen;
- in6_addr_t targ;
-
- ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
- cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
- mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)),
- inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
- ill->ill_name);
-
- (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0);
+ return (B_TRUE); /* caller must defend this address */
}
+ return (B_FALSE);
}
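The accounting above is the whole defense policy: defenses older than ip_defend_interval are forgotten, and once maxdefense defenses have been spent the address is torn down instead. A stand-alone sketch of the same arithmetic, using plain seconds for the timestamps in place of lbolt ticks and drv_hztousec(), with hypothetical names and example values:

#include <stdio.h>

/*
 * Hypothetical sketch of the ip_nce_conflict() defense-count policy:
 * defenses older than defend_interval are forgotten; once maxdefense
 * defenses have been sent within the interval, give up on the address.
 */
typedef struct {
	unsigned int	defense_count;
	long		last_defended;	/* seconds, monotonic */
} conflict_state_t;

typedef enum { DEFEND, GIVE_UP } verdict_t;

static verdict_t
conflict_verdict(conflict_state_t *cs, long now, long defend_interval,
    unsigned int maxdefense)
{
	unsigned int defs = cs->defense_count;

	if (defs > 0 && (now - cs->last_defended) > defend_interval)
		cs->defense_count = defs = 0;	/* interval elapsed: reset */

	cs->defense_count++;
	cs->last_defended = now;

	return (defs >= maxdefense ? GIVE_UP : DEFEND);
}

int
main(void)
{
	conflict_state_t cs = { 0, 0 };
	long t;

	/* Four conflicts in quick succession with maxdefense == 3. */
	for (t = 1; t <= 4; t++) {
		printf("t=%ld: %s\n", t,
		    conflict_verdict(&cs, t, 300, 3) == DEFEND ?
		    "defend" : "give up");
	}
	return (0);
}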
+/*
+ * Handle reception of Neighbor Solicitation messages.
+ */
static void
-ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
+ndp_input_solicit(mblk_t *mp, ip_recv_attr_t *ira)
{
+ ill_t *ill = ira->ira_ill, *under_ill;
nd_neighbor_solicit_t *ns;
- uint32_t hlen = ill->ill_nd_lla_len;
+ uint32_t hlen = ill->ill_phys_addr_length;
uchar_t *haddr = NULL;
icmp6_t *icmp_nd;
ip6_t *ip6h;
- nce_t *our_nce = NULL;
+ ncec_t *our_ncec = NULL;
in6_addr_t target;
in6_addr_t src;
int len;
@@ -1926,6 +1642,7 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
nd_opt_hdr_t *opt = NULL;
boolean_t bad_solicit = B_FALSE;
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib;
+ boolean_t need_ill_refrele = B_FALSE;
ip6h = (ip6_t *)mp->b_rptr;
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
@@ -1951,7 +1668,6 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
bad_solicit = B_TRUE;
goto done;
}
-
}
if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
/* Check to see if this is a valid DAD solicitation */
@@ -1974,20 +1690,20 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
* e.g. the IPMP ill's data link-local. So we match across the illgrp
* to ensure we find the associated NCE.
*/
- our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE);
+ our_ncec = ncec_lookup_illgrp_v6(ill, &target);
/*
- * If this is a valid Solicitation, a permanent
- * entry should exist in the cache
+ * If this is a valid Solicitation for an address we are publishing,
+ * then a PUBLISH entry should exist in the cache
*/
- if (our_nce == NULL ||
- !(our_nce->nce_flags & NCE_F_PERMANENT)) {
+ if (our_ncec == NULL || !NCE_PUBLISH(our_ncec)) {
ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
"ifname=%s ", ill->ill_name));
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg(" dst %s\n", AF_INET6, &target);
}
- bad_solicit = B_TRUE;
+ if (our_ncec == NULL)
+ bad_solicit = B_TRUE;
goto done;
}
@@ -1998,7 +1714,7 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
haddr = (uchar_t *)&opt[1];
if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
hlen == 0) {
 			ip1dbg(("ndp_input_solicit: bad SLLA\n"));
bad_solicit = B_TRUE;
goto done;
}
@@ -2010,7 +1726,7 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
flag |= NDP_UNICAST;
/*
- * Create/update the entry for the soliciting node.
+	 * Create/update the entry for the soliciting node on the ipmp_ill,
 	 * or respond to outstanding queries; don't do either if the
 	 * source is the unspecified address.
*/
@@ -2035,7 +1751,7 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
* process of verifying the address, then don't respond at all
* and don't keep track of the sender.
*/
- if (our_nce->nce_state == ND_PROBE)
+ if (our_ncec->ncec_state == ND_PROBE)
goto done;
/*
@@ -2048,27 +1764,37 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
if (haddr == NULL)
goto no_source;
- err = ndp_lookup_then_add_v6(ill,
- B_FALSE,
- haddr,
+ under_ill = ill;
+ if (IS_UNDER_IPMP(under_ill)) {
+ ill = ipmp_ill_hold_ipmp_ill(under_ill);
+ if (ill == NULL)
+ ill = under_ill;
+ else
+ need_ill_refrele = B_TRUE;
+ }
+ err = nce_lookup_then_add_v6(ill,
+ haddr, hlen,
&src, /* Soliciting nodes address */
- &ipv6_all_ones,
- &ipv6_all_zeros,
- 0,
0,
ND_STALE,
&nnce);
+
+ if (need_ill_refrele) {
+ ill_refrele(ill);
+ ill = under_ill;
+ need_ill_refrele = B_FALSE;
+ }
switch (err) {
case 0:
/* done with this entry */
- NCE_REFRELE(nnce);
+ nce_refrele(nnce);
break;
case EEXIST:
/*
 		 * B_FALSE indicates this is not an advertisement.
*/
- ndp_process(nnce, haddr, 0, B_FALSE);
- NCE_REFRELE(nnce);
+ nce_process(nnce->nce_common, haddr, 0, B_FALSE);
+ nce_refrele(nnce);
break;
default:
ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
@@ -2088,19 +1814,18 @@ no_source:
bad_solicit = B_TRUE;
goto done;
}
- if (our_nce->nce_state == ND_PROBE) {
+ if (our_ncec->ncec_state == ND_PROBE) {
/*
- * Internally looped-back probes won't have DLPI
- * attached to them. External ones (which are sent by
- * multicast) always will. Just ignore our own
+ * Internally looped-back probes will have
+ * IRAF_L2SRC_LOOPBACK set so we can ignore our own
* transmissions.
*/
- if (dl_mp != NULL) {
+ if (!(ira->ira_flags & IRAF_L2SRC_LOOPBACK)) {
/*
* If someone else is probing our address, then
* we've crossed wires. Declare failure.
*/
- ip_ndp_failure(ill, mp, dl_mp);
+ ndp_failure(mp, ira);
}
goto done;
}
@@ -2110,24 +1835,34 @@ no_source:
*/
src = ipv6_all_hosts_mcast;
}
- /* Response to a solicitation */
- (void) nce_xmit_advert(our_nce, B_TRUE, &src, flag);
+ flag |= nce_advert_flags(our_ncec);
+ (void) ndp_xmit(ill,
+ ND_NEIGHBOR_ADVERT,
+ our_ncec->ncec_lladdr,
+ our_ncec->ncec_lladdr_length,
+ &target, /* Source and target of the advertisement pkt */
+ &src, /* IP Destination (source of original pkt) */
+ flag);
done:
if (bad_solicit)
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
- if (our_nce != NULL)
- NCE_REFRELE(our_nce);
+ if (our_ncec != NULL)
+ ncec_refrele(our_ncec);
}
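The SLLA validation inside ndp_input_solicit() hinges on nd_opt_len being a count of 8-octet units that includes the 2-byte option header, so an hlen-byte link-layer address only fits when hlen <= nd_opt_len * 8 - 2. A tiny stand-alone sketch of that check; the helper name is hypothetical:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical helper mirroring the SLLA length check above. */
static int
lladdr_opt_ok(uint8_t nd_opt_len, uint32_t hlen)
{
	if (nd_opt_len == 0 || hlen == 0)
		return (0);
	return (hlen <= (uint32_t)nd_opt_len * 8 - 2);
}

int
main(void)
{
	printf("6-byte MAC in a 1-unit option:  %s\n",
	    lladdr_opt_ok(1, 6) ? "ok" : "bad");
	printf("8-byte addr in a 1-unit option: %s\n",
	    lladdr_opt_ok(1, 8) ? "ok" : "bad");
	return (0);
}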
+/*
+ * Handle reception of Neighbor Advertisement messages.
+ */
void
-ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
+ndp_input_advert(mblk_t *mp, ip_recv_attr_t *ira)
{
+ ill_t *ill = ira->ira_ill;
nd_neighbor_advert_t *na;
- uint32_t hlen = ill->ill_nd_lla_len;
+ uint32_t hlen = ill->ill_phys_addr_length;
uchar_t *haddr = NULL;
icmp6_t *icmp_nd;
ip6_t *ip6h;
- nce_t *dst_nce = NULL;
+ ncec_t *dst_ncec = NULL;
in6_addr_t target;
nd_opt_hdr_t *opt = NULL;
int len;
@@ -2138,6 +1873,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
na = (nd_neighbor_advert_t *)icmp_nd;
+
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
(na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
ip1dbg(("ndp_input_advert: Target is multicast but the "
@@ -2179,17 +1915,25 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
* our local addresses, and those are spread across all the active
* ills in the group.
*/
- if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL)
+ if ((dst_ncec = ncec_lookup_illgrp_v6(ill, &target)) == NULL)
return;
- if (dst_nce->nce_flags & NCE_F_PERMANENT) {
+ if (NCE_PUBLISH(dst_ncec)) {
/*
- * Someone just advertised one of our local addresses. First,
+		 * Someone just advertised an address that we publish. First,
 		 * check if it was us -- if so, we can safely ignore it.
+		 * We don't get the haddr from ira_l2src because, in the
+		 * case that the packet originated from us on an IPMP group,
+		 * ira_l2src may be the link-layer address of the cast_ill
+		 * used to send the packet, which may not be the same as
+		 * the dst_ncec->ncec_lladdr of the address.
*/
if (haddr != NULL) {
- if (!nce_cmp_ll_addr(dst_nce, haddr, hlen))
- goto out; /* from us -- no conflict */
+ if (ira->ira_flags & IRAF_L2SRC_LOOPBACK)
+ goto out;
+
+ if (!nce_cmp_ll_addr(dst_ncec, haddr, hlen))
+ goto out; /* from us -- no conflict */
/*
* If we're in an IPMP group, check if this is an echo
@@ -2209,59 +1953,96 @@ ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
}
/*
- * Our own (looped-back) unsolicited neighbor advertisements
- * will get here with dl_mp == NULL. (These will usually be
- * filtered by the `haddr' checks above, but point-to-point
- * links have no hardware address and thus make it here.)
- */
- if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE)
- goto out;
-
- /*
* This appears to be a real conflict. If we're trying to
* configure this NCE (ND_PROBE), then shut it down.
* Otherwise, handle the discovered conflict.
- *
- * In the ND_PROBE case, dl_mp might be NULL if we're getting
- * a unicast reply. This isn't typically done (multicast is
- * the norm in response to a probe), but we can handle it.
*/
- if (dst_nce->nce_state == ND_PROBE)
- ip_ndp_failure(ill, mp, dl_mp);
- else
- ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
+ if (dst_ncec->ncec_state == ND_PROBE) {
+ ndp_failure(mp, ira);
+ } else {
+ if (ip_nce_conflict(mp, ira, dst_ncec)) {
+ char hbuf[MAC_STR_LEN];
+ char sbuf[INET6_ADDRSTRLEN];
+
+ cmn_err(CE_WARN,
+ "node '%s' is using %s on %s",
+ inet_ntop(AF_INET6, &target, sbuf,
+ sizeof (sbuf)),
+ haddr == NULL ? "<none>" :
+ mac_colon_addr(haddr, hlen, hbuf,
+ sizeof (hbuf)), ill->ill_name);
+ /*
+ * RFC 4862, Section 5.4.4 does not mandate
+ * any specific behavior when an NA matches
+ * a non-tentative address assigned to the
+ * receiver. We make the choice of defending
+ * our address, based on the assumption that
+ * the sender has not detected the Duplicate.
+ *
+ * ncec_last_time_defended has been adjusted
+ * in ip_nce_conflict()
+ */
+ (void) ndp_announce(dst_ncec);
+ }
+ }
} else {
if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
- dst_nce->nce_flags |= NCE_F_ISROUTER;
+ dst_ncec->ncec_flags |= NCE_F_ISROUTER;
/* B_TRUE indicates this an advertisement */
- ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
+ nce_process(dst_ncec, haddr, na->nd_na_flags_reserved, B_TRUE);
}
out:
- NCE_REFRELE(dst_nce);
+ ncec_refrele(dst_ncec);
}
/*
* Process NDP neighbor solicitation/advertisement messages.
 * The checksum has already been verified before reaching here.
+ * Information about the datalink header is contained in ira_l2src, but
+ * that should be ignored for loopback packets.
*/
void
-ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
+ndp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
+ ill_t *ill = ira->ira_rill;
icmp6_t *icmp_nd;
ip6_t *ip6h;
int len;
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib;
+ ill_t *orig_ill = NULL;
-
+ /*
+	 * Since ira_ill is where the IRE_LOCAL was hosted, we use ira_rill
+	 * and make it the IPMP upper ill to avoid being confused by a packet
+	 * addressed to a unicast address on a different ill.
+ */
+ if (IS_UNDER_IPMP(ill)) {
+ orig_ill = ill;
+ ill = ipmp_ill_hold_ipmp_ill(orig_ill);
+ if (ill == NULL) {
+ ill = orig_ill;
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ ip_drop_input("ipIfStatsInDiscards - IPMP ill",
+ mp, ill);
+ freemsg(mp);
+ return;
+ }
+ ASSERT(ill != orig_ill);
+ orig_ill = ira->ira_ill;
+ ira->ira_ill = ill;
+ mib = ill->ill_icmp6_mib;
+ }
if (!pullupmsg(mp, -1)) {
ip1dbg(("ndp_input: pullupmsg failed\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
+ ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp, ill);
goto done;
}
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
+ ip_drop_input("ipv6IfIcmpBadHoplimit", mp, ill);
BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
goto done;
}
@@ -2275,6 +2056,7 @@ ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
ip1dbg(("ndp_input: Wrong next header 0x%x\n",
ip6h->ip6_nxt));
+ ip_drop_input("Wrong next header", mp, ill);
BUMP_MIB(mib, ipv6IfIcmpInErrors);
goto done;
}
@@ -2283,6 +2065,7 @@ ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
if (icmp_nd->icmp6_code != 0) {
ip1dbg(("ndp_input: icmp6 code != 0 \n"));
+ ip_drop_input("code non-zero", mp, ill);
BUMP_MIB(mib, ipv6IfIcmpInErrors);
goto done;
}
@@ -2293,54 +2076,25 @@ ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
*/
if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
ip1dbg(("ndp_input: packet too short\n"));
+ ip_drop_input("packet too short", mp, ill);
BUMP_MIB(mib, ipv6IfIcmpInErrors);
goto done;
}
if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
- ndp_input_solicit(ill, mp, dl_mp);
+ ndp_input_solicit(mp, ira);
} else {
- ndp_input_advert(ill, mp, dl_mp);
+ ndp_input_advert(mp, ira);
}
done:
freemsg(mp);
+ if (orig_ill != NULL) {
+ ill_refrele(ill);
+ ira->ira_ill = orig_ill;
+ }
}
/*
- * Utility routine to send an advertisement. Assumes that the NCE cannot
- * go away (e.g., because it's refheld).
- */
-static boolean_t
-nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target,
- uint_t flags)
-{
- ASSERT((flags & NDP_PROBE) == 0);
-
- if (nce->nce_flags & NCE_F_ISROUTER)
- flags |= NDP_ISROUTER;
- if (!(nce->nce_flags & NCE_F_ANYCAST))
- flags |= NDP_ORIDE;
-
- return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla,
- &nce->nce_addr, target, flags));
-}
-
-/*
- * Utility routine to send a solicitation. Assumes that the NCE cannot
- * go away (e.g., because it's refheld).
- */
-static boolean_t
-nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
- uint_t flags)
-{
- if (flags & NDP_PROBE)
- sender = &ipv6_all_zeros;
-
- return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
- sender, &nce->nce_addr, flags));
-}
-
-/*
- * nce_xmit is called to form and transmit a ND solicitation or
+ * ndp_xmit is called to form and transmit a ND solicitation or
* advertisement ICMP packet.
*
* If the source address is unspecified and this isn't a probe (used for
@@ -2353,112 +2107,123 @@ nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
* corresponding ill's ill_wq otherwise returns B_TRUE.
*/
static boolean_t
-nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
+ndp_xmit(ill_t *ill, uint32_t operation, uint8_t *hw_addr, uint_t hw_addr_len,
const in6_addr_t *sender, const in6_addr_t *target, int flag)
{
- ill_t *hwaddr_ill;
uint32_t len;
icmp6_t *icmp6;
mblk_t *mp;
ip6_t *ip6h;
nd_opt_hdr_t *opt;
- uint_t plen, maxplen;
- ip6i_t *ip6i;
- ipif_t *src_ipif = NULL;
- uint8_t *hw_addr;
+ uint_t plen;
zoneid_t zoneid = GLOBAL_ZONEID;
- char buf[INET6_ADDRSTRLEN];
+ ill_t *hwaddr_ill = ill;
+ ip_xmit_attr_t ixas;
+ ip_stack_t *ipst = ill->ill_ipst;
+ boolean_t need_refrele = B_FALSE;
+ boolean_t probe = B_FALSE;
- ASSERT(!IS_IPMP(ill));
+ if (IS_UNDER_IPMP(ill)) {
+ probe = ipif_lookup_testaddr_v6(ill, sender, NULL);
+ /*
+ * We send non-probe packets on the upper IPMP interface.
+ * ip_output_simple() will use cast_ill for sending any
+ * multicast packets. Note that we can't follow the same
+ * logic for probe packets because all interfaces in the ipmp
+		 * group may have failed, so we only want to try to send
+		 * the ND packet on the ill corresponding to the src
+		 * address.
+ */
+ if (!probe) {
+ ill = ipmp_ill_hold_ipmp_ill(ill);
+ if (ill != NULL)
+ need_refrele = B_TRUE;
+ else
+ ill = hwaddr_ill;
+ }
+ }
/*
- * Check that the sender is actually a usable address on `ill', and if
- * so, track that as the src_ipif. If not, for solicitations, set the
- * sender to :: so that a new one will be picked below; for adverts,
- * drop the packet since we expect nce_xmit_advert() to always provide
- * a valid sender.
+	 * If we have an unspecified source (sender) address, select a
+	 * proper source address for the solicitation here, so that we
+	 * can initialize the h/w address correctly.
+	 *
+	 * If the sender is specified, then we use this address to look
+	 * up the zoneid before calling ip_output_simple(). This is to
+	 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly
+	 * by IP (we cannot guarantee that the global zone has an interface
+	 * route to the destination).
+	 *
+	 * Note that an NA never comes here with the unspecified source
+	 * address.
*/
- if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
- if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
- !src_ipif->ipif_addr_ready) {
- if (src_ipif != NULL) {
- ipif_refrele(src_ipif);
- src_ipif = NULL;
- }
- if (type == ND_NEIGHBOR_ADVERT) {
- ip1dbg(("nce_xmit: No source ipif for src %s\n",
- inet_ntop(AF_INET6, sender, buf,
- sizeof (buf))));
- return (B_TRUE);
- }
- sender = &ipv6_all_zeros;
- }
- }
/*
- * If we still have an unspecified source (sender) address and this
- * isn't a probe, select a source address from `ill'.
+	 * Probes will have an unspecified source address at this point.
*/
- if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
- ASSERT(type != ND_NEIGHBOR_ADVERT);
+ if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) {
+ zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ipst);
/*
- * Pick a source address for this solicitation, but restrict
- * the selection to addresses assigned to the output
- * interface. We do this because the destination will create
- * a neighbor cache entry for the source address of this
- * packet, so the source address needs to be a valid neighbor.
+ * It's possible for ipif_lookup_addr_zoneid_v6() to return
+ * ALL_ZONES if it cannot find a matching ipif for the address
+ * we are trying to use. In this case we err on the side of
+ * trying to send the packet by defaulting to the GLOBAL_ZONEID.
*/
- src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
- IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
- if (src_ipif == NULL) {
- ip1dbg(("nce_xmit: No source ipif for dst %s\n",
- inet_ntop(AF_INET6, target, buf, sizeof (buf))));
- return (B_TRUE);
- }
- sender = &src_ipif->ipif_v6src_addr;
+ if (zoneid == ALL_ZONES)
+ zoneid = GLOBAL_ZONEID;
}
- /*
- * We're either sending a probe or we have a source address.
- */
- ASSERT((flag & NDP_PROBE) || src_ipif != NULL);
-
- maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
- len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
- maxplen;
+ plen = (sizeof (nd_opt_hdr_t) + hw_addr_len + 7) / 8;
+ len = IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t) + plen * 8;
mp = allocb(len, BPRI_LO);
if (mp == NULL) {
- if (src_ipif != NULL)
- ipif_refrele(src_ipif);
+ if (need_refrele)
+ ill_refrele(ill);
return (B_TRUE);
}
+
bzero((char *)mp->b_rptr, len);
mp->b_wptr = mp->b_rptr + len;
- ip6i = (ip6i_t *)mp->b_rptr;
- ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
- ip6i->ip6i_nxt = IPPROTO_RAW;
- ip6i->ip6i_flags = IP6I_HOPLIMIT;
- if (flag & NDP_PROBE)
- ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
+ bzero(&ixas, sizeof (ixas));
+ ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6 | IXAF_NO_HW_CKSUM;
- ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
+ ixas.ixa_ifindex = ill->ill_phyint->phyint_ifindex;
+ ixas.ixa_ipst = ipst;
+ ixas.ixa_cred = kcred;
+ ixas.ixa_cpid = NOPID;
+ ixas.ixa_tsl = NULL;
+ ixas.ixa_zoneid = zoneid;
+
+ ip6h = (ip6_t *)mp->b_rptr;
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
- ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
+ ip6h->ip6_plen = htons(len - IPV6_HDR_LEN);
ip6h->ip6_nxt = IPPROTO_ICMPV6;
ip6h->ip6_hops = IPV6_MAX_HOPS;
- ip6h->ip6_src = *sender;
+ ixas.ixa_multicast_ttl = ip6h->ip6_hops;
ip6h->ip6_dst = *target;
icmp6 = (icmp6_t *)&ip6h[1];
- opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
- sizeof (nd_neighbor_advert_t));
-
- if (type == ND_NEIGHBOR_SOLICIT) {
+ if (hw_addr_len != 0) {
+ opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
+ sizeof (nd_neighbor_advert_t));
+ } else {
+ opt = NULL;
+ }
+ if (operation == ND_NEIGHBOR_SOLICIT) {
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
- if (!(flag & NDP_PROBE))
+ if (opt != NULL && !(flag & NDP_PROBE)) {
+ /*
+			 * Note that, per RFC 4862, we don't send an SLLA
+			 * option in IPv6 ND probes, whereas IPv4 DAD probes
+			 * do carry the source hardware address, even though
+			 * both go out with the unspecified/INADDR_ANY source
+			 * IP address.
+ */
opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
+ }
+ ip6h->ip6_src = *sender;
ns->nd_ns_target = *target;
if (!(flag & NDP_UNICAST)) {
/* Form multicast address of the target */
@@ -2470,7 +2235,9 @@ nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
ASSERT(!(flag & NDP_PROBE));
- opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ if (opt != NULL)
+ opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ ip6h->ip6_src = *sender;
na->nd_na_target = *sender;
if (flag & NDP_ISROUTER)
na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
@@ -2480,231 +2247,223 @@ nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
}
- hw_addr = NULL;
if (!(flag & NDP_PROBE)) {
- /*
- * Use our source address to find the hardware address to put
- * in the packet, so that the hardware address and IP address
- * will match up -- even if that hardware address doesn't
- * match the ill we actually transmit the packet through.
- */
- if (IS_IPMP(src_ipif->ipif_ill)) {
- hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
- if (hwaddr_ill == NULL) {
- ip1dbg(("nce_xmit: no bound ill!\n"));
- ipif_refrele(src_ipif);
- freemsg(mp);
- return (B_TRUE);
- }
- } else {
- hwaddr_ill = src_ipif->ipif_ill;
- ill_refhold(hwaddr_ill); /* for symmetry */
- }
-
- plen = roundup(sizeof (nd_opt_hdr_t) +
- hwaddr_ill->ill_nd_lla_len, 8);
-
- hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
- hwaddr_ill->ill_phys_addr;
- if (hw_addr != NULL) {
+ if (hw_addr != NULL && opt != NULL) {
/* Fill in link layer address and option len */
- opt->nd_opt_len = (uint8_t)(plen / 8);
- bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
+ opt->nd_opt_len = (uint8_t)plen;
+ bcopy(hw_addr, &opt[1], hw_addr_len);
}
-
- ill_refrele(hwaddr_ill);
+ }
+ if (opt != NULL && opt->nd_opt_type == 0) {
+ /* If there's no link layer address option, then strip it. */
+ len -= plen * 8;
+ mp->b_wptr = mp->b_rptr + len;
+ ip6h->ip6_plen = htons(len - IPV6_HDR_LEN);
}
- if (hw_addr == NULL)
- plen = 0;
-
- /* Fix up the length of the packet now that plen is known */
- len -= (maxplen - plen);
- mp->b_wptr = mp->b_rptr + len;
- ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
-
- icmp6->icmp6_type = type;
+ icmp6->icmp6_type = (uint8_t)operation;
icmp6->icmp6_code = 0;
/*
* Prepare for checksum by putting icmp length in the icmp
- * checksum field. The checksum is calculated in ip_wput_v6.
+ * checksum field. The checksum is calculated in ip_output.c.
*/
icmp6->icmp6_cksum = ip6h->ip6_plen;
- /*
- * Before we toss the src_ipif, look up the zoneid to pass to
- * ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT
- * packets to be routed correctly by IP (we cannot guarantee that the
- * global zone has an interface route to the destination).
- */
- if (src_ipif != NULL) {
- if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
- zoneid = GLOBAL_ZONEID;
- ipif_refrele(src_ipif);
- }
-
- ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
+ (void) ip_output_simple(mp, &ixas);
+ ixa_cleanup(&ixas);
+ if (need_refrele)
+ ill_refrele(ill);
return (B_FALSE);
}
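When the NDP_UNICAST flag is not set, the "multicast address of the target" formed above is the solicited-node group of RFC 4291: ff02::1:ff followed by the low-order 24 bits of the target address. A small stand-alone sketch; the helper name is hypothetical:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical sketch of the solicited-node multicast address formation. */
static void
solicited_node_mcast(const uint8_t target[16], uint8_t dst[16])
{
	static const uint8_t prefix[13] = {
		0xff, 0x02, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0x01, 0xff
	};

	memcpy(dst, prefix, sizeof (prefix));
	memcpy(&dst[13], &target[13], 3);	/* low-order 24 bits */
}

int
main(void)
{
	/* target fe80::1234:56ff:fe78:9abc */
	uint8_t tgt[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
	    0x12, 0x34, 0x56, 0xff, 0xfe, 0x78, 0x9a, 0xbc };
	uint8_t dst[16];
	int i;

	solicited_node_mcast(tgt, dst);
	/* prints ff02:0000:0000:0000:0000:0001:ff78:9abc */
	for (i = 0; i < 16; i += 2)
		printf("%s%02x%02x", i == 0 ? "" : ":", dst[i], dst[i + 1]);
	printf("\n");
	return (0);
}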
/*
- * Make a link layer address (does not include the SAP) from an nce.
- * To form the link layer address, use the last four bytes of ipv6
- * address passed in and the fixed offset stored in nce.
+ * Used to set ND_UNREACHABLE before ncec_delete marks the ncec
+ * NCE_F_CONDEMNED. The datapath uses this as an indication that there
+ * is a problem (as opposed to an NCE that was just reclaimed due to
+ * lack of memory).
+ * Note that static ARP entries never become unreachable.
*/
-static void
-nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
-{
- uchar_t *mask, *to;
- ill_t *ill = nce->nce_ill;
- int len;
-
- if (ill->ill_net_type == IRE_IF_NORESOLVER)
- return;
- ASSERT(nce->nce_res_mp != NULL);
- ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
- ASSERT(nce->nce_flags & NCE_F_MAPPING);
- ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
- ASSERT(addr != NULL);
- bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
- addrpos, ill->ill_nd_lla_len);
- len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
- IPV6_ADDR_LEN);
- mask = (uchar_t *)&nce->nce_extract_mask;
- mask += (IPV6_ADDR_LEN - len);
- addr += (IPV6_ADDR_LEN - len);
- to = addrpos + nce->nce_ll_extract_start;
- while (len-- > 0)
- *to++ |= *mask++ & *addr++;
-}
-
-mblk_t *
-nce_udreq_alloc(ill_t *ill)
+void
+nce_make_unreachable(ncec_t *ncec)
{
- mblk_t *template_mp = NULL;
- dl_unitdata_req_t *dlur;
- int sap_length;
-
- ASSERT(ill->ill_isv6);
-
- sap_length = ill->ill_sap_length;
- template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
- ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
- if (template_mp == NULL)
- return (NULL);
-
- dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
- dlur->dl_priority.dl_min = 0;
- dlur->dl_priority.dl_max = 0;
- dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
- dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);
-
- /* Copy in the SAP value. */
- NCE_LL_SAP_COPY(ill, template_mp);
-
- return (template_mp);
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_state = ND_UNREACHABLE;
+ mutex_exit(&ncec->ncec_lock);
}
/*
- * NDP retransmit timer.
+ * NCE retransmit timer. Common to IPv4 and IPv6.
* This timer goes off when:
- * a. It is time to retransmit NS for resolver.
+ * a. It is time to retransmit a resolution for resolver.
* b. It is time to send reachability probes.
*/
void
-ndp_timer(void *arg)
+nce_timer(void *arg)
{
- nce_t *nce = arg;
- ill_t *ill = nce->nce_ill;
+ ncec_t *ncec = arg;
+ ill_t *ill = ncec->ncec_ill, *src_ill;
char addrbuf[INET6_ADDRSTRLEN];
boolean_t dropped = B_FALSE;
- ip_stack_t *ipst = ill->ill_ipst;
+ ip_stack_t *ipst = ncec->ncec_ipst;
+ boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
+ in_addr_t sender4 = INADDR_ANY;
+ in6_addr_t sender6 = ipv6_all_zeros;
/*
- * The timer has to be cancelled by ndp_delete before doing the final
+ * The timer has to be cancelled by ncec_delete before doing the final
* refrele. So the NCE is guaranteed to exist when the timer runs
* until it clears the timeout_id. Before clearing the timeout_id
- * bump up the refcnt so that we can continue to use the nce
+ * bump up the refcnt so that we can continue to use the ncec
*/
- ASSERT(nce != NULL);
-
- mutex_enter(&nce->nce_lock);
- NCE_REFHOLD_LOCKED(nce);
- nce->nce_timeout_id = 0;
+ ASSERT(ncec != NULL);
+ mutex_enter(&ncec->ncec_lock);
+ ncec_refhold_locked(ncec);
+ ncec->ncec_timeout_id = 0;
+ mutex_exit(&ncec->ncec_lock);
+
+ src_ill = nce_resolve_src(ncec, &sender6);
+ /* if we could not find a sender address, return */
+ if (src_ill == NULL) {
+ if (!isv6) {
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, sender4);
+ ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET,
+ &sender4, addrbuf, sizeof (addrbuf))));
+ } else {
+ ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6,
+ &ncec->ncec_addr, addrbuf, sizeof (addrbuf))));
+ }
+ nce_restart_timer(ncec, ill->ill_reachable_retrans_time);
+ ncec_refrele(ncec);
+ return;
+ }
+ if (!isv6)
+ IN6_V4MAPPED_TO_IPADDR(&sender6, sender4);
+ mutex_enter(&ncec->ncec_lock);
/*
- * Check the reachability state first.
+ * Check the reachability state.
*/
- switch (nce->nce_state) {
+ switch (ncec->ncec_state) {
case ND_DELAY:
- nce->nce_state = ND_PROBE;
- mutex_exit(&nce->nce_lock);
- (void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
- NDP_UNICAST);
+ ASSERT(ncec->ncec_lladdr != NULL);
+ ncec->ncec_state = ND_PROBE;
+ ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
+ if (isv6) {
+ mutex_exit(&ncec->ncec_lock);
+ (void) ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT,
+ src_ill->ill_phys_addr,
+ src_ill->ill_phys_addr_length,
+ &sender6, &ncec->ncec_addr,
+ NDP_UNICAST);
+ } else {
+ (void) arp_request(ncec, sender4, src_ill);
+ mutex_exit(&ncec->ncec_lock);
+ }
if (ip_debug > 3) {
/* ip2dbg */
- pr_addr_dbg("ndp_timer: state for %s changed "
- "to PROBE\n", AF_INET6, &nce->nce_addr);
+ pr_addr_dbg("nce_timer: state for %s changed "
+ "to PROBE\n", AF_INET6, &ncec->ncec_addr);
}
- NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
- NCE_REFRELE(nce);
- return;
+ nce_restart_timer(ncec, ill->ill_reachable_retrans_time);
+ break;
case ND_PROBE:
/* must be retransmit timer */
- nce->nce_pcnt--;
- ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
- nce->nce_pcnt >= -1);
- if (nce->nce_pcnt > 0) {
+ ASSERT(ncec->ncec_pcnt >= -1);
+ if (ncec->ncec_pcnt > 0) {
/*
- * As per RFC2461, the nce gets deleted after
+ * As per RFC2461, the ncec gets deleted after
* MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
* Note that the first unicast solicitation is sent
* during the DELAY state.
*/
- ip2dbg(("ndp_timer: pcount=%x dst %s\n",
- nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
- addrbuf, sizeof (addrbuf))));
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit_solicit(nce, B_FALSE,
- &ipv6_all_zeros,
- (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
- NDP_UNICAST);
- if (dropped) {
- mutex_enter(&nce->nce_lock);
- nce->nce_pcnt++;
- mutex_exit(&nce->nce_lock);
+ ip2dbg(("nce_timer: pcount=%x dst %s\n",
+ ncec->ncec_pcnt,
+ inet_ntop((isv6? AF_INET6 : AF_INET),
+ &ncec->ncec_addr, addrbuf, sizeof (addrbuf))));
+ if (NCE_PUBLISH(ncec)) {
+ mutex_exit(&ncec->ncec_lock);
+ /*
+ * send out a probe; note that src_ill
+ * is ignored by nce_dad() for all
+ * DAD message types other than IPv6
+ * unicast probes
+ */
+ nce_dad(ncec, src_ill, B_TRUE);
+ } else {
+ ASSERT(src_ill != NULL);
+ ncec->ncec_pcnt--;
+ if (isv6) {
+ mutex_exit(&ncec->ncec_lock);
+ (void) ndp_xmit(src_ill,
+ ND_NEIGHBOR_SOLICIT,
+ src_ill->ill_phys_addr,
+ src_ill->ill_phys_addr_length,
+ &sender6, &ncec->ncec_addr,
+ NDP_UNICAST);
+ } else {
+ /*
+					 * Since the ncec has a known
+					 * link-layer address, the ARP
+					 * request will be sent out as a
+					 * link-layer unicast.
+ */
+ (void) arp_request(ncec, sender4,
+ src_ill);
+ mutex_exit(&ncec->ncec_lock);
+ }
+ nce_restart_timer(ncec,
+ ill->ill_reachable_retrans_time);
}
- NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
- } else if (nce->nce_pcnt < 0) {
- /* No hope, delete the nce */
- nce->nce_state = ND_UNREACHABLE;
- mutex_exit(&nce->nce_lock);
+ } else if (ncec->ncec_pcnt < 0) {
+ /* No hope, delete the ncec */
+ /* Tell datapath it went bad */
+ ncec->ncec_state = ND_UNREACHABLE;
+ mutex_exit(&ncec->ncec_lock);
if (ip_debug > 2) {
/* ip1dbg */
- pr_addr_dbg("ndp_timer: Delete IRE for"
- " dst %s\n", AF_INET6, &nce->nce_addr);
+ pr_addr_dbg("nce_timer: Delete NCE for"
+ " dst %s\n", (isv6? AF_INET6: AF_INET),
+ &ncec->ncec_addr);
}
- ndp_delete(nce);
- } else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
- /* Wait RetransTimer, before deleting the entry */
- ip2dbg(("ndp_timer: pcount=%x dst %s\n",
- nce->nce_pcnt, inet_ntop(AF_INET6,
- &nce->nce_addr, addrbuf, sizeof (addrbuf))));
- mutex_exit(&nce->nce_lock);
+			/* Static entries are never deleted. */
+ if ((ncec->ncec_flags & NCE_F_STATIC) == 0)
+ ncec_delete(ncec);
+
+ } else if (!NCE_PUBLISH(ncec)) {
+ /*
+ * Probe count is 0 for a dynamic entry (one that we
+ * ourselves are not publishing). We should never get
+ * here if NONUD was requested, hence the ASSERT below.
+ */
+ ASSERT((ncec->ncec_flags & NCE_F_NONUD) == 0);
+ ip2dbg(("nce_timer: pcount=%x dst %s\n",
+ ncec->ncec_pcnt, inet_ntop(AF_INET6,
+ &ncec->ncec_addr, addrbuf, sizeof (addrbuf))));
+ ncec->ncec_pcnt--;
+ mutex_exit(&ncec->ncec_lock);
/* Wait one interval before killing */
- NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
+ nce_restart_timer(ncec,
+ ill->ill_reachable_retrans_time);
} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
ipif_t *ipif;
+ ipaddr_t ncec_addr;
/*
* We're done probing, and we can now declare this
* address to be usable. Let IP know that it's ok to
* use.
*/
- nce->nce_state = ND_REACHABLE;
- mutex_exit(&nce->nce_lock);
- ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
- nce->nce_ill);
+ ncec->ncec_state = ND_REACHABLE;
+ ncec->ncec_flags &= ~NCE_F_UNVERIFIED;
+ mutex_exit(&ncec->ncec_lock);
+ if (isv6) {
+ ipif = ipif_lookup_addr_exact_v6(
+ &ncec->ncec_addr, ill, ipst);
+ } else {
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr,
+ ncec_addr);
+ ipif = ipif_lookup_addr_exact(ncec_addr, ill,
+ ipst);
+ }
if (ipif != NULL) {
if (ipif->ipif_was_dup) {
char ibuf[LIFNAMSIZ + 10];
@@ -2725,17 +2484,28 @@ ndp_timer(void *arg)
ipif->ipif_addr_ready = 1;
ipif_refrele(ipif);
}
+ if (!isv6 && arp_no_defense)
+ break;
/* Begin defending our new address */
- nce->nce_unsolicit_count = 0;
- dropped = nce_xmit_advert(nce, B_FALSE,
- &ipv6_all_hosts_mcast, 0);
- if (dropped) {
- nce->nce_unsolicit_count = 1;
- NDP_RESTART_TIMER(nce,
- ipst->ips_ip_ndp_unsolicit_interval);
- } else if (ipst->ips_ip_ndp_defense_interval != 0) {
- NDP_RESTART_TIMER(nce,
- ipst->ips_ip_ndp_defense_interval);
+ if (ncec->ncec_unsolicit_count > 0) {
+ ncec->ncec_unsolicit_count--;
+ if (isv6) {
+ dropped = ndp_announce(ncec);
+ } else {
+ dropped = arp_announce(ncec);
+ }
+
+ if (dropped)
+ ncec->ncec_unsolicit_count++;
+ else
+ ncec->ncec_last_time_defended =
+ ddi_get_lbolt();
+ }
+ if (ncec->ncec_unsolicit_count > 0) {
+ nce_restart_timer(ncec,
+ ANNOUNCE_INTERVAL(isv6));
+ } else if (DEFENSE_INTERVAL(isv6) != 0) {
+ nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6));
}
} else {
/*
@@ -2744,76 +2514,93 @@ ndp_timer(void *arg)
* doing anything, but switch to reachable state so
* that the restart will work.
*/
- nce->nce_state = ND_REACHABLE;
- mutex_exit(&nce->nce_lock);
+ ncec->ncec_state = ND_REACHABLE;
+ mutex_exit(&ncec->ncec_lock);
}
- NCE_REFRELE(nce);
- return;
+ break;
case ND_INCOMPLETE: {
- ip6_t *ip6h;
- ip6i_t *ip6i;
- mblk_t *mp, *datamp, *nextmp, **prevmpp;
+ mblk_t *mp, *nextmp;
+ mblk_t **prevmpp;
/*
- * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
- * for any IPMP probe packets, and toss 'em. IPMP probe
- * packets will always be at the head of nce_qd_mp and always
- * have an ip6i_t header, so we can stop at the first queued
- * ND packet without an ip6i_t.
+ * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp
+ * for any IPMP probe packets, and toss them. IPMP probe
+ * packets will always be at the head of ncec_qd_mp, so that
+ * we can stop at the first queued ND packet that is
+ * not a probe packet.
*/
- prevmpp = &nce->nce_qd_mp;
- for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
+ prevmpp = &ncec->ncec_qd_mp;
+ for (mp = ncec->ncec_qd_mp; mp != NULL; mp = nextmp) {
nextmp = mp->b_next;
- datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
- ip6h = (ip6_t *)datamp->b_rptr;
- if (ip6h->ip6_nxt != IPPROTO_RAW)
- break;
- ip6i = (ip6i_t *)ip6h;
- if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
+ if (IS_UNDER_IPMP(ill) && ncec->ncec_nprobes > 0) {
inet_freemsg(mp);
+ ncec->ncec_nprobes--;
*prevmpp = nextmp;
} else {
prevmpp = &mp->b_next;
}
}
- ip_ndp_resolve(nce);
- mutex_exit(&nce->nce_lock);
- NCE_REFRELE(nce);
+
+ /*
+ * Must be resolver's retransmit timer.
+ */
+ mutex_exit(&ncec->ncec_lock);
+ ip_ndp_resolve(ncec);
break;
}
case ND_REACHABLE:
- if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
- nce->nce_unsolicit_count != 0) ||
- ((nce->nce_flags & NCE_F_PERMANENT) &&
- ipst->ips_ip_ndp_defense_interval != 0)) {
- if (nce->nce_unsolicit_count > 0)
- nce->nce_unsolicit_count--;
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit_advert(nce, B_FALSE,
- &ipv6_all_hosts_mcast, 0);
+ if (((ncec->ncec_flags & NCE_F_UNSOL_ADV) &&
+ ncec->ncec_unsolicit_count != 0) ||
+ (NCE_PUBLISH(ncec) && DEFENSE_INTERVAL(isv6) != 0)) {
+ if (ncec->ncec_unsolicit_count > 0) {
+ ncec->ncec_unsolicit_count--;
+ mutex_exit(&ncec->ncec_lock);
+ /*
+ * When we get to zero announcements left,
+ * switch to address defense
+ */
+ } else {
+ boolean_t rate_limit;
+
+ mutex_exit(&ncec->ncec_lock);
+ rate_limit = ill_defend_rate_limit(ill, ncec);
+ if (rate_limit) {
+ nce_restart_timer(ncec,
+ DEFENSE_INTERVAL(isv6));
+ break;
+ }
+ }
+ if (isv6) {
+ dropped = ndp_announce(ncec);
+ } else {
+ dropped = arp_announce(ncec);
+ }
+ mutex_enter(&ncec->ncec_lock);
if (dropped) {
- mutex_enter(&nce->nce_lock);
- nce->nce_unsolicit_count++;
- mutex_exit(&nce->nce_lock);
+ ncec->ncec_unsolicit_count++;
+ } else {
+ ncec->ncec_last_time_defended =
+ ddi_get_lbolt();
}
- if (nce->nce_unsolicit_count != 0) {
- NDP_RESTART_TIMER(nce,
- ipst->ips_ip_ndp_unsolicit_interval);
+ mutex_exit(&ncec->ncec_lock);
+ if (ncec->ncec_unsolicit_count != 0) {
+ nce_restart_timer(ncec,
+ ANNOUNCE_INTERVAL(isv6));
} else {
- NDP_RESTART_TIMER(nce,
- ipst->ips_ip_ndp_defense_interval);
+ nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6));
}
} else {
- mutex_exit(&nce->nce_lock);
+ mutex_exit(&ncec->ncec_lock);
}
- NCE_REFRELE(nce);
break;
default:
- mutex_exit(&nce->nce_lock);
- NCE_REFRELE(nce);
+ mutex_exit(&ncec->ncec_lock);
break;
}
+done:
+ ncec_refrele(ncec);
+ ill_refrele(src_ill);
}
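Once an address reaches ND_REACHABLE, nce_timer() drains ncec_unsolicit_count at ANNOUNCE_INTERVAL and then falls back to DEFENSE_INTERVAL when that interval is non-zero. A rough sketch of that schedule, with arbitrary example values standing in for the tunables:

#include <stdio.h>

int
main(void)
{
	int unsolicit_count = 3;	/* announcements still owed */
	int announce_interval = 2000;	/* ms; arbitrary example value */
	int defense_interval = 300000;	/* ms; arbitrary example value */
	int now = 0;

	while (unsolicit_count > 0) {
		printf("t=%7d ms: send unsolicited announcement (%d left)\n",
		    now, unsolicit_count);
		unsolicit_count--;
		if (unsolicit_count > 0)
			now += announce_interval;
	}
	if (defense_interval != 0)
		printf("t=%7d ms: announcements done; defend at most "
		    "every %d ms\n", now, defense_interval);
	return (0);
}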
/*
@@ -2821,31 +2608,21 @@ ndp_timer(void *arg)
* Copy SAP from ill.
*/
static void
-nce_set_ll(nce_t *nce, uchar_t *ll_addr)
+nce_set_ll(ncec_t *ncec, uchar_t *ll_addr)
{
- ill_t *ill = nce->nce_ill;
- uchar_t *woffset;
+ ill_t *ill = ncec->ncec_ill;
ASSERT(ll_addr != NULL);
- /* Always called before fast_path_probe */
- ASSERT(nce->nce_fp_mp == NULL);
- if (ill->ill_sap_length != 0) {
- /*
- * Copy the SAP type specified in the
- * request into the xmit template.
- */
- NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
- }
if (ill->ill_phys_addr_length > 0) {
/*
* The bcopy() below used to be called for the physical address
* length rather than the link layer address length. For
* ethernet and many other media, the phys_addr and lla are
* identical.
- * However, with xresolv interfaces being introduced, the
- * phys_addr and lla are no longer the same, and the physical
- * address may not have any useful meaning, so we use the lla
- * for IPv6 address resolution and destination addressing.
+ *
+ * The phys_addr and lla may not be the same for devices that
+ * support DL_IPV6_LINK_LAYER_ADDR, though there are currently
+ * no known instances of these.
*
* For PPP or other interfaces with a zero length
* physical address, don't do anything here.
@@ -2854,22 +2631,18 @@ nce_set_ll(nce_t *nce, uchar_t *ll_addr)
* Using the lla for them would change the way they operate.
* Doing nothing in such cases preserves expected behavior.
*/
- woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
- bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
+ bcopy(ll_addr, ncec->ncec_lladdr, ill->ill_nd_lla_len);
}
}
-static boolean_t
-nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
+boolean_t
+nce_cmp_ll_addr(const ncec_t *ncec, const uchar_t *ll_addr,
+ uint32_t ll_addr_len)
{
- ill_t *ill = nce->nce_ill;
- uchar_t *ll_offset;
-
- ASSERT(nce->nce_res_mp != NULL);
+ ASSERT(ncec->ncec_lladdr != NULL);
if (ll_addr == NULL)
return (B_FALSE);
- ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
- if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
+ if (bcmp(ll_addr, ncec->ncec_lladdr, ll_addr_len) != 0)
return (B_TRUE);
return (B_FALSE);
}
@@ -2878,15 +2651,16 @@ nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
* Updates the link layer address or the reachability state of
* a cache entry. Reset probe counter if needed.
*/
-static void
-nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
+void
+nce_update(ncec_t *ncec, uint16_t new_state, uchar_t *new_ll_addr)
{
- ill_t *ill = nce->nce_ill;
+ ill_t *ill = ncec->ncec_ill;
boolean_t need_stop_timer = B_FALSE;
boolean_t need_fastpath_update = B_FALSE;
+ nce_t *nce = NULL;
+ timeout_id_t tid;
- ASSERT(MUTEX_HELD(&nce->nce_lock));
- ASSERT(nce->nce_ipversion == IPV6_VERSION);
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
/*
* If this interface does not do NUD, there is no point
* in allowing an update to the cache entry. Although
@@ -2896,184 +2670,251 @@ nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
* Non-Resolvers will always be created as REACHABLE.
*/
if (new_state != ND_UNCHANGED) {
- if ((nce->nce_flags & NCE_F_NONUD) &&
- (nce->nce_state != ND_INCOMPLETE))
+ if ((ncec->ncec_flags & NCE_F_NONUD) &&
+ (ncec->ncec_state != ND_INCOMPLETE))
return;
ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
need_stop_timer = B_TRUE;
if (new_state == ND_REACHABLE)
- nce->nce_last = TICK_TO_MSEC(lbolt64);
+ ncec->ncec_last = TICK_TO_MSEC(lbolt64);
else {
/* We force NUD in this case */
- nce->nce_last = 0;
+ ncec->ncec_last = 0;
}
- nce->nce_state = new_state;
- nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
+ ncec->ncec_state = new_state;
+ ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
+ ASSERT(ncec->ncec_lladdr != NULL || new_state == ND_INITIAL ||
+ new_state == ND_INCOMPLETE);
+ }
+ if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) {
+ tid = ncec->ncec_timeout_id;
+ ncec->ncec_timeout_id = 0;
}
/*
- * In case of fast path we need to free the the fastpath
- * M_DATA and do another probe. Otherwise we can just
+ * Re-trigger fastpath probe and
* overwrite the DL_UNITDATA_REQ data, noting we'll lose
* whatever packets that happens to be transmitting at the time.
*/
if (new_ll_addr != NULL) {
- ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
- ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
- bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
- NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
- if (nce->nce_fp_mp != NULL) {
- freemsg(nce->nce_fp_mp);
- nce->nce_fp_mp = NULL;
- }
+ bcopy(new_ll_addr, ncec->ncec_lladdr,
+ ill->ill_phys_addr_length);
need_fastpath_update = B_TRUE;
}
- mutex_exit(&nce->nce_lock);
- if (need_stop_timer) {
- (void) untimeout(nce->nce_timeout_id);
- nce->nce_timeout_id = 0;
+ mutex_exit(&ncec->ncec_lock);
+ if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) {
+ if (tid != 0)
+ (void) untimeout(tid);
}
- if (need_fastpath_update)
- nce_fastpath(nce);
- mutex_enter(&nce->nce_lock);
+ if (need_fastpath_update) {
+ /*
+ * Delete any existing dlur_mp and fp_mp information.
+ * For IPMP interfaces, all underlying ill's must be checked
+ * and purged.
+ */
+ nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL);
+ /*
+ * add the new dlur_mp and fp_mp
+ */
+ nce = nce_fastpath(ncec, B_TRUE, NULL);
+ if (nce != NULL)
+ nce_refrele(nce);
+ }
+ mutex_enter(&ncec->ncec_lock);
}
-void
-nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
+static void
+nce_queue_mp_common(ncec_t *ncec, mblk_t *mp, boolean_t head_insert)
{
uint_t count = 0;
mblk_t **mpp, *tmp;
- ASSERT(MUTEX_HELD(&nce->nce_lock));
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
- for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
- if (++count > nce->nce_ill->ill_max_buf) {
- tmp = nce->nce_qd_mp->b_next;
- nce->nce_qd_mp->b_next = NULL;
- nce->nce_qd_mp->b_prev = NULL;
- freemsg(nce->nce_qd_mp);
- nce->nce_qd_mp = tmp;
+ for (mpp = &ncec->ncec_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
+ if (++count > ncec->ncec_ill->ill_max_buf) {
+ tmp = ncec->ncec_qd_mp->b_next;
+ ncec->ncec_qd_mp->b_next = NULL;
+ /*
+ * If we never create data addrs on the under_ill,
+ * does this matter?
+ */
+ BUMP_MIB(ncec->ncec_ill->ill_ip_mib,
+ ipIfStatsOutDiscards);
+ ip_drop_output("ipIfStatsOutDiscards", ncec->ncec_qd_mp,
+ ncec->ncec_ill);
+ freemsg(ncec->ncec_qd_mp);
+ ncec->ncec_qd_mp = tmp;
}
}
if (head_insert) {
- mp->b_next = nce->nce_qd_mp;
- nce->nce_qd_mp = mp;
+ ncec->ncec_nprobes++;
+ mp->b_next = ncec->ncec_qd_mp;
+ ncec->ncec_qd_mp = mp;
} else {
*mpp = mp;
}
}
-static void
-nce_queue_mp(nce_t *nce, mblk_t *mp)
+/*
+ * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be
+ * queued at the head or tail of the queue based on the input argument
+ * 'head_insert'. The caller should specify this argument as B_TRUE if this
+ * packet is an IPMP probe packet, in which case the following happens:
+ *
+ * 1. Insert it at the head of the ncec_qd_mp list. Consider the normal
+ * (non-ipmp_probe) load-speading case where the source address of the ND
+ * packet is not tied to ncec_ill. If the ill bound to the source address
+ * cannot receive, the response to the ND packet will not be received.
+ * However, if ND packets for ncec_ill's probes are queued behind that ND
+ * packet, those probes will also fail to be sent, and thus in.mpathd will
+ * erroneously conclude that ncec_ill has also failed.
+ *
+ * 2. Drop the ipmp_probe packet in ndp_timer() if the ND did not succeed on
+ * the first attempt. This ensures that ND problems do not manifest as
+ * probe RTT spikes.
+ *
+ * We achieve this by inserting ipmp_probe() packets at the head of the
+ * nce_queue.
+ *
+ * The ncec for the probe target is created with ncec_ill set to the ipmp_ill,
+ * but the caller needs to set head_insert to B_TRUE if this is a probe packet.
+ */
+void
+nce_queue_mp(ncec_t *ncec, mblk_t *mp, boolean_t head_insert)
{
- boolean_t head_insert = B_FALSE;
- ip6_t *ip6h;
- ip6i_t *ip6i;
- mblk_t *data_mp;
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
+ nce_queue_mp_common(ncec, mp, head_insert);
+}
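/*
 * A minimal user-space sketch of the queueing policy described above:
 * ordinary packets are appended at the tail, IPMP probe packets are
 * inserted at the head, and the oldest queued packet is dropped once the
 * queue exceeds a fixed limit (standing in for ill_max_buf).  pkt_t,
 * qmodel_t and the helper below are hypothetical stand-ins, not kernel
 * interfaces.
 */
#include <stdlib.h>

typedef struct pkt {
	struct pkt	*p_next;
} pkt_t;

typedef struct qmodel {
	pkt_t		*q_head;
	unsigned int	q_len;
	unsigned int	q_max;		/* cf. ill_max_buf */
	unsigned int	q_nprobes;	/* cf. ncec_nprobes */
} qmodel_t;

void
qmodel_put(qmodel_t *q, pkt_t *pkt, int head_insert)
{
	pkt_t **tailp;

	/* Drop from the head (oldest first) until there is room. */
	while (q->q_len >= q->q_max && q->q_head != NULL) {
		pkt_t *old = q->q_head;

		q->q_head = old->p_next;
		q->q_len--;
		free(old);			/* stands in for freemsg() */
	}
	pkt->p_next = NULL;
	if (head_insert) {
		q->q_nprobes++;			/* probes go to the front */
		pkt->p_next = q->q_head;
		q->q_head = pkt;
	} else {
		for (tailp = &q->q_head; *tailp != NULL;
		    tailp = &(*tailp)->p_next)
			;
		*tailp = pkt;			/* everything else appends */
	}
	q->q_len++;
}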
- ASSERT(MUTEX_HELD(&nce->nce_lock));
+/*
+ * Called when address resolution failed due to a timeout.
+ * Send an ICMP unreachable in response to all queued packets.
+ */
+void
+ndp_resolv_failed(ncec_t *ncec)
+{
+ mblk_t *mp, *nxt_mp;
+ char buf[INET6_ADDRSTRLEN];
+ ill_t *ill = ncec->ncec_ill;
+ ip_recv_attr_t iras;
- if (mp->b_datap->db_type == M_CTL)
- data_mp = mp->b_cont;
- else
- data_mp = mp;
- ip6h = (ip6_t *)data_mp->b_rptr;
- if (ip6h->ip6_nxt == IPPROTO_RAW) {
- /*
- * This message should have been pulled up already in
- * ip_wput_v6. We can't do pullups here because the message
- * could be from the nce_qd_mp which could have b_next/b_prev
- * non-NULL.
- */
- ip6i = (ip6i_t *)ip6h;
- ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
+ bzero(&iras, sizeof (iras));
+ iras.ira_flags = 0;
+ /*
+ * we are setting the ira_rill to the ipmp_ill (instead of
+ * the actual ill on which the packet was received), but this
+ * is ok because we don't actually need the real ira_rill
+ * to send the icmp unreachable to the sender.
+ */
+ iras.ira_ill = iras.ira_rill = ill;
+ iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
+ iras.ira_rifindex = iras.ira_ruifindex;
+
+ ip1dbg(("ndp_resolv_failed: dst %s\n",
+ inet_ntop(AF_INET6, (char *)&ncec->ncec_addr, buf, sizeof (buf))));
+ mutex_enter(&ncec->ncec_lock);
+ mp = ncec->ncec_qd_mp;
+ ncec->ncec_qd_mp = NULL;
+ ncec->ncec_nprobes = 0;
+ mutex_exit(&ncec->ncec_lock);
+ while (mp != NULL) {
+ nxt_mp = mp->b_next;
+ mp->b_next = NULL;
- /*
- * If this packet is marked IP6I_IPMP_PROBE, then we need to:
- *
- * 1. Insert it at the head of the nce_qd_mp list. Consider
- * the normal (non-probe) load-speading case where the
- * source address of the ND packet is not tied to nce_ill.
- * If the ill bound to the source address cannot receive,
- * the response to the ND packet will not be received.
- * However, if ND packets for nce_ill's probes are queued
- * behind that ND packet, those probes will also fail to
- * be sent, and thus in.mpathd will erroneously conclude
- * that nce_ill has also failed.
- *
- * 2. Drop the probe packet in ndp_timer() if the ND did
- * not succeed on the first attempt. This ensures that
- * ND problems do not manifest as probe RTT spikes.
- */
- if (ip6i->ip6i_flags & IP6I_IPMP_PROBE)
- head_insert = B_TRUE;
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
+ ip_drop_output("ipIfStatsOutDiscards - address unreachable",
+ mp, ill);
+ icmp_unreachable_v6(mp,
+ ICMP6_DST_UNREACH_ADDR, B_FALSE, &iras);
+ ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
+ mp = nxt_mp;
}
- nce_queue_mp_common(nce, mp, head_insert);
+ ncec_cb_dispatch(ncec); /* finish off waiting callbacks */
}
/*
- * Called when address resolution failed due to a timeout.
- * Send an ICMP unreachable in response to all queued packets.
+ * Handle the completion of NDP and ARP resolution.
*/
void
-nce_resolv_failed(nce_t *nce)
+nce_resolv_ok(ncec_t *ncec)
{
- mblk_t *mp, *nxt_mp, *first_mp;
- char buf[INET6_ADDRSTRLEN];
- ip6_t *ip6h;
- zoneid_t zoneid = GLOBAL_ZONEID;
- ip_stack_t *ipst = nce->nce_ill->ill_ipst;
+ mblk_t *mp;
+ uint_t pkt_len;
+ iaflags_t ixaflags = IXAF_NO_TRACE;
+ nce_t *nce;
+ ill_t *ill = ncec->ncec_ill;
+ boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
+ ip_stack_t *ipst = ill->ill_ipst;
+
+ if (IS_IPMP(ncec->ncec_ill)) {
+ nce_resolv_ipmp_ok(ncec);
+ return;
+ }
+ /* non IPMP case */
+
+ mutex_enter(&ncec->ncec_lock);
+ ASSERT(ncec->ncec_nprobes == 0);
+ mp = ncec->ncec_qd_mp;
+ ncec->ncec_qd_mp = NULL;
+ mutex_exit(&ncec->ncec_lock);
- ip1dbg(("nce_resolv_failed: dst %s\n",
- inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf))));
- mutex_enter(&nce->nce_lock);
- mp = nce->nce_qd_mp;
- nce->nce_qd_mp = NULL;
- mutex_exit(&nce->nce_lock);
while (mp != NULL) {
+ mblk_t *nxt_mp;
+
+ if (ill->ill_isv6) {
+ ip6_t *ip6h = (ip6_t *)mp->b_rptr;
+
+ pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
+ } else {
+ ipha_t *ipha = (ipha_t *)mp->b_rptr;
+
+ ixaflags |= IXAF_IS_IPV4;
+ pkt_len = ntohs(ipha->ipha_length);
+ }
nxt_mp = mp->b_next;
mp->b_next = NULL;
- mp->b_prev = NULL;
-
- first_mp = mp;
- if (mp->b_datap->db_type == M_CTL) {
- ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
- ASSERT(io->ipsec_out_type == IPSEC_OUT);
- zoneid = io->ipsec_out_zoneid;
- ASSERT(zoneid != ALL_ZONES);
- mp = mp->b_cont;
- mp->b_next = NULL;
- mp->b_prev = NULL;
- }
-
- ip6h = (ip6_t *)mp->b_rptr;
- if (ip6h->ip6_nxt == IPPROTO_RAW) {
- ip6i_t *ip6i;
+ /*
+ * IXAF_NO_DEV_FLOW_CTL information for TCP packets is no
+ * longer available, but it's ok to drop this flag because TCP
+ * has its own flow-control in effect, so TCP packets
+ * are not likely to get here when flow-control is in effect.
+ */
+ mutex_enter(&ill->ill_lock);
+ nce = nce_lookup(ill, &ncec->ncec_addr);
+ mutex_exit(&ill->ill_lock);
+
+ if (nce == NULL) {
+ if (isv6) {
+ BUMP_MIB(&ipst->ips_ip6_mib,
+ ipIfStatsOutDiscards);
+ } else {
+ BUMP_MIB(&ipst->ips_ip_mib,
+ ipIfStatsOutDiscards);
+ }
+ ip_drop_output("ipIfStatsOutDiscards - no nce",
+ mp, NULL);
+ freemsg(mp);
+ } else {
/*
- * This message should have been pulled up already
- * in ip_wput_v6. ip_hdr_complete_v6 assumes that
- * the header is pulled up.
+ * We don't know the zoneid, but
+ * ip_xmit does not care since IXAF_NO_TRACE
+ * is set. (We traced the packet the first
+ * time through ip_xmit.)
*/
- ip6i = (ip6i_t *)ip6h;
- ASSERT((mp->b_wptr - (uchar_t *)ip6i) >=
- sizeof (ip6i_t) + IPV6_HDR_LEN);
- mp->b_rptr += sizeof (ip6i_t);
+ (void) ip_xmit(mp, nce, ixaflags, pkt_len, 0,
+ ALL_ZONES, 0, NULL);
+ nce_refrele(nce);
}
- /*
- * Ignore failure since icmp_unreachable_v6 will silently
- * drop packets with an unspecified source address.
- */
- (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst);
- icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp,
- ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst);
mp = nxt_mp;
}
- nce_cb_dispatch(nce);
+
+ ncec_cb_dispatch(ncec); /* complete callbacks */
}
/*
- * Called by SIOCSNDP* ioctl to add/change an nce entry
+ * Called by SIOCSNDP* ioctl to add/change an ncec entry
* and the corresponding attributes.
* Disallow states other than ND_REACHABLE or ND_STALE.
*/
@@ -3082,31 +2923,28 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
{
sin6_t *sin6;
in6_addr_t *addr;
+ ncec_t *ncec;
nce_t *nce;
- int err;
+ int err = 0;
uint16_t new_flags = 0;
uint16_t old_flags = 0;
int inflags = lnr->lnr_flags;
ip_stack_t *ipst = ill->ill_ipst;
+ boolean_t do_postprocess = B_FALSE;
ASSERT(ill->ill_isv6);
if ((lnr->lnr_state_create != ND_REACHABLE) &&
(lnr->lnr_state_create != ND_STALE))
return (EINVAL);
- if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN)
- return (EINVAL);
-
sin6 = (sin6_t *)&lnr->lnr_addr;
addr = &sin6->sin6_addr;
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
- /* We know it can not be mapping so just look in the hash table */
- nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
- /* See comment in ndp_query() regarding IS_IPMP(ill) usage */
- nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce);
+ ASSERT(!IS_UNDER_IPMP(ill));
+ nce = nce_lookup_addr(ill, addr);
if (nce != NULL)
- new_flags = nce->nce_flags;
+ new_flags = nce->nce_common->ncec_flags;
switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) {
case NDF_ISROUTER_ON:
@@ -3118,7 +2956,7 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
if (nce != NULL)
- NCE_REFRELE(nce);
+ nce_refrele(nce);
return (EINVAL);
}
@@ -3132,17 +2970,15 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
if (nce != NULL)
- NCE_REFRELE(nce);
+ nce_refrele(nce);
return (EINVAL);
}
if (nce == NULL) {
- err = ndp_add_v6(ill,
+ err = nce_add_v6(ill,
(uchar_t *)lnr->lnr_hdw_addr,
+ ill->ill_phys_addr_length,
addr,
- &ipv6_all_ones,
- &ipv6_all_zeros,
- 0,
new_flags,
lnr->lnr_state_create,
&nce);
@@ -3150,269 +2986,354 @@ ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err));
return (err);
+ } else {
+ do_postprocess = B_TRUE;
}
}
- old_flags = nce->nce_flags;
+ ncec = nce->nce_common;
+ old_flags = ncec->ncec_flags;
if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) {
- /*
- * Router turned to host, delete all ires.
- * XXX Just delete the entry, but we need to add too.
- */
- nce->nce_flags &= ~NCE_F_ISROUTER;
+ ncec_router_to_host(ncec);
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- ndp_delete(nce);
- NCE_REFRELE(nce);
+ if (do_postprocess)
+ err = nce_add_v6_postprocess(nce);
+ nce_refrele(nce);
return (0);
}
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
- mutex_enter(&nce->nce_lock);
- nce->nce_flags = new_flags;
- mutex_exit(&nce->nce_lock);
+ if (do_postprocess)
+ err = nce_add_v6_postprocess(nce);
+ /*
+ * err cannot be anything other than 0 because we don't support
+ * proxy arp of static addresses.
+ */
+ ASSERT(err == 0);
+
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_flags = new_flags;
+ mutex_exit(&ncec->ncec_lock);
/*
* Note that we ignore the state at this point, which
* should be either STALE or REACHABLE. Instead we let
* the link layer address passed in to determine the state
* much like incoming packets.
*/
- nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE);
- NCE_REFRELE(nce);
+ nce_process(ncec, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE);
+ nce_refrele(nce);
return (0);
}
/*
- * If the device driver supports it, we make nce_fp_mp to have
- * an M_DATA prepend. Otherwise nce_fp_mp will be null.
- * The caller ensures there is hold on nce for this function.
- * Note that since ill_fastpath_probe() copies the mblk there is
- * no need for the hold beyond this function.
+ * Create an nce_t structure for ill using the ncec->ncec_lladdr to set up
+ * the nce_dlur_mp. If ill != ncec->ncec_ill, then the ips_ill_g_lock must
+ * be held to ensure that they are in the same group.
*/
-void
-nce_fastpath(nce_t *nce)
+static nce_t *
+nce_fastpath_create(ill_t *ill, ncec_t *ncec)
{
- ill_t *ill = nce->nce_ill;
- int res;
- ASSERT(ill != NULL);
- ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE);
+ nce_t *nce;
- if (nce->nce_fp_mp != NULL) {
- /* Already contains fastpath info */
- return;
- }
- if (nce->nce_res_mp != NULL) {
- nce_fastpath_list_add(nce);
- res = ill_fastpath_probe(ill, nce->nce_res_mp);
- /*
- * EAGAIN is an indication of a transient error
- * i.e. allocation failure etc. leave the nce in the list it
- * will be updated when another probe happens for another ire
- * if not it will be taken out of the list when the ire is
- * deleted.
- */
+ nce = nce_ill_lookup_then_add(ill, ncec);
+
+ if (nce == NULL || IS_LOOPBACK(nce->nce_ill) || IS_VNI(nce->nce_ill))
+ return (nce);
- if (res != 0 && res != EAGAIN)
- nce_fastpath_list_delete(nce);
+ /*
+ * hold the ncec_lock to synchronize with nce_update() so that,
+ * at the end of this function, the contents of nce_dlur_mp are
+ * consistent with ncec->ncec_lladdr, even though some intermediate
+ * packet may have been sent out with a mangled address, which would
+ * only be a transient condition.
+ */
+ mutex_enter(&ncec->ncec_lock);
+ if (ncec->ncec_lladdr != NULL) {
+ bcopy(ncec->ncec_lladdr, nce->nce_dlur_mp->b_rptr +
+ NCE_LL_ADDR_OFFSET(ill), ill->ill_phys_addr_length);
+ } else {
+ nce->nce_dlur_mp = ill_dlur_gen(NULL, 0, ill->ill_sap,
+ ill->ill_sap_length);
}
+ mutex_exit(&ncec->ncec_lock);
+ return (nce);
}
/*
- * Drain the list of nce's waiting for fastpath response.
+ * We set up nce_fp_mp to have an M_DATA prepend.
+ * The caller ensures there is a hold on ncec for this function.
+ * Note that since ill_fastpath_probe() copies the mblk there is
+ * no need to hold the nce or ncec beyond this function.
+ *
+ * If the caller has passed in a non-null ncec_nce to nce_fastpath(), that
+ * ncec_nce must correspond to the nce for ncec with nce_ill == ncec->ncec_ill
+ * and will be returned by this function, so that no extra nce_refrele
+ * is required for the caller. The calls from nce_add_common() use this
+ * method. All other callers (that pass in NULL ncec_nce) will have to do a
+ * nce_refrele of the returned nce (when it is non-null).
*/
-void
-nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *),
- void *arg)
+nce_t *
+nce_fastpath(ncec_t *ncec, boolean_t trigger_fp_req, nce_t *ncec_nce)
{
+ nce_t *nce;
+ ill_t *ill = ncec->ncec_ill;
- nce_t *next_nce;
- nce_t *current_nce;
- nce_t *first_nce;
- nce_t *prev_nce = NULL;
+ ASSERT(ill != NULL);
+
+ if (IS_IPMP(ill) && trigger_fp_req) {
+ trigger_fp_req = B_FALSE;
+ ipmp_ncec_fastpath(ncec, ill);
- mutex_enter(&ill->ill_lock);
- first_nce = current_nce = (nce_t *)ill->ill_fastpath_list;
- while (current_nce != (nce_t *)&ill->ill_fastpath_list) {
- next_nce = current_nce->nce_fastpath;
- /*
- * Take it off the list if we're flushing, or if the callback
- * routine tells us to do so. Otherwise, leave the nce in the
- * fastpath list to handle any pending response from the lower
- * layer. We can't drain the list when the callback routine
- * comparison failed, because the response is asynchronous in
- * nature, and may not arrive in the same order as the list
- * insertion.
- */
- if (func == NULL || func(current_nce, arg)) {
- current_nce->nce_fastpath = NULL;
- if (current_nce == first_nce)
- ill->ill_fastpath_list = first_nce = next_nce;
- else
- prev_nce->nce_fastpath = next_nce;
- } else {
- /* previous element that is still in the list */
- prev_nce = current_nce;
- }
- current_nce = next_nce;
}
- mutex_exit(&ill->ill_lock);
+ /*
+ * If the caller already has the nce corresponding to the ill, use
+ * that one. Otherwise we have to lookup/add the nce. Calls from
+ * nce_add_common() fall in the former category, and have just done
+ * the nce lookup/add that can be reused.
+ */
+ if (ncec_nce == NULL)
+ nce = nce_fastpath_create(ill, ncec);
+ else
+ nce = ncec_nce;
+
+ if (nce == NULL || IS_LOOPBACK(nce->nce_ill) || IS_VNI(nce->nce_ill))
+ return (nce);
+
+ if (trigger_fp_req)
+ nce_fastpath_trigger(nce);
+ return (nce);
}
/*
- * Add nce to the nce fastpath list.
+ * Trigger fastpath on nce. No locks may be held.
*/
-void
-nce_fastpath_list_add(nce_t *nce)
+static void
+nce_fastpath_trigger(nce_t *nce)
{
- ill_t *ill;
+ int res;
+ ill_t *ill = nce->nce_ill;
+ ncec_t *ncec = nce->nce_common;
- ill = nce->nce_ill;
+ res = ill_fastpath_probe(ill, nce->nce_dlur_mp);
+ /*
+ * EAGAIN is an indication of a transient error, e.g. an
+ * allocation failure. Leave the ncec in the list; it
+ * will be updated when another probe happens for another ire,
+ * and if not, it will be taken out of the list when the ire
+ * is deleted.
+ */
+ if (res != 0 && res != EAGAIN && res != ENOTSUP)
+ nce_fastpath_list_delete(ill, ncec, NULL);
+}
- mutex_enter(&ill->ill_lock);
- mutex_enter(&nce->nce_lock);
+/*
+ * Add ncec to the nce fastpath list on ill.
+ */
+static nce_t *
+nce_ill_lookup_then_add_locked(ill_t *ill, ncec_t *ncec)
+{
+ nce_t *nce = NULL;
+ ASSERT(MUTEX_HELD(&ill->ill_lock));
/*
- * if nce has not been deleted and
+ * Atomically ensure that the ill is not CONDEMNED and is not going
+ * down, before adding the NCE.
+ */
+ if (ill->ill_state_flags & ILL_CONDEMNED)
+ return (NULL);
+ mutex_enter(&ncec->ncec_lock);
+ /*
+ * if ncec has not been deleted and
* is not already in the list add it.
*/
- if (!(nce->nce_flags & NCE_F_CONDEMNED) &&
- (nce->nce_fastpath == NULL)) {
- nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list;
- ill->ill_fastpath_list = nce;
+ if (!NCE_ISCONDEMNED(ncec)) {
+ nce = nce_lookup(ill, &ncec->ncec_addr);
+ if (nce != NULL)
+ goto done;
+ nce = nce_add(ill, ncec);
}
+done:
+ mutex_exit(&ncec->ncec_lock);
+ return (nce);
+}
- mutex_exit(&nce->nce_lock);
+nce_t *
+nce_ill_lookup_then_add(ill_t *ill, ncec_t *ncec)
+{
+ nce_t *nce;
+
+ mutex_enter(&ill->ill_lock);
+ nce = nce_ill_lookup_then_add_locked(ill, ncec);
mutex_exit(&ill->ill_lock);
+ return (nce);
}
+
/*
- * remove nce from the nce fastpath list.
+ * remove ncec from the ill_nce list. If 'dead' is non-null, the deleted
+ * nce is added to the 'dead' list, and the caller must nce_refrele() the
+ * entry after all locks have been dropped.
*/
void
-nce_fastpath_list_delete(nce_t *nce)
+nce_fastpath_list_delete(ill_t *ill, ncec_t *ncec, list_t *dead)
{
- nce_t *nce_ptr;
-
- ill_t *ill;
+ nce_t *nce;
- ill = nce->nce_ill;
ASSERT(ill != NULL);
- mutex_enter(&ill->ill_lock);
- if (nce->nce_fastpath == NULL)
- goto done;
-
- ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list);
+ /* first clean out any nce pointers in the under_ills */
+ if (IS_IPMP(ill))
+ ipmp_ncec_flush_nce(ncec);
- if (ill->ill_fastpath_list == nce) {
- ill->ill_fastpath_list = nce->nce_fastpath;
- } else {
- nce_ptr = ill->ill_fastpath_list;
- while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) {
- if (nce_ptr->nce_fastpath == nce) {
- nce_ptr->nce_fastpath = nce->nce_fastpath;
- break;
- }
- nce_ptr = nce_ptr->nce_fastpath;
+ /* now the ill itself */
+ mutex_enter(&ill->ill_lock);
+ for (nce = list_head(&ill->ill_nce); nce != NULL;
+ nce = list_next(&ill->ill_nce, nce)) {
+ if (nce->nce_common == ncec) {
+ nce_refhold(nce);
+ nce_delete(nce);
+ break;
}
}
-
- nce->nce_fastpath = NULL;
-done:
mutex_exit(&ill->ill_lock);
+ if (nce != NULL) {
+ if (dead == NULL)
+ nce_refrele(nce);
+ else
+ list_insert_tail(dead, nce);
+ }
}
/*
- * Update all NCE's that are not in fastpath mode and
- * have an nce_fp_mp that matches mp. mp->b_cont contains
- * the fastpath header.
- *
- * Returns TRUE if entry should be dequeued, or FALSE otherwise.
+ * when the fastpath response does not fit in the datab
+ * associated with the existing nce_fp_mp, we delete and
+ * add the nce to retrigger fastpath based on the information
+ * in the ncec_t.
*/
-boolean_t
-ndp_fastpath_update(nce_t *nce, void *arg)
+static nce_t *
+nce_delete_then_add(nce_t *nce)
+{
+ ill_t *ill = nce->nce_ill;
+ nce_t *newnce = NULL;
+
+ ip0dbg(("nce_delete_then_add nce %p ill %s\n",
+ (void *)nce, ill->ill_name));
+ mutex_enter(&ill->ill_lock);
+ mutex_enter(&nce->nce_common->ncec_lock);
+ nce_delete(nce);
+ /*
+ * Make sure that ncec is not condemned before adding. We hold the
+ * ill_lock and ncec_lock to synchronize with ncec_delete() and
+ * ipmp_ncec_flush_nce()
+ */
+ if (!NCE_ISCONDEMNED(nce->nce_common))
+ newnce = nce_add(ill, nce->nce_common);
+ mutex_exit(&nce->nce_common->ncec_lock);
+ mutex_exit(&ill->ill_lock);
+ nce_refrele(nce);
+ return (newnce); /* could be null if nomem */
+}
+
+typedef struct nce_fp_match_s {
+ nce_t *nce_fp_match_res;
+ mblk_t *nce_fp_match_ack_mp;
+} nce_fp_match_t;
+
+/* ARGSUSED */
+static int
+nce_fastpath_match_dlur(ill_t *ill, nce_t *nce, void *arg)
{
- mblk_t *mp, *fp_mp;
+ nce_fp_match_t *nce_fp_marg = arg;
+ ncec_t *ncec = nce->nce_common;
+ mblk_t *mp = nce_fp_marg->nce_fp_match_ack_mp;
uchar_t *mp_rptr, *ud_mp_rptr;
- mblk_t *ud_mp = nce->nce_res_mp;
+ mblk_t *ud_mp = nce->nce_dlur_mp;
ptrdiff_t cmplen;
- if (nce->nce_flags & NCE_F_MAPPING)
- return (B_TRUE);
- if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL))
- return (B_TRUE);
-
- ip2dbg(("ndp_fastpath_update: trying\n"));
- mp = (mblk_t *)arg;
+ /*
+ * mp is the mp associated with the fastpath ack.
+ * ud_mp is the outstanding DL_UNITDATA_REQ on the nce_t
+ * under consideration. If the contents match, then the
+ * fastpath ack is used to update the nce.
+ */
+ if (ud_mp == NULL)
+ return (0); /* MH_WALK_CONTINUE */
mp_rptr = mp->b_rptr;
cmplen = mp->b_wptr - mp_rptr;
ASSERT(cmplen >= 0);
+
ud_mp_rptr = ud_mp->b_rptr;
/*
- * The nce is locked here to prevent any other threads
- * from accessing and changing nce_res_mp when the IPv6 address
- * becomes resolved to an lla while we're in the middle
- * of looking at and comparing the hardware address (lla).
- * It is also locked to prevent multiple threads in nce_fastpath_update
- * from examining nce_res_mp atthe same time.
+ * The ncec is locked here to prevent any other threads from accessing
+ * and changing nce_dlur_mp when the address becomes resolved to an
+ * lla while we're in the middle of looking at and comparing the
+ * hardware address (lla). It is also locked to prevent multiple
+ * threads in nce_fastpath() from examining nce_dlur_mp at the same
+ * time.
*/
- mutex_enter(&nce->nce_lock);
+ mutex_enter(&ncec->ncec_lock);
if (ud_mp->b_wptr - ud_mp_rptr != cmplen ||
- bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) {
- mutex_exit(&nce->nce_lock);
- /*
- * Don't take the ire off the fastpath list yet,
- * since the response may come later.
- */
- return (B_FALSE);
- }
- /* Matched - install mp as the fastpath mp */
- ip1dbg(("ndp_fastpath_update: match\n"));
- fp_mp = dupb(mp->b_cont);
- if (fp_mp != NULL) {
- nce->nce_fp_mp = fp_mp;
+ bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) == 0) {
+ nce_fp_marg->nce_fp_match_res = nce;
+ mutex_exit(&ncec->ncec_lock);
+ nce_refhold(nce);
+ return (1); /* MH_WALK_TERMINATE */
}
- mutex_exit(&nce->nce_lock);
- return (B_TRUE);
+ mutex_exit(&ncec->ncec_lock);
+ return (0); /* MH_WALK_CONTINUE */
}
/*
- * This function handles the DL_NOTE_FASTPATH_FLUSH notification from
- * driver. Note that it assumes IP is exclusive...
+ * Update all NCEs that are not in fastpath mode and
+ * have an nce_fp_mp that matches mp. mp->b_cont contains
+ * the fastpath header.
*/
-/* ARGSUSED */
void
-ndp_fastpath_flush(nce_t *nce, char *arg)
+nce_fastpath_update(ill_t *ill, mblk_t *mp)
{
- if (nce->nce_flags & NCE_F_MAPPING)
- return;
- /* No fastpath info? */
- if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL)
+ nce_fp_match_t nce_fp_marg;
+ nce_t *nce;
+ mblk_t *nce_fp_mp, *fp_mp;
+
+ nce_fp_marg.nce_fp_match_res = NULL;
+ nce_fp_marg.nce_fp_match_ack_mp = mp;
+
+ nce_walk(ill, nce_fastpath_match_dlur, &nce_fp_marg);
+
+ if ((nce = nce_fp_marg.nce_fp_match_res) == NULL)
return;
- if (nce->nce_ipversion == IPV4_VERSION &&
- nce->nce_flags & NCE_F_BCAST) {
- /*
- * IPv4 BROADCAST entries:
- * We can't delete the nce since it is difficult to
- * recreate these without going through the
- * ipif down/up dance.
- *
- * All access to nce->nce_fp_mp in the case of these
- * is protected by nce_lock.
- */
- mutex_enter(&nce->nce_lock);
- if (nce->nce_fp_mp != NULL) {
- freeb(nce->nce_fp_mp);
- nce->nce_fp_mp = NULL;
- mutex_exit(&nce->nce_lock);
- nce_fastpath(nce);
- } else {
+ mutex_enter(&nce->nce_lock);
+ nce_fp_mp = nce->nce_fp_mp;
+
+ if (nce_fp_mp != NULL) {
+ fp_mp = mp->b_cont;
+ if (nce_fp_mp->b_rptr + MBLKL(fp_mp) >
+ nce_fp_mp->b_datap->db_lim) {
mutex_exit(&nce->nce_lock);
+ nce = nce_delete_then_add(nce);
+ if (nce == NULL) {
+ return;
+ }
+ mutex_enter(&nce->nce_lock);
+ nce_fp_mp = nce->nce_fp_mp;
}
+ }
+
+ /* Matched - install mp as the fastpath mp */
+ if (nce_fp_mp == NULL) {
+ fp_mp = dupb(mp->b_cont);
+ nce->nce_fp_mp = fp_mp;
} else {
- /* Just delete the NCE... */
- ndp_delete(nce);
+ fp_mp = mp->b_cont;
+ bcopy(fp_mp->b_rptr, nce_fp_mp->b_rptr, MBLKL(fp_mp));
+ nce->nce_fp_mp->b_wptr = nce->nce_fp_mp->b_rptr
+ + MBLKL(fp_mp);
}
+ mutex_exit(&nce->nce_lock);
+ nce_refrele(nce);
}
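/*
 * A minimal user-space sketch of the update policy above: if the new
 * fastpath header no longer fits in the previously allocated buffer, the
 * entry is torn down and rebuilt with a larger buffer (the role played by
 * nce_delete_then_add()); otherwise the header is simply copied over in
 * place.  hdrbuf_t and its fields are hypothetical stand-ins for the
 * mblk/dblk bookkeeping.
 */
#include <stdlib.h>
#include <string.h>

typedef struct hdrbuf {
	unsigned char	*h_base;	/* cf. nce_fp_mp->b_rptr */
	size_t		h_lim;		/* cf. db_lim - b_rptr */
	size_t		h_len;		/* bytes of header in use */
} hdrbuf_t;

int
hdrbuf_update(hdrbuf_t *h, const unsigned char *hdr, size_t hdrlen)
{
	if (hdrlen > h->h_lim) {
		/* Doesn't fit: replace the buffer, as delete-then-add does. */
		unsigned char *nbase = malloc(hdrlen);

		if (nbase == NULL)
			return (-1);
		free(h->h_base);
		h->h_base = nbase;
		h->h_lim = hdrlen;
	}
	(void) memcpy(h->h_base, hdr, hdrlen);	/* overwrite in place */
	h->h_len = hdrlen;
	return (0);
}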
/*
@@ -3451,74 +3372,103 @@ ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen)
}
/*
- * ndp_walk function.
+ * ncec_walk function.
* Free a fraction of the NCE cache entries.
- * A fraction of zero means to not free any in that category.
+ *
+ * A possible optimization here would be to use ncec_last where possible, and
+ * delete the least-recently used entry, which would require more complex
+ * computation as we walk through the ncecs (e.g., track ncec entries by
+ * order of ncec_last and/or maintain state).
*/
-void
-ndp_cache_reclaim(nce_t *nce, char *arg)
+static void
+ncec_cache_reclaim(ncec_t *ncec, char *arg)
{
- nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg;
- uint_t rand;
+ ip_stack_t *ipst = ncec->ncec_ipst;
+ uint_t fraction = *(uint_t *)arg;
+ uint_t rand;
- if (nce->nce_flags & NCE_F_PERMANENT)
+ if ((ncec->ncec_flags &
+ (NCE_F_MYADDR | NCE_F_STATIC | NCE_F_BCAST)) != 0) {
return;
+ }
rand = (uint_t)lbolt +
- NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE);
- if (ncr->ncr_host != 0 &&
- (rand/ncr->ncr_host)*ncr->ncr_host == rand) {
- ndp_delete(nce);
- return;
+ NCE_ADDR_HASH_V6(ncec->ncec_addr, NCE_TABLE_SIZE);
+ if ((rand/fraction)*fraction == rand) {
+ IP_STAT(ipst, ip_nce_reclaim_deleted);
+ ncec_delete(ncec);
}
}
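/*
 * A minimal user-space sketch of the selection test above: an entry is
 * reclaimed only when a pseudo-random value (clock plus address hash) is
 * an exact multiple of "fraction", so each pass frees roughly 1/fraction
 * of the eligible entries.  The helper below is hypothetical; only the
 * divisibility test mirrors ncec_cache_reclaim().
 */
#include <stdio.h>

int
should_reclaim(unsigned int clock_ticks, unsigned int addr_hash,
    unsigned int fraction)
{
	unsigned int r = clock_ticks + addr_hash;

	/* integer form of (r % fraction == 0) */
	return ((r / fraction) * fraction == r);
}

int
main(void)
{
	unsigned int hash, selected = 0;

	for (hash = 0; hash < 300; hash++)
		selected += should_reclaim(1000, hash, 3);
	(void) printf("selected %u of 300 entries\n", selected);
	return (0);
}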
/*
- * ndp_walk function.
- * Count the number of NCEs that can be deleted.
- * These would be hosts but not routers.
+ * kmem_cache callback to free up memory.
+ *
+ * For now we just delete a fixed fraction.
*/
-void
-ndp_cache_count(nce_t *nce, char *arg)
+static void
+ip_nce_reclaim_stack(ip_stack_t *ipst)
{
- ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg;
+ uint_t fraction = ipst->ips_ip_nce_reclaim_fraction;
- if (nce->nce_flags & NCE_F_PERMANENT)
- return;
+ IP_STAT(ipst, ip_nce_reclaim_calls);
- ncc->ncc_total++;
- if (!(nce->nce_flags & NCE_F_ISROUTER))
- ncc->ncc_host++;
+ ncec_walk(NULL, (pfi_t)ncec_cache_reclaim, (uchar_t *)&fraction, ipst);
+
+ /*
+ * Walk all CONNs that can have a reference on an ire, ncec or dce.
+ * Get them to update any stale references to drop any refholds they
+ * have.
+ */
+ ipcl_walk(conn_ixa_cleanup, (void *)B_FALSE, ipst);
+}
+
+/*
+ * Called by the memory allocator subsystem directly, when the system
+ * is running low on memory.
+ */
+/* ARGSUSED */
+void
+ip_nce_reclaim(void *args)
+{
+ netstack_handle_t nh;
+ netstack_t *ns;
+
+ netstack_next_init(&nh);
+ while ((ns = netstack_next(&nh)) != NULL) {
+ ip_nce_reclaim_stack(ns->netstack_ip);
+ netstack_rele(ns);
+ }
+ netstack_next_fini(&nh);
}
#ifdef DEBUG
void
-nce_trace_ref(nce_t *nce)
+ncec_trace_ref(ncec_t *ncec)
{
- ASSERT(MUTEX_HELD(&nce->nce_lock));
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
- if (nce->nce_trace_disable)
+ if (ncec->ncec_trace_disable)
return;
- if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) {
- nce->nce_trace_disable = B_TRUE;
- nce_trace_cleanup(nce);
+ if (!th_trace_ref(ncec, ncec->ncec_ipst)) {
+ ncec->ncec_trace_disable = B_TRUE;
+ ncec_trace_cleanup(ncec);
}
}
void
-nce_untrace_ref(nce_t *nce)
+ncec_untrace_ref(ncec_t *ncec)
{
- ASSERT(MUTEX_HELD(&nce->nce_lock));
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
- if (!nce->nce_trace_disable)
- th_trace_unref(nce);
+ if (!ncec->ncec_trace_disable)
+ th_trace_unref(ncec);
}
static void
-nce_trace_cleanup(const nce_t *nce)
+ncec_trace_cleanup(const ncec_t *ncec)
{
- th_trace_cleanup(nce, nce->nce_trace_disable);
+ th_trace_cleanup(ncec, ncec->ncec_trace_disable);
}
#endif
@@ -3527,64 +3477,159 @@ nce_trace_cleanup(const nce_t *nce)
* Send an ICMP unreachable in response to all queued packets.
*/
void
-arp_resolv_failed(nce_t *nce)
+arp_resolv_failed(ncec_t *ncec)
{
- mblk_t *mp, *nxt_mp, *first_mp;
+ mblk_t *mp, *nxt_mp;
char buf[INET6_ADDRSTRLEN];
- zoneid_t zoneid = GLOBAL_ZONEID;
struct in_addr ipv4addr;
- ip_stack_t *ipst = nce->nce_ill->ill_ipst;
+ ill_t *ill = ncec->ncec_ill;
+ ip_stack_t *ipst = ncec->ncec_ipst;
+ ip_recv_attr_t iras;
- IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr);
+ bzero(&iras, sizeof (iras));
+ iras.ira_flags = IRAF_IS_IPV4;
+ /*
+ * we are setting the ira_rill to the ipmp_ill (instead of
+ * the actual ill on which the packet was received), but this
+ * is ok because we don't actually need the real ira_rill
+ * to send the icmp unreachable to the sender.
+ */
+ iras.ira_ill = iras.ira_rill = ill;
+ iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
+ iras.ira_rifindex = iras.ira_ruifindex;
+
+ IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ipv4addr);
ip3dbg(("arp_resolv_failed: dst %s\n",
inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf))));
- mutex_enter(&nce->nce_lock);
- mp = nce->nce_qd_mp;
- nce->nce_qd_mp = NULL;
- mutex_exit(&nce->nce_lock);
-
+ mutex_enter(&ncec->ncec_lock);
+ mp = ncec->ncec_qd_mp;
+ ncec->ncec_qd_mp = NULL;
+ ncec->ncec_nprobes = 0;
+ mutex_exit(&ncec->ncec_lock);
while (mp != NULL) {
nxt_mp = mp->b_next;
mp->b_next = NULL;
- mp->b_prev = NULL;
- first_mp = mp;
- /*
- * Send icmp unreachable messages
- * to the hosts.
- */
- (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst);
- ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n"));
- icmp_unreachable(nce->nce_ill->ill_wq, first_mp,
- ICMP_HOST_UNREACHABLE, zoneid, ipst);
+ BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
+ ip_drop_output("ipIfStatsOutDiscards - address unreachable",
+ mp, ill);
+ if (ipst->ips_ip_arp_icmp_error) {
+ ip3dbg(("arp_resolv_failed: "
+ "Calling icmp_unreachable\n"));
+ icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, &iras);
+ } else {
+ freemsg(mp);
+ }
+ ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
mp = nxt_mp;
}
+ ncec_cb_dispatch(ncec); /* finish off waiting callbacks */
}
+/*
+ * if ill is an under_ill, translate it to the ipmp_ill and add the
+ * nce on the ipmp_ill. Two nce_t entries (one on the ipmp_ill, and
+ * one on the underlying in_ill) will be created for the
+ * ncec_t in this case. The ncec_t itself will be created on the ipmp_ill.
+ */
int
-ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
- nce_t **newnce, nce_t *src_nce)
+nce_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
+ const in_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
{
int err;
- nce_t *nce;
in6_addr_t addr6;
ip_stack_t *ipst = ill->ill_ipst;
+ nce_t *nce, *upper_nce = NULL;
+ ill_t *in_ill = ill, *under = NULL;
+ boolean_t need_ill_refrele = B_FALSE;
+
+ if (flags & NCE_F_MCAST) {
+ /*
+ * hw_addr will be figured out in nce_set_multicast_v4;
+ * caller needs to pass in the cast_ill for ipmp
+ */
+ ASSERT(hw_addr == NULL);
+ ASSERT(!IS_IPMP(ill));
+ err = nce_set_multicast_v4(ill, addr, flags, newnce);
+ return (err);
+ }
+
+ if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) {
+ ill = ipmp_ill_hold_ipmp_ill(ill);
+ if (ill == NULL)
+ return (ENXIO);
+ need_ill_refrele = B_TRUE;
+ }
+ if ((flags & NCE_F_BCAST) != 0) {
+ /*
+ * IPv4 broadcast ncec: compute the hwaddr.
+ */
+ if (IS_IPMP(ill)) {
+ under = ipmp_ill_get_xmit_ill(ill, B_FALSE);
+ if (under == NULL) {
+ if (need_ill_refrele)
+ ill_refrele(ill);
+ return (ENETDOWN);
+ }
+ hw_addr = under->ill_bcast_mp->b_rptr +
+ NCE_LL_ADDR_OFFSET(under);
+ hw_addr_len = under->ill_phys_addr_length;
+ } else {
+ hw_addr = ill->ill_bcast_mp->b_rptr +
+ NCE_LL_ADDR_OFFSET(ill);
+ hw_addr_len = ill->ill_phys_addr_length;
+ }
+ }
mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
- nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
- /*
- * NOTE: IPv4 never matches across the illgrp since the NCE's we're
- * looking up have fastpath headers that are inherently per-ill.
- */
- nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
+ nce = nce_lookup_addr(ill, &addr6);
if (nce == NULL) {
- err = ndp_add_v4(ill, addr, flags, newnce, src_nce);
+ err = nce_add_v4(ill, hw_addr, hw_addr_len, addr, flags,
+ state, &nce);
} else {
- *newnce = nce;
err = EEXIST;
}
mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
+ if (err == 0)
+ err = nce_add_v4_postprocess(nce);
+
+ if (in_ill != ill && nce != NULL) {
+ nce_t *under_nce = NULL;
+
+ /*
+ * in_ill was the under_ill. Try to create the under_nce.
+ * Hold the ill_g_lock to prevent changes to group membership
+ * until we are done.
+ */
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ if (IS_IN_SAME_ILLGRP(in_ill, ill)) {
+ under_nce = nce_fastpath_create(in_ill,
+ nce->nce_common);
+ upper_nce = nce;
+ if ((nce = under_nce) == NULL)
+ err = EINVAL;
+ }
+ rw_exit(&ipst->ips_ill_g_lock);
+ if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common))
+ nce_fastpath_trigger(under_nce);
+ }
+ if (nce != NULL) {
+ if (newnce != NULL)
+ *newnce = nce;
+ else
+ nce_refrele(nce);
+ }
+
+ if (under != NULL)
+ ill_refrele(under);
+
+ if (upper_nce != NULL)
+ nce_refrele(upper_nce);
+
+ if (need_ill_refrele)
+ ill_refrele(ill);
+
return (err);
}
@@ -3592,102 +3637,860 @@ ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
* NDP Cache Entry creation routine for IPv4.
* Mapped entries are handled in arp.
* This routine must always be called with ndp4->ndp_g_lock held.
- * Prior to return, nce_refcnt is incremented.
+ * Prior to return, ncec_refcnt is incremented.
+ *
+ * IPMP notes: ncecs for non-local (i.e., !NCE_MYADDR(ncec)) addresses
+ * are always added pointing at the ipmp_ill. Thus, when the ill passed
+ * to nce_add_v4 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t
+ * entries will be created, both pointing at the same ncec_t. The nce_t
+ * entries will have their nce_ill set to the ipmp_ill and the under_ill
+ * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill.
+ * Local addresses are always created on the ill passed to nce_add_v4.
*/
-static int
-ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
- nce_t **newnce, nce_t *src_nce)
+int
+nce_add_v4(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
+ const in_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
{
- static nce_t nce_nil;
- nce_t *nce;
- mblk_t *mp;
- mblk_t *template = NULL;
- nce_t **ncep;
- ip_stack_t *ipst = ill->ill_ipst;
- uint16_t state = ND_INITIAL;
int err;
+ boolean_t is_multicast = (flags & NCE_F_MCAST);
+ struct in6_addr addr6;
+ nce_t *nce;
- ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock));
+ ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock));
ASSERT(!ill->ill_isv6);
- ASSERT((flags & NCE_F_MAPPING) == 0);
+ ASSERT(!IN_MULTICAST(htonl(*addr)) || is_multicast);
+
+ IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
+ err = nce_add_common(ill, hw_addr, hw_addr_len, &addr6, flags, state,
+ &nce);
+ ASSERT(newnce != NULL);
+ *newnce = nce;
+ return (err);
+}
+
+/*
+ * Post-processing routine to be executed after nce_add_v4(). This function
+ * triggers fastpath (if appropriate) and DAD on the newly added nce entry
+ * and must be called without any locks held.
+ *
+ * Always returns 0, but we return an int to keep this symmetric with the
+ * IPv6 counterpart.
+ */
+int
+nce_add_v4_postprocess(nce_t *nce)
+{
+ ncec_t *ncec = nce->nce_common;
+ uint16_t flags = ncec->ncec_flags;
+ boolean_t ndp_need_dad = B_FALSE;
+ boolean_t dropped;
+ clock_t delay;
+ ip_stack_t *ipst = ncec->ncec_ill->ill_ipst;
+ uchar_t *hw_addr = ncec->ncec_lladdr;
+ boolean_t trigger_fastpath = B_TRUE;
- if (ill->ill_resolver_mp == NULL)
- return (EINVAL);
/*
- * Allocate the mblk to hold the nce.
+ * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
+ * we call nce_fastpath as soon as the ncec is resolved in nce_process.
+ * We call nce_fastpath from nce_update if the link layer address of
+ * the peer changes.
*/
- mp = allocb(sizeof (nce_t), BPRI_MED);
- if (mp == NULL)
- return (ENOMEM);
+ if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) || (hw_addr == NULL &&
+ ncec->ncec_ill->ill_net_type != IRE_IF_NORESOLVER))
+ trigger_fastpath = B_FALSE;
- nce = (nce_t *)mp->b_rptr;
- mp->b_wptr = (uchar_t *)&nce[1];
- *nce = nce_nil;
- nce->nce_ill = ill;
- nce->nce_ipversion = IPV4_VERSION;
- nce->nce_flags = flags;
- nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
- nce->nce_rcnt = ill->ill_xmit_count;
- IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr);
- nce->nce_mask = ipv6_all_ones;
- nce->nce_extract_mask = ipv6_all_zeros;
- nce->nce_ll_extract_start = 0;
- nce->nce_qd_mp = NULL;
- nce->nce_mp = mp;
- /* This one is for nce getting created */
- nce->nce_refcnt = 1;
- mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
- ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
+ if (trigger_fastpath)
+ nce_fastpath_trigger(nce);
+
+ if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) {
+ /*
+ * Either the caller (by passing in ND_PROBE)
+ * or nce_add_common() (by the internally computed state
+ * based on ncec_addr and ill_net_type) has determined
+ * that this unicast entry needs DAD. Trigger DAD.
+ */
+ ndp_need_dad = B_TRUE;
+ } else if (flags & NCE_F_UNSOL_ADV) {
+ /*
+ * We account for the transmit below by assigning one
+ * less than the ndd variable. Subsequent decrements
+ * are done in nce_timer.
+ */
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_unsolicit_count =
+ ipst->ips_ip_arp_publish_count - 1;
+ mutex_exit(&ncec->ncec_lock);
+ dropped = arp_announce(ncec);
+ mutex_enter(&ncec->ncec_lock);
+ if (dropped)
+ ncec->ncec_unsolicit_count++;
+ else
+ ncec->ncec_last_time_defended = ddi_get_lbolt();
+ if (ncec->ncec_unsolicit_count != 0) {
+ nce_start_timer(ncec,
+ ipst->ips_ip_arp_publish_interval);
+ }
+ mutex_exit(&ncec->ncec_lock);
+ }
- nce->nce_trace_disable = B_FALSE;
+ /*
+ * If ncec_xmit_interval is 0, the user has configured us to send the first
+ * probe right away. Do so, and set up for the subsequent probes.
+ */
+ if (ndp_need_dad) {
+ mutex_enter(&ncec->ncec_lock);
+ if (ncec->ncec_pcnt == 0) {
+ /*
+ * DAD probes and announce can be
+ * administratively disabled by setting the
+ * probe_count to zero. Restart the timer in
+ * this case to mark the ipif as ready.
+ */
+ ncec->ncec_unsolicit_count = 0;
+ mutex_exit(&ncec->ncec_lock);
+ nce_restart_timer(ncec, 0);
+ } else {
+ mutex_exit(&ncec->ncec_lock);
+ delay = ((ncec->ncec_flags & NCE_F_FAST) ?
+ ipst->ips_arp_probe_delay :
+ ipst->ips_arp_fastprobe_delay);
+ nce_dad(ncec, NULL, (delay == 0 ? B_TRUE : B_FALSE));
+ }
+ }
+ return (0);
+}
- if (src_nce != NULL) {
+/*
+ * ncec_walk routine to update all entries that have a given destination or
+ * gateway address and cached link layer (MAC) address. This is used when ARP
+ * informs us that a network-to-link-layer mapping may have changed.
+ */
+void
+nce_update_hw_changed(ncec_t *ncec, void *arg)
+{
+ nce_hw_map_t *hwm = arg;
+ ipaddr_t ncec_addr;
+
+ if (ncec->ncec_state != ND_REACHABLE)
+ return;
+
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr);
+ if (ncec_addr != hwm->hwm_addr)
+ return;
+
+ mutex_enter(&ncec->ncec_lock);
+ if (hwm->hwm_flags != 0)
+ ncec->ncec_flags = hwm->hwm_flags;
+ nce_update(ncec, ND_STALE, hwm->hwm_hwaddr);
+ mutex_exit(&ncec->ncec_lock);
+}
+
+void
+ncec_refhold(ncec_t *ncec)
+{
+ mutex_enter(&(ncec)->ncec_lock);
+ (ncec)->ncec_refcnt++;
+ ASSERT((ncec)->ncec_refcnt != 0);
+#ifdef DEBUG
+ ncec_trace_ref(ncec);
+#endif
+ mutex_exit(&(ncec)->ncec_lock);
+}
+
+void
+ncec_refhold_notr(ncec_t *ncec)
+{
+ mutex_enter(&(ncec)->ncec_lock);
+ (ncec)->ncec_refcnt++;
+ ASSERT((ncec)->ncec_refcnt != 0);
+ mutex_exit(&(ncec)->ncec_lock);
+}
+
+static void
+ncec_refhold_locked(ncec_t *ncec)
+{
+ ASSERT(MUTEX_HELD(&(ncec)->ncec_lock));
+ (ncec)->ncec_refcnt++;
+#ifdef DEBUG
+ ncec_trace_ref(ncec);
+#endif
+}
+
+/* ncec_inactive destroys the mutex thus no mutex_exit is needed */
+void
+ncec_refrele(ncec_t *ncec)
+{
+ mutex_enter(&(ncec)->ncec_lock);
+#ifdef DEBUG
+ ncec_untrace_ref(ncec);
+#endif
+ ASSERT((ncec)->ncec_refcnt != 0);
+ if (--(ncec)->ncec_refcnt == 0) {
+ ncec_inactive(ncec);
+ } else {
+ mutex_exit(&(ncec)->ncec_lock);
+ }
+}
+
+void
+ncec_refrele_notr(ncec_t *ncec)
+{
+ mutex_enter(&(ncec)->ncec_lock);
+ ASSERT((ncec)->ncec_refcnt != 0);
+ if (--(ncec)->ncec_refcnt == 0) {
+ ncec_inactive(ncec);
+ } else {
+ mutex_exit(&(ncec)->ncec_lock);
+ }
+}
+
+/*
+ * Common to IPv4 and IPv6.
+ */
+void
+nce_restart_timer(ncec_t *ncec, uint_t ms)
+{
+ timeout_id_t tid;
+
+ ASSERT(!MUTEX_HELD(&(ncec)->ncec_lock));
+
+ /* First cancel any running timer */
+ mutex_enter(&ncec->ncec_lock);
+ tid = ncec->ncec_timeout_id;
+ ncec->ncec_timeout_id = 0;
+ if (tid != 0) {
+ mutex_exit(&ncec->ncec_lock);
+ (void) untimeout(tid);
+ mutex_enter(&ncec->ncec_lock);
+ }
+
+ /* Restart timer */
+ nce_start_timer(ncec, ms);
+ mutex_exit(&ncec->ncec_lock);
+}
+
+static void
+nce_start_timer(ncec_t *ncec, uint_t ms)
+{
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
+ /*
+ * Don't start the timer if the ncec has been deleted, or if the timer
+ * is already running
+ */
+ if (!NCE_ISCONDEMNED(ncec) && ncec->ncec_timeout_id == 0) {
+ ncec->ncec_timeout_id = timeout(nce_timer, ncec,
+ MSEC_TO_TICK(ms) == 0 ? 1 : MSEC_TO_TICK(ms));
+ }
+}
+
+int
+nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst,
+ uint16_t flags, nce_t **newnce)
+{
+ uchar_t *hw_addr;
+ int err = 0;
+ ip_stack_t *ipst = ill->ill_ipst;
+ in6_addr_t dst6;
+ nce_t *nce;
+
+ ASSERT(!ill->ill_isv6);
+
+ IN6_IPADDR_TO_V4MAPPED(*dst, &dst6);
+ mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
+ if ((nce = nce_lookup_addr(ill, &dst6)) != NULL) {
+ mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
+ goto done;
+ }
+ if (ill->ill_net_type == IRE_IF_RESOLVER) {
+ /*
+ * For IRE_IF_RESOLVER a hardware mapping can be
+ * generated; for IRE_IF_NORESOLVER, the resolution cookie
+ * in the ill is copied in nce_add_v4().
+ */
+ hw_addr = kmem_alloc(ill->ill_phys_addr_length, KM_NOSLEEP);
+ if (hw_addr == NULL) {
+ mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
+ return (ENOMEM);
+ }
+ ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr);
+ } else {
/*
- * src_nce has been provided by the caller. The only
- * caller who provides a non-null, non-broadcast
- * src_nce is from ip_newroute() which must pass in
- * a ND_REACHABLE src_nce (this condition is verified
- * via an ASSERT for the save_ire->ire_nce in ip_newroute())
+ * IRE_IF_NORESOLVER type simply copies the resolution
+ * cookie passed in. So no hw_addr is needed.
*/
- mutex_enter(&src_nce->nce_lock);
- state = src_nce->nce_state;
- if ((src_nce->nce_flags & NCE_F_CONDEMNED) ||
- (ipst->ips_ndp4->ndp_g_hw_change > 0)) {
+ hw_addr = NULL;
+ }
+ ASSERT(flags & NCE_F_MCAST);
+ ASSERT(flags & NCE_F_NONUD);
+ /* nce_state will be computed by nce_add_common() */
+ err = nce_add_v4(ill, hw_addr, ill->ill_phys_addr_length, dst, flags,
+ ND_UNCHANGED, &nce);
+ mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
+ if (err == 0)
+ err = nce_add_v4_postprocess(nce);
+ if (hw_addr != NULL)
+ kmem_free(hw_addr, ill->ill_phys_addr_length);
+ if (err != 0) {
+ ip1dbg(("nce_set_multicast_v4: create failed" "%d\n", err));
+ return (err);
+ }
+done:
+ if (newnce != NULL)
+ *newnce = nce;
+ else
+ nce_refrele(nce);
+ return (0);
+}
+
+/*
+ * This is used when scanning for "old" (least recently broadcast) NCEs. We
+ * don't want to have to walk the list for every single one, so we gather up
+ * batches at a time.
+ */
+#define NCE_RESCHED_LIST_LEN 8
+
+typedef struct {
+ ill_t *ncert_ill;
+ uint_t ncert_num;
+ ncec_t *ncert_nces[NCE_RESCHED_LIST_LEN];
+} nce_resched_t;
+
+/*
+ * Pick the longest waiting NCEs for defense.
+ */
+/* ARGSUSED */
+static int
+ncec_reschedule(ill_t *ill, nce_t *nce, void *arg)
+{
+ nce_resched_t *ncert = arg;
+ ncec_t **ncecs;
+ ncec_t **ncec_max;
+ ncec_t *ncec_temp;
+ ncec_t *ncec = nce->nce_common;
+
+ ASSERT(ncec->ncec_ill == ncert->ncert_ill);
+ /*
+ * Only reachable entries that are ready for announcement are eligible.
+ */
+ if (!NCE_MYADDR(ncec) || ncec->ncec_state != ND_REACHABLE)
+ return (0);
+ if (ncert->ncert_num < NCE_RESCHED_LIST_LEN) {
+ ncec_refhold(ncec);
+ ncert->ncert_nces[ncert->ncert_num++] = ncec;
+ } else {
+ ncecs = ncert->ncert_nces;
+ ncec_max = ncecs + NCE_RESCHED_LIST_LEN;
+ ncec_refhold(ncec);
+ for (; ncecs < ncec_max; ncecs++) {
+ ASSERT(ncec != NULL);
+ if ((*ncecs)->ncec_last_time_defended >
+ ncec->ncec_last_time_defended) {
+ ncec_temp = *ncecs;
+ *ncecs = ncec;
+ ncec = ncec_temp;
+ }
+ }
+ ncec_refrele(ncec);
+ }
+ return (0);
+}
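/*
 * A minimal user-space sketch of the batch selection above: up to
 * RESCHED_LEN candidates are collected, and once the batch is full each
 * new candidate displaces whichever batch member was defended most
 * recently, so the batch converges on the longest-waiting entries.
 * entry_t and batch_t are hypothetical stand-ins for the ncec batch.
 */
#define	RESCHED_LEN	8

typedef struct entry {
	long	e_last_defended;	/* cf. ncec_last_time_defended */
} entry_t;

typedef struct batch {
	unsigned int	b_num;
	entry_t		*b_ent[RESCHED_LEN];
} batch_t;

void
batch_consider(batch_t *b, entry_t *cand)
{
	unsigned int i;

	if (b->b_num < RESCHED_LEN) {
		b->b_ent[b->b_num++] = cand;	/* room left: just take it */
		return;
	}
	for (i = 0; i < RESCHED_LEN; i++) {
		if (b->b_ent[i]->e_last_defended > cand->e_last_defended) {
			entry_t *newer = b->b_ent[i];

			b->b_ent[i] = cand;	/* keep the older entry */
			cand = newer;		/* carry the newer one on */
		}
	}
	/* Whatever remains in cand is the most recently defended; drop it. */
}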
+
+/*
+ * Reschedule the ARP defense of any long-waiting NCEs. It's assumed that this
+ * doesn't happen very often (if at all), and thus it needn't be highly
+ * optimized. (Note, though, that it's actually O(N) complexity, because the
+ * outer loop is bounded by a constant rather than by the length of the list.)
+ */
+static void
+nce_ill_reschedule(ill_t *ill, nce_resched_t *ncert)
+{
+ ncec_t *ncec;
+ ip_stack_t *ipst = ill->ill_ipst;
+ uint_t i, defend_rate;
+
+ i = ill->ill_defend_count;
+ ill->ill_defend_count = 0;
+ if (ill->ill_isv6)
+ defend_rate = ipst->ips_ndp_defend_rate;
+ else
+ defend_rate = ipst->ips_arp_defend_rate;
+ /* If none could be sitting around, then don't reschedule */
+ if (i < defend_rate) {
+ DTRACE_PROBE1(reschedule_none, ill_t *, ill);
+ return;
+ }
+ ncert->ncert_ill = ill;
+ while (ill->ill_defend_count < defend_rate) {
+ nce_walk_common(ill, ncec_reschedule, ncert);
+ for (i = 0; i < ncert->ncert_num; i++) {
+
+ ncec = ncert->ncert_nces[i];
+ mutex_enter(&ncec->ncec_lock);
+ ncec->ncec_flags |= NCE_F_DELAYED;
+ mutex_exit(&ncec->ncec_lock);
/*
- * src_nce has been deleted, or
- * ip_arp_news is in the middle of
- * flushing entries in the the nce.
- * Fail the add, since we don't know
- * if it is safe to copy the contents of
- * src_nce
+ * we plan to schedule this ncec, so incr the
+ * defend_count in anticipation.
*/
- DTRACE_PROBE2(nce__bad__src__nce,
- nce_t *, src_nce, ill_t *, ill);
- mutex_exit(&src_nce->nce_lock);
- err = EINVAL;
- goto err_ret;
+ if (++ill->ill_defend_count >= defend_rate)
+ break;
}
- template = copyb(src_nce->nce_res_mp);
- mutex_exit(&src_nce->nce_lock);
- if (template == NULL) {
- err = ENOMEM;
- goto err_ret;
+ if (ncert->ncert_num < NCE_RESCHED_LIST_LEN)
+ break;
+ }
+}
+
+/*
+ * Check if the current rate-limiting parameters permit the sending
+ * of another address defense announcement for both IPv4 and IPv6.
+ * Returns B_TRUE if rate-limiting is in effect (i.e., send is not
+ * permitted), and B_FALSE otherwise. The `defend_rate' parameter
+ * determines how many address defense announcements are permitted
+ * in any `defend_period' interval.
+ */
+static boolean_t
+ill_defend_rate_limit(ill_t *ill, ncec_t *ncec)
+{
+ clock_t now = ddi_get_lbolt();
+ ip_stack_t *ipst = ill->ill_ipst;
+ clock_t start = ill->ill_defend_start;
+ uint32_t elapsed, defend_period, defend_rate;
+ nce_resched_t ncert;
+ boolean_t ret;
+ int i;
+
+ if (ill->ill_isv6) {
+ defend_period = ipst->ips_ndp_defend_period;
+ defend_rate = ipst->ips_ndp_defend_rate;
+ } else {
+ defend_period = ipst->ips_arp_defend_period;
+ defend_rate = ipst->ips_arp_defend_rate;
+ }
+ if (defend_rate == 0)
+ return (B_TRUE);
+ bzero(&ncert, sizeof (ncert));
+ mutex_enter(&ill->ill_lock);
+ if (start > 0) {
+ elapsed = now - start;
+ if (elapsed > SEC_TO_TICK(defend_period)) {
+ ill->ill_defend_start = now;
+ /*
+ * nce_ill_reschedule will attempt to
+ * prevent starvation by rescheduling the
+ * oldest entries, which are marked with
+ * the NCE_F_DELAYED flag.
+ */
+ nce_ill_reschedule(ill, &ncert);
+ }
+ } else {
+ ill->ill_defend_start = now;
+ }
+ ASSERT(ill->ill_defend_count <= defend_rate);
+ mutex_enter(&ncec->ncec_lock);
+ if (ncec->ncec_flags & NCE_F_DELAYED) {
+ /*
+ * This ncec was rescheduled as one of the really old
+ * entries needing on-going defense. The
+ * ill_defend_count was already incremented in
+ * nce_ill_reschedule. Go ahead and send the announce.
+ */
+ ncec->ncec_flags &= ~NCE_F_DELAYED;
+ mutex_exit(&ncec->ncec_lock);
+ ret = B_FALSE;
+ goto done;
+ }
+ mutex_exit(&ncec->ncec_lock);
+ if (ill->ill_defend_count < defend_rate)
+ ill->ill_defend_count++;
+ if (ill->ill_defend_count == defend_rate) {
+ /*
+ * we are no longer allowed to send unbidden defense
+ * messages. Wait for rescheduling.
+ */
+ ret = B_TRUE;
+ } else {
+ ret = B_FALSE;
+ }
+done:
+ mutex_exit(&ill->ill_lock);
+ /*
+ * After all the locks have been dropped we can restart the nce timers
+ * and refrele the delayed ncecs.
+ */
+ for (i = 0; i < ncert.ncert_num; i++) {
+ clock_t xmit_interval;
+ ncec_t *tmp;
+
+ tmp = ncert.ncert_nces[i];
+ xmit_interval = nce_fuzz_interval(tmp->ncec_xmit_interval,
+ B_FALSE);
+ nce_restart_timer(tmp, xmit_interval);
+ ncec_refrele(tmp);
+ }
+ return (ret);
+}
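/*
 * A minimal user-space sketch of the rate limit above: at most defend_rate
 * announcements are permitted per defend_period seconds, and a rate of
 * zero disables defense entirely.  The real code additionally lets
 * long-delayed entries (NCE_F_DELAYED) bypass the check; that part is
 * omitted here.  defend_clock_t and its fields are hypothetical.
 */
#include <time.h>

typedef struct defend_clock {
	time_t		dc_start;	/* cf. ill_defend_start */
	unsigned int	dc_count;	/* cf. ill_defend_count */
} defend_clock_t;

/* Returns 1 if the announcement must be suppressed, 0 if it may be sent. */
int
defend_rate_limited(defend_clock_t *dc, unsigned int defend_rate,
    unsigned int defend_period)
{
	time_t now = time(NULL);

	if (defend_rate == 0)
		return (1);		/* defense administratively off */
	if (dc->dc_start == 0 || now - dc->dc_start > defend_period) {
		dc->dc_start = now;	/* new period: reset the budget */
		dc->dc_count = 0;
	}
	if (dc->dc_count >= defend_rate)
		return (1);		/* budget exhausted for this period */
	dc->dc_count++;
	return (0);
}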
+
+boolean_t
+ndp_announce(ncec_t *ncec)
+{
+ return (ndp_xmit(ncec->ncec_ill, ND_NEIGHBOR_ADVERT, ncec->ncec_lladdr,
+ ncec->ncec_lladdr_length, &ncec->ncec_addr, &ipv6_all_hosts_mcast,
+ nce_advert_flags(ncec)));
+}
+
+ill_t *
+nce_resolve_src(ncec_t *ncec, in6_addr_t *src)
+{
+ mblk_t *mp;
+ in6_addr_t src6;
+ ipaddr_t src4;
+ ill_t *ill = ncec->ncec_ill;
+ ill_t *src_ill = NULL;
+ ipif_t *ipif = NULL;
+ boolean_t is_myaddr = NCE_MYADDR(ncec);
+ boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
+
+ ASSERT(src != NULL);
+ ASSERT(IN6_IS_ADDR_UNSPECIFIED(src));
+ src6 = *src;
+ if (is_myaddr) {
+ src6 = ncec->ncec_addr;
+ if (!isv6)
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, src4);
+ } else {
+ /*
+ * Try to find a source address from the outgoing packet.
+ */
+ mutex_enter(&ncec->ncec_lock);
+ mp = ncec->ncec_qd_mp;
+ if (mp != NULL) {
+ if (isv6) {
+ ip6_t *ip6h = (ip6_t *)mp->b_rptr;
+
+ src6 = ip6h->ip6_src;
+ } else {
+ ipha_t *ipha = (ipha_t *)mp->b_rptr;
+
+ src4 = ipha->ipha_src;
+ IN6_IPADDR_TO_V4MAPPED(src4, &src6);
+ }
+ }
+ mutex_exit(&ncec->ncec_lock);
+ }
+
+ /*
+ * For outgoing packets, if the src of the outgoing packet is one
+ * of the assigned interface addresses, use it; otherwise we
+ * will pick the source address below.
+ * For local addresses (is_myaddr) doing DAD, NDP announce
+ * messages are mcast, so we use the (IPMP) cast_ill or the
+ * (non-IPMP) ncec_ill for these message types. The only case
+ * of unicast DAD messages is IPv6 ND probes, for which
+ * we find the ipif_bound_ill corresponding to the ncec_addr.
+ */
+ if (!IN6_IS_ADDR_UNSPECIFIED(&src6) || is_myaddr) {
+ if (isv6) {
+ ipif = ipif_lookup_addr_nondup_v6(&src6, ill, ALL_ZONES,
+ ill->ill_ipst);
+ } else {
+ ipif = ipif_lookup_addr_nondup(src4, ill, ALL_ZONES,
+ ill->ill_ipst);
+ }
+
+ /*
+ * If no relevant ipif can be found, then it's not one of our
+ * addresses. Reset to :: and try to find a src for the NS or
+ * ARP request using ipif_select_source_v[4,6] below.
+ * If an ipif can be found, but it's not yet done with
+ * DAD verification, and we are not being invoked for
+ * DAD (i.e., !is_myaddr), then just postpone this
+ * transmission until later.
+ */
+ if (ipif == NULL) {
+ src6 = ipv6_all_zeros;
+ src4 = INADDR_ANY;
+ } else if (!ipif->ipif_addr_ready && !is_myaddr) {
+ DTRACE_PROBE2(nce__resolve__ipif__not__ready,
+ ncec_t *, ncec, ipif_t *, ipif);
+ ipif_refrele(ipif);
+ return (NULL);
}
- } else if (flags & NCE_F_BCAST) {
+ }
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&src6) && !is_myaddr) {
/*
- * broadcast nce.
+ * Pick a source address for this solicitation, but
+ * restrict the selection to addresses assigned to the
+ * output interface. We do this because the destination will
+ * create a neighbor cache entry for the source address of
+ * this packet, so the source address had better be a valid
+ * neighbor.
*/
- template = copyb(ill->ill_bcast_mp);
+ if (isv6) {
+ ipif = ipif_select_source_v6(ill, &ncec->ncec_addr,
+ B_TRUE, IPV6_PREFER_SRC_DEFAULT, ALL_ZONES,
+ B_FALSE, NULL);
+ } else {
+ ipaddr_t nce_addr;
+
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, nce_addr);
+ ipif = ipif_select_source_v4(ill, nce_addr, ALL_ZONES,
+ B_FALSE, NULL);
+ }
+ if (ipif == NULL && IS_IPMP(ill)) {
+ ill_t *send_ill = ipmp_ill_get_xmit_ill(ill, B_TRUE);
+
+ if (send_ill != NULL) {
+ if (isv6) {
+ ipif = ipif_select_source_v6(send_ill,
+ &ncec->ncec_addr, B_TRUE,
+ IPV6_PREFER_SRC_DEFAULT, ALL_ZONES,
+ B_FALSE, NULL);
+ } else {
+ IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr,
+ src4);
+ ipif = ipif_select_source_v4(send_ill,
+ src4, ALL_ZONES, B_TRUE, NULL);
+ }
+ ill_refrele(send_ill);
+ }
+ }
+
+ if (ipif == NULL) {
+ char buf[INET6_ADDRSTRLEN];
+
+ ip1dbg(("nce_resolve_src: No source ipif for dst %s\n",
+ inet_ntop((isv6 ? AF_INET6 : AF_INET),
+ (char *)&ncec->ncec_addr, buf, sizeof (buf))));
+ DTRACE_PROBE1(nce__resolve__no__ipif, ncec_t *, ncec);
+ return (NULL);
+ }
+ src6 = ipif->ipif_v6lcl_addr;
+ }
+ *src = src6;
+ if (ipif != NULL) {
+ src_ill = ipif->ipif_ill;
+ if (IS_IPMP(src_ill))
+ src_ill = ipmp_ipif_hold_bound_ill(ipif);
+ else
+ ill_refhold(src_ill);
+ ipif_refrele(ipif);
+ DTRACE_PROBE2(nce__resolve__src__ill, ncec_t *, ncec,
+ ill_t *, src_ill);
+ }
+ return (src_ill);
+}
+
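+/*
+ * Update the ncec for *addr with the supplied hardware address and flags.
+ * If ipif is non-NULL the lookup is restricted to ipif's ill; otherwise
+ * all matching IPv4 ncec's are updated via nce_update_hw_changed().
+ */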
+void
+ip_nce_lookup_and_update(ipaddr_t *addr, ipif_t *ipif, ip_stack_t *ipst,
+ uchar_t *hwaddr, int hwaddr_len, int flags)
+{
+ ill_t *ill;
+ ncec_t *ncec;
+ nce_t *nce;
+ uint16_t new_state;
+
+ ill = (ipif ? ipif->ipif_ill : NULL);
+ if (ill != NULL) {
+ /*
+ * only one ncec is possible
+ */
+ nce = nce_lookup_v4(ill, addr);
+ if (nce != NULL) {
+ ncec = nce->nce_common;
+ mutex_enter(&ncec->ncec_lock);
+ if (NCE_ISREACHABLE(ncec))
+ new_state = ND_UNCHANGED;
+ else
+ new_state = ND_STALE;
+ ncec->ncec_flags = flags;
+ nce_update(ncec, new_state, hwaddr);
+ mutex_exit(&ncec->ncec_lock);
+ nce_refrele(nce);
+ return;
+ }
+ } else {
+ /*
+ * ill is wildcard; clean up all ncec's and ire's
+ * that match on addr.
+ */
+ nce_hw_map_t hwm;
+
+ hwm.hwm_addr = *addr;
+ hwm.hwm_hwlen = hwaddr_len;
+ hwm.hwm_hwaddr = hwaddr;
+ hwm.hwm_flags = flags;
+
+ ncec_walk_common(ipst->ips_ndp4, NULL,
+ (pfi_t)nce_update_hw_changed, (uchar_t *)&hwm, B_TRUE);
+ }
+}
+
+/*
+ * Common function to add ncec entries.
+ * We always add the ncec with ncec_ill == ill, and always create
+ * nce_t on ncec_ill. A dlpi fastpath message may be triggered if the
+ * ncec is !reachable.
+ *
+ * When the caller passes in an nce_state of ND_UNCHANGED,
+ * nce_add_common() will determine the state of the created nce based
+ * on the ill_net_type and nce_flags used. Otherwise, the nce will
+ * be created with state set to the passed in nce_state.
+ */
+static int
+nce_add_common(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
+ const in6_addr_t *addr, uint16_t flags, uint16_t nce_state, nce_t **retnce)
+{
+ static ncec_t nce_nil;
+ uchar_t *template = NULL;
+ int err;
+ ncec_t *ncec;
+ ncec_t **ncep;
+ ip_stack_t *ipst = ill->ill_ipst;
+ uint16_t state;
+ boolean_t fastprobe = B_FALSE;
+ struct ndp_g_s *ndp;
+ nce_t *nce = NULL;
+ mblk_t *dlur_mp = NULL;
+
+ if (ill->ill_isv6)
+ ndp = ill->ill_ipst->ips_ndp6;
+ else
+ ndp = ill->ill_ipst->ips_ndp4;
+
+ *retnce = NULL;
+
+ ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
+
+ if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
+ ip0dbg(("nce_add_common: no addr\n"));
+ return (EINVAL);
+ }
+ if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
+ ip0dbg(("nce_add_common: flags = %x\n", (int)flags));
+ return (EINVAL);
+ }
+
+ if (ill->ill_isv6) {
+ ncep = ((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr));
+ } else {
+ ipaddr_t v4addr;
+
+ IN6_V4MAPPED_TO_IPADDR(addr, v4addr);
+ ncep = ((ncec_t **)NCE_HASH_PTR_V4(ipst, v4addr));
+ }
+
+ /*
+ * The caller has ensured that there is no nce on ill, but there could
+	 * still be an nce_common_t for the address, so that we find existing
+	 * ncec_t structures first, and atomically add a new nce_t if
+ * one is found. The ndp_g_lock ensures that we don't cross threads
+ * with an ncec_delete(). Unlike ncec_lookup_illgrp() we do not
+ * compare for matches across the illgrp because this function is
+ * called via nce_lookup_then_add_v* -> nce_add_v* -> nce_add_common,
+ * with the nce_lookup_then_add_v* passing in the ipmp_ill where
+ * appropriate.
+ */
+ ncec = *ncep;
+ for (; ncec != NULL; ncec = ncec->ncec_next) {
+ if (ncec->ncec_ill == ill) {
+ if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) {
+ *retnce = nce_ill_lookup_then_add(ill, ncec);
+ if (*retnce != NULL)
+ break;
+ }
+ }
+ }
+ if (*retnce != NULL) {
+ /*
+ * We should never find *retnce to be MYADDR, since the caller
+ * may then incorrectly restart a DAD timer that's already
+ * running.
+ */
+ ASSERT(!NCE_MYADDR(ncec));
+ /* caller must trigger fastpath on nce */
+ return (0);
+ }
+ ncec = kmem_cache_alloc(ncec_cache, KM_NOSLEEP);
+ if (ncec == NULL)
+ return (ENOMEM);
+ *ncec = nce_nil;
+ ncec->ncec_ill = ill;
+ ncec->ncec_ipversion = (ill->ill_isv6 ? IPV6_VERSION : IPV4_VERSION);
+ ncec->ncec_flags = flags;
+ ncec->ncec_ipst = ipst; /* No netstack_hold */
+
+ if (!ill->ill_isv6) {
+ ipaddr_t addr4;
+
+ /*
+ * DAD probe interval and probe count are set based on
+ * fast/slow probe settings. If the underlying link doesn't
+ * have reliably up/down notifications or if we're working
+ * with IPv4 169.254.0.0/16 Link Local Address space, then
+ * don't use the fast timers. Otherwise, use them.
+ */
+ ASSERT(IN6_IS_ADDR_V4MAPPED(addr));
+ IN6_V4MAPPED_TO_IPADDR(addr, addr4);
+ if (ill->ill_note_link && !IS_IPV4_LL_SPACE(&addr4))
+ fastprobe = B_TRUE;
+ if (fastprobe) {
+ ncec->ncec_xmit_interval =
+ ipst->ips_arp_fastprobe_interval;
+ ncec->ncec_pcnt =
+ ipst->ips_arp_fastprobe_count;
+ ncec->ncec_flags |= NCE_F_FAST;
+ } else {
+ ncec->ncec_xmit_interval =
+ ipst->ips_arp_probe_interval;
+ ncec->ncec_pcnt =
+ ipst->ips_arp_probe_count;
+ }
+ if (NCE_PUBLISH(ncec)) {
+ ncec->ncec_unsolicit_count =
+ ipst->ips_ip_arp_publish_count;
+ }
+ } else {
+ /*
+ * probe interval is constant: ILL_PROBE_INTERVAL
+ * probe count is constant: ND_MAX_UNICAST_SOLICIT
+ */
+ ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
+ if (NCE_PUBLISH(ncec)) {
+ ncec->ncec_unsolicit_count =
+ ipst->ips_ip_ndp_unsolicit_count;
+ }
+ }
+ ncec->ncec_rcnt = ill->ill_xmit_count;
+ ncec->ncec_addr = *addr;
+ ncec->ncec_qd_mp = NULL;
+ ncec->ncec_refcnt = 1; /* for ncec getting created */
+ mutex_init(&ncec->ncec_lock, NULL, MUTEX_DEFAULT, NULL);
+ ncec->ncec_trace_disable = B_FALSE;
+
+ /*
+	 * ncec_lladdr holds the link-layer address
+ */
+ if (hw_addr_len > 0) {
+ template = kmem_alloc(hw_addr_len, KM_NOSLEEP);
if (template == NULL) {
err = ENOMEM;
goto err_ret;
}
+ ncec->ncec_lladdr = template;
+ ncec->ncec_lladdr_length = hw_addr_len;
+ bzero(ncec->ncec_lladdr, hw_addr_len);
+ }
+ if ((flags & NCE_F_BCAST) != 0) {
state = ND_REACHABLE;
+ ASSERT(hw_addr_len > 0);
+ } else if (ill->ill_net_type == IRE_IF_RESOLVER) {
+ state = ND_INITIAL;
} else if (ill->ill_net_type == IRE_IF_NORESOLVER) {
/*
* NORESOLVER entries are always created in the REACHABLE
* state.
*/
+ state = ND_REACHABLE;
if (ill->ill_phys_addr_length == IP_ADDR_LEN &&
ill->ill_mactype != DL_IPV4 &&
ill->ill_mactype != DL_6TO4) {
@@ -3698,32 +4501,91 @@ ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
* that do their own resolution from IP to link-layer
* address (e.g. IP over X.25).
*/
- template = ill_dlur_gen((uchar_t *)addr,
- ill->ill_phys_addr_length,
- ill->ill_sap, ill->ill_sap_length);
- } else {
- template = copyb(ill->ill_resolver_mp);
+ bcopy((uchar_t *)addr,
+ ncec->ncec_lladdr, ill->ill_phys_addr_length);
}
- if (template == NULL) {
- err = ENOMEM;
- goto err_ret;
+ if (ill->ill_phys_addr_length == IPV6_ADDR_LEN &&
+ ill->ill_mactype != DL_IPV6) {
+ /*
+		 * We create an nce_res_mp with the IP nexthop address
+		 * as the destination address if the physical length
+ * is exactly 16 bytes for point-to-multipoint links
+ * that do their own resolution from IP to link-layer
+ * address.
+ */
+ bcopy((uchar_t *)addr,
+ ncec->ncec_lladdr, ill->ill_phys_addr_length);
}
+ /*
+ * Since NUD is not part of the base IPv4 protocol definition,
+ * IPv4 neighbor entries on NORESOLVER interfaces will never
+ * age, and are marked NCE_F_NONUD.
+ */
+ if (!ill->ill_isv6)
+ ncec->ncec_flags |= NCE_F_NONUD;
+ } else if (ill->ill_net_type == IRE_LOOPBACK) {
state = ND_REACHABLE;
}
- nce->nce_fp_mp = NULL;
- nce->nce_res_mp = template;
- nce->nce_state = state;
+
+ if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) {
+ /*
+ * We are adding an ncec with a deterministic hw_addr,
+ * so the state can only be one of {REACHABLE, STALE, PROBE}.
+ *
+ * if we are adding a unicast ncec for the local address
+ * it would be REACHABLE; we would be adding a ND_STALE entry
+ * for the requestor of an ARP_REQUEST/ND_SOLICIT. Our own
+ * addresses are added in PROBE to trigger DAD.
+ */
+ if ((flags & (NCE_F_MCAST|NCE_F_BCAST)) ||
+ ill->ill_net_type == IRE_IF_NORESOLVER)
+ state = ND_REACHABLE;
+ else if (!NCE_PUBLISH(ncec))
+ state = ND_STALE;
+ else
+ state = ND_PROBE;
+ if (hw_addr != NULL)
+ nce_set_ll(ncec, hw_addr);
+ }
+ /* caller overrides internally computed state */
+ if (nce_state != ND_UNCHANGED)
+ state = nce_state;
+
+ if (state == ND_PROBE)
+ ncec->ncec_flags |= NCE_F_UNVERIFIED;
+
+ ncec->ncec_state = state;
+
if (state == ND_REACHABLE) {
- nce->nce_last = TICK_TO_MSEC(lbolt64);
- nce->nce_init_time = TICK_TO_MSEC(lbolt64);
+ ncec->ncec_last = TICK_TO_MSEC(lbolt64);
+ ncec->ncec_init_time = TICK_TO_MSEC(lbolt64);
} else {
- nce->nce_last = 0;
+ ncec->ncec_last = 0;
if (state == ND_INITIAL)
- nce->nce_init_time = TICK_TO_MSEC(lbolt64);
+ ncec->ncec_init_time = TICK_TO_MSEC(lbolt64);
+ }
+ list_create(&ncec->ncec_cb, sizeof (ncec_cb_t),
+ offsetof(ncec_cb_t, ncec_cb_node));
+ /*
+ * have all the memory allocations out of the way before taking locks
+ * and adding the nce.
+ */
+ nce = kmem_cache_alloc(nce_cache, KM_NOSLEEP);
+ if (nce == NULL) {
+ err = ENOMEM;
+ goto err_ret;
+ }
+ if (ncec->ncec_lladdr != NULL ||
+ ill->ill_net_type == IRE_IF_NORESOLVER) {
+ dlur_mp = ill_dlur_gen(ncec->ncec_lladdr,
+ ill->ill_phys_addr_length, ill->ill_sap,
+ ill->ill_sap_length);
+ if (dlur_mp == NULL) {
+ err = ENOMEM;
+ goto err_ret;
+ }
}
- ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) ||
- (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE));
/*
* Atomically ensure that the ill is not CONDEMNED, before
* adding the NCE.
@@ -3734,128 +4596,423 @@ ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
err = EINVAL;
goto err_ret;
}
- if ((nce->nce_next = *ncep) != NULL)
- nce->nce_next->nce_ptpn = &nce->nce_next;
- *ncep = nce;
- nce->nce_ptpn = ncep;
- *newnce = nce;
- /* This one is for nce being used by an active thread */
- NCE_REFHOLD(*newnce);
+ if (!NCE_MYADDR(ncec) &&
+ (ill->ill_state_flags & ILL_DOWN_IN_PROGRESS)) {
+ mutex_exit(&ill->ill_lock);
+ DTRACE_PROBE1(nce__add__on__down__ill, ncec_t *, ncec);
+ err = EINVAL;
+ goto err_ret;
+ }
+ /*
+ * Acquire the ncec_lock even before adding the ncec to the list
+ * so that it cannot get deleted after the ncec is added, but
+ * before we add the nce.
+ */
+ mutex_enter(&ncec->ncec_lock);
+ if ((ncec->ncec_next = *ncep) != NULL)
+ ncec->ncec_next->ncec_ptpn = &ncec->ncec_next;
+ *ncep = ncec;
+ ncec->ncec_ptpn = ncep;
- /* Bump up the number of nce's referencing this ill */
+ /* Bump up the number of ncec's referencing this ill */
DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
- (char *), "nce", (void *), nce);
- ill->ill_nce_cnt++;
+ (char *), "ncec", (void *), ncec);
+ ill->ill_ncec_cnt++;
+ /*
+ * Since we hold the ncec_lock at this time, the ncec cannot be
+ * condemned, and we can safely add the nce.
+ */
+ *retnce = nce_add_impl(ill, ncec, nce, dlur_mp);
+ mutex_exit(&ncec->ncec_lock);
mutex_exit(&ill->ill_lock);
- DTRACE_PROBE1(ndp__add__v4, nce_t *, nce);
+
+ /* caller must trigger fastpath on *retnce */
return (0);
+
err_ret:
- freeb(mp);
- freemsg(template);
+ if (ncec != NULL)
+ kmem_cache_free(ncec_cache, ncec);
+ if (nce != NULL)
+ kmem_cache_free(nce_cache, nce);
+ freemsg(dlur_mp);
+ if (template != NULL)
+ kmem_free(template, ill->ill_phys_addr_length);
return (err);
}
/*
- * ndp_walk routine to delete all entries that have a given destination or
- * gateway address and cached link layer (MAC) address. This is used when ARP
- * informs us that a network-to-link-layer mapping may have changed.
+ * take a ref on the nce
*/
void
-nce_delete_hw_changed(nce_t *nce, void *arg)
+nce_refhold(nce_t *nce)
{
- nce_hw_map_t *hwm = arg;
- mblk_t *mp;
- dl_unitdata_req_t *dlu;
- uchar_t *macaddr;
- ill_t *ill;
- int saplen;
- ipaddr_t nce_addr;
+ mutex_enter(&nce->nce_lock);
+ nce->nce_refcnt++;
+ ASSERT((nce)->nce_refcnt != 0);
+ mutex_exit(&nce->nce_lock);
+}
- if (nce->nce_state != ND_REACHABLE)
- return;
+/*
+ * Release a ref on the nce. In general, this cannot be called with
+ * locks held because the final refrele may result in nce_inactive,
+ * which will take the ill_lock, do ipif_ill_refrele_tail, etc.
+ * Thus the one exception
+ * where this can be called with locks held is when the caller
+ * is certain that the nce_refcnt is sufficient to prevent
+ * the invocation of nce_inactive.
+ */
+void
+nce_refrele(nce_t *nce)
+{
+ ASSERT((nce)->nce_refcnt != 0);
+ mutex_enter(&nce->nce_lock);
+ if (--nce->nce_refcnt == 0)
+ nce_inactive(nce); /* destroys the mutex */
+ else
+ mutex_exit(&nce->nce_lock);
+}
- IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
- if (nce_addr != hwm->hwm_addr)
- return;
+/*
+ * free the nce after all refs have gone away.
+ */
+static void
+nce_inactive(nce_t *nce)
+{
+ ill_t *ill = nce->nce_ill;
+
+ ASSERT(nce->nce_refcnt == 0);
+
+ ncec_refrele_notr(nce->nce_common);
+ nce->nce_common = NULL;
+ freemsg(nce->nce_fp_mp);
+ freemsg(nce->nce_dlur_mp);
+
+ mutex_enter(&ill->ill_lock);
+ DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
+ (char *), "nce", (void *), nce);
+ ill->ill_nce_cnt--;
+ nce->nce_ill = NULL;
+ /*
+	 * If the number of nce's associated with this ill has dropped
+ * to zero, check whether we need to restart any operation that
+ * is waiting for this to happen.
+ */
+ if (ILL_DOWN_OK(ill)) {
+ /* ipif_ill_refrele_tail drops the ill_lock */
+ ipif_ill_refrele_tail(ill);
+ } else {
+ mutex_exit(&ill->ill_lock);
+ }
+
+ mutex_destroy(&nce->nce_lock);
+ kmem_cache_free(nce_cache, nce);
+}
+
+/*
+ * Add an nce to the ill_nce list.
+ */
+static nce_t *
+nce_add_impl(ill_t *ill, ncec_t *ncec, nce_t *nce, mblk_t *dlur_mp)
+{
+ bzero(nce, sizeof (*nce));
+ mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
+ nce->nce_common = ncec;
+ nce->nce_addr = ncec->ncec_addr;
+ nce->nce_ill = ill;
+ DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
+ (char *), "nce", (void *), nce);
+ ill->ill_nce_cnt++;
+
+ nce->nce_refcnt = 1; /* for the thread */
+ ncec->ncec_refcnt++; /* want ncec_refhold_locked_notr(ncec) */
+ nce->nce_dlur_mp = dlur_mp;
+
+ /* add nce to the ill's fastpath list. */
+ nce->nce_refcnt++; /* for the list */
+ list_insert_head(&ill->ill_nce, nce);
+ return (nce);
+}
+
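+/*
+ * Allocate an nce_t for ncec and link it onto ill's ill_nce list via
+ * nce_add_impl(). Caller must hold the ill_lock and ncec_lock.
+ * Returns NULL on allocation failure.
+ */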
+static nce_t *
+nce_add(ill_t *ill, ncec_t *ncec)
+{
+ nce_t *nce;
+ mblk_t *dlur_mp = NULL;
+
+ ASSERT(MUTEX_HELD(&ill->ill_lock));
+ ASSERT(MUTEX_HELD(&ncec->ncec_lock));
+
+ nce = kmem_cache_alloc(nce_cache, KM_NOSLEEP);
+ if (nce == NULL)
+ return (NULL);
+ if (ncec->ncec_lladdr != NULL ||
+ ill->ill_net_type == IRE_IF_NORESOLVER) {
+ dlur_mp = ill_dlur_gen(ncec->ncec_lladdr,
+ ill->ill_phys_addr_length, ill->ill_sap,
+ ill->ill_sap_length);
+ if (dlur_mp == NULL) {
+ kmem_cache_free(nce_cache, nce);
+ return (NULL);
+ }
+ }
+ return (nce_add_impl(ill, ncec, nce, dlur_mp));
+}
+
+/*
+ * Remove the nce from the ill's fastpath list (ill_nce).
+ */
+void
+nce_delete(nce_t *nce)
+{
+ ill_t *ill = nce->nce_ill;
+
+ ASSERT(MUTEX_HELD(&ill->ill_lock));
mutex_enter(&nce->nce_lock);
- if ((mp = nce->nce_res_mp) == NULL) {
+ if (nce->nce_is_condemned) {
+ /*
+ * some other thread has removed this nce from the ill_nce list
+ */
mutex_exit(&nce->nce_lock);
return;
}
- dlu = (dl_unitdata_req_t *)mp->b_rptr;
- macaddr = (uchar_t *)(dlu + 1);
- ill = nce->nce_ill;
- if ((saplen = ill->ill_sap_length) > 0)
- macaddr += saplen;
- else
- saplen = -saplen;
+ nce->nce_is_condemned = B_TRUE;
+ mutex_exit(&nce->nce_lock);
+ list_remove(&ill->ill_nce, nce);
/*
- * If the hardware address is unchanged, then leave this one alone.
- * Note that saplen == abs(saplen) now.
+ * even though we are holding the ill_lock, it is ok to
+ * call nce_refrele here because we know that we should have
+ * at least 2 refs on the nce: one for the thread, and one
+ * for the list. The refrele below will release the one for
+ * the list.
*/
- if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
- bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) {
- mutex_exit(&nce->nce_lock);
- return;
+ nce_refrele(nce);
+}
+
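+/*
+ * Look up addr on ill's ill_nce list; caller must hold the ill_lock.
+ * A held nce_t is returned on success, NULL if no match is found.
+ */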
+nce_t *
+nce_lookup(ill_t *ill, const in6_addr_t *addr)
+{
+ nce_t *nce = NULL;
+
+ ASSERT(ill != NULL);
+ ASSERT(MUTEX_HELD(&ill->ill_lock));
+
+ for (nce = list_head(&ill->ill_nce); nce != NULL;
+ nce = list_next(&ill->ill_nce, nce)) {
+ if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr))
+ break;
}
- mutex_exit(&nce->nce_lock);
- DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
- ndp_delete(nce);
+ /*
+ * if we found the nce on the ill_nce list while holding
+ * the ill_lock, then it cannot be condemned yet.
+ */
+ if (nce != NULL) {
+ ASSERT(!nce->nce_is_condemned);
+ nce_refhold(nce);
+ }
+ return (nce);
}
/*
- * This function verifies whether a given IPv4 address is potentially known to
- * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t,
- * so that it can continue to look for hardware changes on that address.
+ * Walk the ill_nce list on ill. The callback function func() cannot perform
+ * any destructive actions.
*/
-boolean_t
-ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns)
+static void
+nce_walk_common(ill_t *ill, pfi_t func, void *arg)
{
- nce_t *nce;
- struct in_addr nceaddr;
- ip_stack_t *ipst = ns->netstack_ip;
+ nce_t *nce = NULL, *nce_next;
- if (addr == INADDR_ANY)
- return (B_FALSE);
+ ASSERT(MUTEX_HELD(&ill->ill_lock));
+ for (nce = list_head(&ill->ill_nce); nce != NULL; ) {
+ nce_next = list_next(&ill->ill_nce, nce);
+ if (func(ill, nce, arg) != 0)
+ break;
+ nce = nce_next;
+ }
+}
- mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
- nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr);
- for (; nce != NULL; nce = nce->nce_next) {
- /* Note that only v4 mapped entries are in the table. */
- IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
- if (addr == nceaddr.s_addr &&
- IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
- /* Single flag check; no lock needed */
- if (!(nce->nce_flags & NCE_F_CONDEMNED))
- break;
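+/*
+ * Walk the ill_nce list, taking and releasing the ill_lock around
+ * nce_walk_common().
+ */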
+void
+nce_walk(ill_t *ill, pfi_t func, void *arg)
+{
+ mutex_enter(&ill->ill_lock);
+ nce_walk_common(ill, func, arg);
+ mutex_exit(&ill->ill_lock);
+}
+
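+/*
+ * Flush the nce entries on ill's ill_nce list; entries that we publish
+ * (NCE_PUBLISH) are retained unless flushall is set.
+ */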
+void
+nce_flush(ill_t *ill, boolean_t flushall)
+{
+ nce_t *nce, *nce_next;
+ list_t dead;
+
+ list_create(&dead, sizeof (nce_t), offsetof(nce_t, nce_node));
+ mutex_enter(&ill->ill_lock);
+ for (nce = list_head(&ill->ill_nce); nce != NULL; ) {
+ nce_next = list_next(&ill->ill_nce, nce);
+ if (!flushall && NCE_PUBLISH(nce->nce_common)) {
+ nce = nce_next;
+ continue;
}
+ /*
+		 * nce_delete requires that the caller either not be
+		 * holding locks, or hold a ref to ensure that we won't
+		 * hit ncec_inactive. So take a ref and clean up
+ * after the list is flushed.
+ */
+ nce_refhold(nce);
+ nce_delete(nce);
+ list_insert_tail(&dead, nce);
+ nce = nce_next;
}
- mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
- return (nce != NULL);
+ mutex_exit(&ill->ill_lock);
+ while ((nce = list_head(&dead)) != NULL) {
+ list_remove(&dead, nce);
+ nce_refrele(nce);
+ }
+ ASSERT(list_is_empty(&dead));
+ list_destroy(&dead);
}
-/*
- * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly
- * with IPMP. Specifically, since neighbor discovery is always done on
- * underlying interfaces (even for addresses owned by an IPMP interface), we
- * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface
- * associated with `ill' (if it exists).
- */
-static ipif_t *
-ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill)
+/*
+ * Return a fuzzed interval: anywhere in [1 .. intv] when initial_time is
+ * set, otherwise within roughly +/- 20% of intv (but at least 1 tick).
+ */
+static clock_t
+nce_fuzz_interval(clock_t intv, boolean_t initial_time)
+{
+ clock_t rnd, frac;
+
+ (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd));
+ /* Note that clock_t is signed; must chop off bits */
+ rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1;
+ if (initial_time) {
+ if (intv <= 0)
+ intv = 1;
+ else
+ intv = (rnd % intv) + 1;
+ } else {
+ /* Compute 'frac' as 20% of the configured interval */
+ if ((frac = intv / 5) <= 1)
+ frac = 2;
+ /* Set intv randomly in the range [intv-frac .. intv+frac] */
+ if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0)
+ intv = 1;
+ }
+ return (intv);
+}
+
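+/*
+ * Address resolution has completed for an ncec on an IPMP meta-interface:
+ * drain the packets queued on ncec_qd_mp, picking an under-ill (and
+ * creating an under-nce via nce_fastpath_create()) for each one before
+ * transmitting it with ip_xmit(). Queued probes whose source ipif has
+ * gone away are dropped.
+ */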
+void
+nce_resolv_ipmp_ok(ncec_t *ncec)
{
- ipif_t *ipif;
+ mblk_t *mp;
+ uint_t pkt_len;
+ iaflags_t ixaflags = IXAF_NO_TRACE;
+ nce_t *under_nce;
+ ill_t *ill = ncec->ncec_ill;
+ boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
+ ipif_t *src_ipif = NULL;
ip_stack_t *ipst = ill->ill_ipst;
+ ill_t *send_ill;
+ uint_t nprobes;
- ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst);
- if (ipif == NULL && IS_UNDER_IPMP(ill)) {
- if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
- ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst);
- ill_refrele(ill);
+ ASSERT(IS_IPMP(ill));
+
+ mutex_enter(&ncec->ncec_lock);
+ nprobes = ncec->ncec_nprobes;
+ mp = ncec->ncec_qd_mp;
+ ncec->ncec_qd_mp = NULL;
+ ncec->ncec_nprobes = 0;
+ mutex_exit(&ncec->ncec_lock);
+
+ while (mp != NULL) {
+ mblk_t *nxt_mp;
+
+ nxt_mp = mp->b_next;
+ mp->b_next = NULL;
+ if (isv6) {
+ ip6_t *ip6h = (ip6_t *)mp->b_rptr;
+
+ pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
+ src_ipif = ipif_lookup_addr_nondup_v6(&ip6h->ip6_src,
+ ill, ALL_ZONES, ipst);
+ } else {
+ ipha_t *ipha = (ipha_t *)mp->b_rptr;
+
+ ixaflags |= IXAF_IS_IPV4;
+ pkt_len = ntohs(ipha->ipha_length);
+ src_ipif = ipif_lookup_addr_nondup(ipha->ipha_src,
+ ill, ALL_ZONES, ipst);
+ }
+
+ /*
+ * find a new nce based on an under_ill. The first IPMP probe
+ * packet gets queued, so we could still find a src_ipif that
+ * matches an IPMP test address.
+ */
+ if (src_ipif == NULL || IS_IPMP(src_ipif->ipif_ill)) {
+ /*
+			 * If src_ipif is null, this could be either a
+			 * forwarded packet or a probe whose src got deleted.
+			 * We distinguish the two by looking at ncec_nprobes:
+			 * the first ncec_nprobes packets are probes.
+ */
+ if (src_ipif == NULL && nprobes > 0)
+ goto drop_pkt;
+
+ /*
+ * For forwarded packets, we use the ipmp rotor
+ * to find send_ill.
+ */
+ send_ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill,
+ B_TRUE);
+ } else {
+ send_ill = src_ipif->ipif_ill;
+ ill_refhold(send_ill);
+ }
+
+ DTRACE_PROBE4(nce__resolve__ipmp, (mblk_t *), mp,
+ (ncec_t *), ncec, (ipif_t *),
+ src_ipif, (ill_t *), send_ill);
+
+ if (send_ill == NULL) {
+ if (src_ipif != NULL)
+ ipif_refrele(src_ipif);
+ goto drop_pkt;
}
+ /* create an under_nce on send_ill */
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ if (IS_IN_SAME_ILLGRP(send_ill, ncec->ncec_ill))
+ under_nce = nce_fastpath_create(send_ill, ncec);
+ else
+ under_nce = NULL;
+ rw_exit(&ipst->ips_ill_g_lock);
+ if (under_nce != NULL && NCE_ISREACHABLE(ncec))
+ nce_fastpath_trigger(under_nce);
+
+ ill_refrele(send_ill);
+ if (src_ipif != NULL)
+ ipif_refrele(src_ipif);
+
+ if (under_nce != NULL) {
+ (void) ip_xmit(mp, under_nce, ixaflags, pkt_len, 0,
+ ALL_ZONES, 0, NULL);
+ nce_refrele(under_nce);
+ if (nprobes > 0)
+ nprobes--;
+ mp = nxt_mp;
+ continue;
+ }
+drop_pkt:
+ if (isv6) {
+ BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards);
+ } else {
+ BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
+ }
+ ip_drop_output("ipIfStatsOutDiscards - no under_ill", mp, NULL);
+ freemsg(mp);
+ if (nprobes > 0)
+ nprobes--;
+ mp = nxt_mp;
}
- return (ipif);
+ ncec_cb_dispatch(ncec); /* complete callbacks */
}