Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/inet/ip.h          |   8
-rw-r--r--  usr/src/uts/common/inet/ip/ip.c       |   3
-rw-r--r--  usr/src/uts/common/inet/ip/ip6_ire.c  |   1
-rw-r--r--  usr/src/uts/common/inet/ip/ip_ire.c   |  53
-rw-r--r--  usr/src/uts/common/inet/ip/ip_multi.c | 172
-rw-r--r--  usr/src/uts/common/inet/ip/ip_ndp.c   |  88
-rw-r--r--  usr/src/uts/common/inet/ip/ipmp.c     |  11
7 files changed, 254 insertions, 82 deletions
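The recurring pattern in the change below: ip_addmulti_serial() and ip_delmulti_serial() no longer transmit the DLPI and IP packets they queue; instead, every caller drains the queues via ill_mcast_send_queued() and ill_dlpi_send_queued(), but only after ill_mcast_serializer and all other locks have been dropped (per the comment added in ip_opt_add_group(), ill_mcast_send_queued() can call ip_output_simple(), which takes ill_g_lock, so it must not run under the multicast locks). A minimal sketch of that discipline follows, with hypothetical fake_* names standing in for the illumos routines:

/*
 * Illustrative sketch only: queue work while the serializer is held,
 * drain it after every lock is dropped. Hypothetical names; only the
 * lock-then-drain ordering mirrors the patch.
 */
#include <pthread.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t	serializer;	/* cf. ill_mcast_serializer */
	int		queued;		/* cf. queued DLPI/IP packets */
} fake_ill_t;

/* Called with the serializer held: only record what must be sent. */
static void
fake_addmulti_serial(fake_ill_t *ill)
{
	ill->queued++;		/* defer the actual transmit */
}

/* Called with no locks held: now it is safe to transmit. */
static void
fake_send_queued(fake_ill_t *ill)
{
	while (ill->queued > 0) {
		ill->queued--;
		printf("sending one deferred packet\n");
	}
}

int
main(void)
{
	fake_ill_t ill = { PTHREAD_MUTEX_INITIALIZER, 0 };

	pthread_mutex_lock(&ill.serializer);
	fake_addmulti_serial(&ill);
	pthread_mutex_unlock(&ill.serializer);
	/* All locks dropped: drain, as the callers in the patch now do. */
	fake_send_queued(&ill);
	return (0);
}

The real routines defer transmission by queueing mblks on the ill; the sketch mirrors only the ordering that the patch enforces.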
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index a8ccd69aea..26a909ba49 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -635,7 +635,9 @@ typedef struct ilg_s {
  * ill_mcast_lock. Operations that change state on both an ilg and ilm
  * in addition use ill_mcast_serializer to ensure that we can't have
  * interleaving between e.g., add and delete operations for the same conn_t,
- * group, and ill.
+ * group, and ill. The ill_mcast_serializer is also used to ensure that
+ * multicast group joins do not occur on an interface that is in the process
+ * of joining an IPMP group.
  *
  * The comment below (and for other netstack_t references) refers
  * to the fact that we only do netstack_hold in particular cases,
@@ -1680,8 +1682,9 @@ typedef struct ill_s {
 		ill_replumbing : 1,
 		ill_arl_dlpi_pending : 1,
+		ill_grp_pending : 1,
 
-		ill_pad_to_bit_31 : 18;
+		ill_pad_to_bit_31 : 17;
 
 	/* Following bit fields protected by ill_lock */
 	uint_t
@@ -1942,6 +1945,7 @@ typedef struct ill_s {
  * ill_refresh_tid		ill_lock		ill_lock
  * ill_grp (for IPMP ill)	write once		write once
  * ill_grp (for underlying ill)	ipsq + ill_g_lock	ipsq OR ill_g_lock
+ * ill_grp_pending		ill_mcast_serializer	ill_mcast_serializer
  * ill_mrouter_cnt		atomics			atomics
  *
  * NOTE: It's OK to make heuristic decisions on an underlying interface
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index 276711d737..8924e40669 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1990 Mentat Inc. */
@@ -8606,7 +8606,6 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
 			freemsg(mp);
 			return;
 		}
-		ASSERT(arp_no_defense || err != 0);
 		mp1 = ipsq_pending_mp_get(ipsq, &connp);
 	} else {
 		/* The conn has started closing */
diff --git a/usr/src/uts/common/inet/ip/ip6_ire.c b/usr/src/uts/common/inet/ip/ip6_ire.c
index b24d859f4c..435f26cfc9 100644
--- a/usr/src/uts/common/inet/ip/ip6_ire.c
+++ b/usr/src/uts/common/inet/ip/ip6_ire.c
@@ -337,6 +337,7 @@ ire_add_v6(ire_t *ire)
 		}
 		for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
 			rw_init(&ptr[i].irb_lock, NULL, RW_DEFAULT, NULL);
+			ptr[i].irb_ipst = ipst;
 		}
 		mutex_enter(&ipst->ips_ire_ft_init_lock);
 		if (ipst->ips_ip_forwarding_table_v6[mask_table_index] ==
diff --git a/usr/src/uts/common/inet/ip/ip_ire.c b/usr/src/uts/common/inet/ip/ip_ire.c
index d195d2543b..a1c338e9be 100644
--- a/usr/src/uts/common/inet/ip/ip_ire.c
+++ b/usr/src/uts/common/inet/ip/ip_ire.c
@@ -230,6 +230,7 @@ static void	ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type,
 #ifdef DEBUG
 static void	ire_trace_cleanup(const ire_t *);
 #endif
+static void	ire_dep_incr_generation_locked(ire_t *);
 
 /*
  * Following are the functions to increment/decrement the reference
@@ -1481,22 +1482,14 @@ ire_delete(ire_t *ire)
 	ire_t	*ire1;
 	ire_t	**ptpn;
 	irb_t	*irb;
-	nce_t	*nce;
 	ip_stack_t	*ipst = ire->ire_ipst;
 
-	/* We can clear ire_nce_cache under ire_lock even if the IRE is used */
-	mutex_enter(&ire->ire_lock);
-	nce = ire->ire_nce_cache;
-	ire->ire_nce_cache = NULL;
-	mutex_exit(&ire->ire_lock);
-	if (nce != NULL)
-		nce_refrele(nce);
-
 	if ((irb = ire->ire_bucket) == NULL) {
 		/*
 		 * It was never inserted in the list. Should call REFRELE
 		 * to free this IRE.
 		 */
+		ire_make_condemned(ire);
 		ire_refrele_notr(ire);
 		return;
 	}
@@ -1649,8 +1642,8 @@ ire_inactive(ire_t *ire)
 	ASSERT(ire->ire_next == NULL);
 
 	/* Count how many condemned ires for kmem_cache callback */
-	if (IRE_IS_CONDEMNED(ire))
-		atomic_add_32(&ipst->ips_num_ire_condemned, -1);
+	ASSERT(IRE_IS_CONDEMNED(ire));
+	atomic_add_32(&ipst->ips_num_ire_condemned, -1);
 
 	if (ire->ire_gw_secattr != NULL) {
 		ire_gw_secattr_free(ire->ire_gw_secattr);
@@ -1753,17 +1746,31 @@ void
 irb_increment_generation(irb_t *irb)
 {
 	ire_t *ire;
+	ip_stack_t *ipst;
 
 	if (irb == NULL || irb->irb_ire_cnt == 0)
 		return;
 
-	irb_refhold(irb);
+	ipst = irb->irb_ipst;
+	/*
+	 * we cannot do an irb_refhold/irb_refrele here as the caller
+	 * already has the global RADIX_NODE_HEAD_WLOCK, and the irb_refrele
+	 * may result in an attempt to free the irb_t, which also needs
+	 * the RADIX_NODE_HEAD lock. However, since we want to traverse the
+	 * irb_ire list without fear of having a condemned ire removed from
+	 * the list, we acquire the irb_lock as WRITER. Moreover, since
+	 * the ire_generation increments are done under the ire_dep_lock,
+	 * acquire the locks in the prescribed lock order first.
+	 */
+	rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
+	rw_enter(&irb->irb_lock, RW_WRITER);
 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
 		if (!IRE_IS_CONDEMNED(ire))
 			ire_increment_generation(ire);	/* Ourselves */
-		ire_dep_incr_generation(ire);	/* Dependants */
+		ire_dep_incr_generation_locked(ire);	/* Dependants */
 	}
-	irb_refrele(irb);
+	rw_exit(&irb->irb_lock);
+	rw_exit(&ipst->ips_ire_dep_lock);
 }
 
 /*
@@ -2561,11 +2568,14 @@ ndp_nce_init(ill_t *ill, const in6_addr_t *addr6, int ire_type)
 
 /*
  * The caller should hold irb_lock as a writer if the ire is in a bucket.
+ * This routine will clear ire_nce_cache, and we make sure that we can never
+ * set ire_nce_cache after the ire is marked condemned.
  */
 void
 ire_make_condemned(ire_t *ire)
 {
 	ip_stack_t	*ipst = ire->ire_ipst;
+	nce_t	*nce;
 
 	mutex_enter(&ire->ire_lock);
 	ASSERT(ire->ire_bucket == NULL ||
@@ -2574,7 +2584,11 @@ ire_make_condemned(ire_t *ire)
 	ire->ire_generation = IRE_GENERATION_CONDEMNED;
 	/* Count how many condemned ires for kmem_cache callback */
 	atomic_add_32(&ipst->ips_num_ire_condemned, 1);
+	nce = ire->ire_nce_cache;
+	ire->ire_nce_cache = NULL;
 	mutex_exit(&ire->ire_lock);
+	if (nce != NULL)
+		nce_refrele(nce);
 }
 
 /*
@@ -3232,14 +3246,21 @@ ire_dep_increment_children(ire_t *child)
  * Walk all the children of this ire recursively and increment their
  * generation number.
  */
+static void
+ire_dep_incr_generation_locked(ire_t *parent)
+{
+	ASSERT(RW_READ_HELD(&parent->ire_ipst->ips_ire_dep_lock));
+	if (parent->ire_dep_children != NULL)
+		ire_dep_increment_children(parent->ire_dep_children);
+}
+
 void
 ire_dep_incr_generation(ire_t *parent)
 {
 	ip_stack_t	*ipst = parent->ire_ipst;
 
 	rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
-	if (parent->ire_dep_children != NULL)
-		ire_dep_increment_children(parent->ire_dep_children);
+	ire_dep_incr_generation_locked(parent);
 	rw_exit(&ipst->ips_ire_dep_lock);
 }
 
diff --git a/usr/src/uts/common/inet/ip/ip_multi.c b/usr/src/uts/common/inet/ip/ip_multi.c
index 180664d4cc..de9e651bec 100644
--- a/usr/src/uts/common/inet/ip/ip_multi.c
+++ b/usr/src/uts/common/inet/ip/ip_multi.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1990 Mentat Inc. */
@@ -100,7 +100,7 @@ static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
 		    ill_t **);
 
 static void	ilg_check_detach(conn_t *, ill_t *);
-static void	ilg_check_reattach(conn_t *);
+static void	ilg_check_reattach(conn_t *, ill_t *);
 
 /*
  * MT notes:
@@ -565,6 +565,12 @@ ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
 	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
 	    MODE_IS_EXCLUDE, NULL, errorp);
 	mutex_exit(&ill->ill_mcast_serializer);
+	/*
+	 * Now that all locks have been dropped, we can send any
+	 * deferred/queued DLPI or IP packets
+	 */
+	ill_mcast_send_queued(ill);
+	ill_dlpi_send_queued(ill);
 	return (ilm);
 }
 
@@ -573,7 +579,8 @@ ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
  * then this returns with a refhold on the ilm.
  *
  * Internal routine which assumes the caller has already acquired
- * ill_multi_serializer.
+ * ill_mcast_serializer. It is the caller's responsibility to send out
+ * queued DLPI/multicast packets after all locks are dropped.
  *
  * The unspecified address means all multicast addresses for in both the
  * case of IPv4 and IPv6.
@@ -605,6 +612,7 @@ ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
 		ipaddr_t v4group;
 
 		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
+		ASSERT(!IS_UNDER_IPMP(ill));
 		if (!CLASSD(v4group)) {
 			*errorp = EINVAL;
 			return (NULL);
@@ -636,9 +644,6 @@ ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
 	    ilg_flist, errorp);
 	rw_exit(&ill->ill_mcast_lock);
 
-	/* Send any deferred/queued DLPI or IP packets */
-	ill_mcast_send_queued(ill);
-	ill_dlpi_send_queued(ill);
 	ill_mcast_timer_start(ill->ill_ipst);
 	return (ilm);
 }
@@ -820,13 +825,21 @@ ip_delmulti(ilm_t *ilm)
 	mutex_enter(&ill->ill_mcast_serializer);
 	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
 	mutex_exit(&ill->ill_mcast_serializer);
+	/*
+	 * Now that all locks have been dropped, we can send any
+	 * deferred/queued DLPI or IP packets
+	 */
+	ill_mcast_send_queued(ill);
+	ill_dlpi_send_queued(ill);
 	return (error);
 }
 
 /*
  * Delete the ilm.
- * Assumes ill_multi_serializer is held by the caller.
+ * Assumes ill_mcast_serializer is held by the caller.
+ * Caller must send out queued dlpi/multicast packets after dropping
+ * all locks.
  */
 static int
 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
@@ -840,11 +853,7 @@ ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
 	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
 	rw_exit(&ill->ill_mcast_lock);
-	/* Send any deferred/queued DLPI or IP packets */
-	ill_mcast_send_queued(ill);
-	ill_dlpi_send_queued(ill);
 	ill_mcast_timer_start(ill->ill_ipst);
-
 	return (ret);
 }
 
@@ -1903,6 +1912,12 @@ ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
 		if (ilm != NULL)
 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
 		mutex_exit(&ill->ill_mcast_serializer);
+		/*
+		 * Now that all locks have been dropped, we can send any
+		 * deferred/queued DLPI or IP packets
+		 */
+		ill_mcast_send_queued(ill);
+		ill_dlpi_send_queued(ill);
 		return (0);
 	} else {
 		ilgstat = ILGSTAT_CHANGE;
@@ -2009,11 +2024,26 @@ ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
 	}
 
 	if (ilm != NULL) {
+		if (ilg->ilg_ill == NULL) {
+			/* some other thread is re-attaching this. */
+			rw_exit(&connp->conn_ilg_lock);
+			(void) ip_delmulti_serial(ilm, B_FALSE,
+			    (ilgstat == ILGSTAT_NEW));
+			err = 0;
+			goto free_and_exit;
+		}
 		/* Succeeded. Update the ilg to point at the ilm */
 		if (ilgstat == ILGSTAT_NEW) {
-			ASSERT(ilg->ilg_ilm == NULL);
-			ilg->ilg_ilm = ilm;
-			ilm->ilm_ifaddr = ifaddr;	/* For netstat */
+			if (ilg->ilg_ilm == NULL) {
+				ilg->ilg_ilm = ilm;
+				ilm->ilm_ifaddr = ifaddr; /* For netstat */
+			} else {
+				/* some other thread is re-attaching this. */
+				rw_exit(&connp->conn_ilg_lock);
+				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
+				err = 0;
+				goto free_and_exit;
+			}
 		} else {
 			/*
 			 * ip_addmulti didn't get a held ilm for
@@ -2057,6 +2087,8 @@ ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
 
 free_and_exit:
 	mutex_exit(&ill->ill_mcast_serializer);
+	ill_mcast_send_queued(ill);
+	ill_dlpi_send_queued(ill);
 	l_free(orig_filter);
 	l_free(new_filter);
 
@@ -2351,10 +2383,34 @@ ip_opt_add_group(conn_t *connp, boolean_t checkonly,
 		ill_refrele(ill);
 		return (0);
 	}
-	mutex_enter(&ill->ill_mcast_serializer);
+	/*
+	 * Multicast groups may not be joined on interfaces that are either
+	 * already underlying interfaces in an IPMP group, or in the process
+	 * of joining the IPMP group. The latter condition is enforced by
+	 * checking the value of ill->ill_grp_pending under the
+	 * ill_mcast_serializer lock. We cannot serialize the
+	 * ill_grp_pending check on the ill_g_lock across ilg_add() because
+	 * ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
+	 * will take the ill_g_lock itself. Instead, we hold the
+	 * ill_mcast_serializer.
+	 */
+	if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
+		DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
+		    in6_addr_t *, v6group);
+		mutex_exit(&ill->ill_mcast_serializer);
+		ill_refrele(ill);
+		return (EADDRNOTAVAIL);
+	}
 	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
 	mutex_exit(&ill->ill_mcast_serializer);
+	/*
+	 * We have done an addmulti_impl and/or delmulti_impl.
+	 * All locks have been dropped, we can send any
+	 * deferred/queued DLPI or IP packets
+	 */
+	ill_mcast_send_queued(ill);
+	ill_dlpi_send_queued(ill);
 	ill_refrele(ill);
 	return (err);
 }
@@ -2459,6 +2515,12 @@ retry:
 done:
 	if (ill != NULL) {
 		mutex_exit(&ill->ill_mcast_serializer);
+		/*
+		 * Now that all locks have been dropped, we can
+		 * send any deferred/queued DLPI or IP packets
+		 */
+		ill_mcast_send_queued(ill);
+		ill_dlpi_send_queued(ill);
 		ill_refrele(ill);
 	}
 	return (err);
 }
@@ -2617,6 +2679,7 @@ ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
 		ilg->ilg_ill = ill;
 	} else {
 		int	index;
+
 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
 			rw_exit(&connp->conn_ilg_lock);
 			l_free(new_filter);
@@ -2675,13 +2738,27 @@ ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
 		error = ENXIO;
 		goto free_and_exit;
 	}
-	if (ilm != NULL) {
+	if (ilg->ilg_ill == NULL) {
+		/* some other thread is re-attaching this. */
+		rw_exit(&connp->conn_ilg_lock);
+		(void) ip_delmulti_serial(ilm, B_FALSE,
+		    (ilgstat == ILGSTAT_NEW));
+		error = 0;
+		goto free_and_exit;
+	}
 	/* Succeeded. Update the ilg to point at the ilm */
 	if (ilgstat == ILGSTAT_NEW) {
-		ASSERT(ilg->ilg_ilm == NULL);
-		ilg->ilg_ilm = ilm;
-		ilm->ilm_ifaddr = ifaddr;	/* For netstat */
+		if (ilg->ilg_ilm == NULL) {
+			ilg->ilg_ilm = ilm;
+			ilm->ilm_ifaddr = ifaddr; /* For netstat */
+		} else {
+			/* some other thread is re-attaching this. */
+			rw_exit(&connp->conn_ilg_lock);
+			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
+			error = 0;
+			goto free_and_exit;
+		}
 	} else {
 		/*
 		 * ip_addmulti didn't get a held ilm for
@@ -2973,6 +3050,12 @@ ilg_delete_all(conn_t *connp)
 next:
 	mutex_exit(&ill->ill_mcast_serializer);
+	/*
+	 * Now that all locks have been dropped, we can send any
+	 * deferred/queued DLPI or IP packets
+	 */
+	ill_mcast_send_queued(ill);
+	ill_dlpi_send_queued(ill);
 	if (need_refrele) {
 		/* Drop ill reference while we hold no locks */
 		ill_refrele(ill);
@@ -2986,8 +3069,7 @@ ilg_delete_all(conn_t *connp)
 
 /*
  * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
- * that a subsequent attempt can attach it.
- * Drops and reacquires conn_ilg_lock.
+ * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
  */
 static void
 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
@@ -3034,10 +3116,11 @@ ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
 	 * Must look up the ilg again since we've not been holding
 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
 	 * having called conn_update_ill, which can run once we dropped the
-	 * conn_ilg_lock above.
+	 * conn_ilg_lock above. Alternatively, the ilg could have been attached
+	 * when the lock was dropped
 	 */
 	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
-	if (ilg == NULL) {
+	if (ilg == NULL || ilg->ilg_ilm != NULL) {
 		if (ilm != NULL) {
 			rw_exit(&connp->conn_ilg_lock);
 			(void) ip_delmulti_serial(ilm, B_FALSE,
@@ -3050,7 +3133,6 @@ ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
 		ilg->ilg_ill = NULL;
 		return;
 	}
-	ASSERT(ilg->ilg_ilm == NULL);
 	ilg->ilg_ilm = ilm;
 	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
 }
@@ -3111,7 +3193,7 @@ conn_update_ill(conn_t *connp, caddr_t arg)
 
 	if (ill != NULL)
 		ilg_check_detach(connp, ill);
-	ilg_check_reattach(connp);
+	ilg_check_reattach(connp, ill);
 
 	/* Do we need to wake up a thread in ilg_delete_all? */
 	mutex_enter(&connp->conn_lock);
@@ -3164,15 +3246,22 @@ ilg_check_detach(conn_t *connp, ill_t *ill)
 	if (held_ilg != NULL)
 		ilg_refrele(held_ilg);
 	rw_exit(&connp->conn_ilg_lock);
 	mutex_exit(&ill->ill_mcast_serializer);
+	/*
+	 * Now that all locks have been dropped, we can send any
+	 * deferred/queued DLPI or IP packets
+	 */
+	ill_mcast_send_queued(ill);
+	ill_dlpi_send_queued(ill);
 }
 
 /*
  * Check if there is a place to attach the conn_ilgs. We do this for both
  * detached ilgs and attached ones, since for the latter there could be
- * a better ill to attach them to.
+ * a better ill to attach them to. oill is non-null if we just detached from
+ * that ill.
  */
 static void
-ilg_check_reattach(conn_t *connp)
+ilg_check_reattach(conn_t *connp, ill_t *oill)
@@ -3209,8 +3298,11 @@ ilg_check_reattach(conn_t *connp)
 			/* Note that ilg could have become condemned */
 		}
 
-		/* Is the ill unchanged, even if both are NULL? */
-		if (ill == ilg->ilg_ill) {
+		/*
+		 * Is the ill unchanged, even if both are NULL?
+		 * Did we just detach from that ill?
+		 */
+		if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
 			if (ill != NULL) {
 				/* Drop locks across ill_refrele */
 				ilg_transfer_hold(held_ilg, ilg);
@@ -3259,10 +3351,18 @@ ilg_check_reattach(conn_t *connp)
 			} else {
 				ilm = NULL;
 			}
+			ilg_transfer_hold(held_ilg, ilg);
+			held_ilg = ilg;
 			rw_exit(&connp->conn_ilg_lock);
 			if (ilm != NULL)
 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
 			mutex_exit(&ill2->ill_mcast_serializer);
+			/*
+			 * Now that all locks have been dropped, we can send any
+			 * deferred/queued DLPI or IP packets
+			 */
+			ill_mcast_send_queued(ill2);
+			ill_dlpi_send_queued(ill2);
 			if (need_refrele) {
 				/* Drop ill reference while we hold no locks */
 				ill_refrele(ill2);
@@ -3299,7 +3399,8 @@ ilg_check_reattach(conn_t *connp)
 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
 			/* Note that ilg could have become condemned */
 		}
-
+		ilg_transfer_hold(held_ilg, ilg);
+		held_ilg = ilg;
 		/*
 		 * Check that nobody else attached the ilg and that
 		 * it wasn't condemned while we dropped the lock.
@@ -3317,11 +3418,16 @@ ilg_check_reattach(conn_t *connp)
 				ilg_attach(connp, ilg, ill);
 				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
 			}
-			mutex_exit(&ill->ill_mcast_serializer);
 			/* Drop locks across ill_refrele */
-			ilg_transfer_hold(held_ilg, ilg);
-			held_ilg = ilg;
 			rw_exit(&connp->conn_ilg_lock);
+			mutex_exit(&ill->ill_mcast_serializer);
+			/*
+			 * Now that all locks have been
+			 * dropped, we can send any
+			 * deferred/queued DLPI or IP packets
+			 */
+			ill_mcast_send_queued(ill);
+			ill_dlpi_send_queued(ill);
 			ill_refrele(ill);
 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
 		}
diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c
index 793af4512f..fa630fd3be 100644
--- a/usr/src/uts/common/inet/ip/ip_ndp.c
+++ b/usr/src/uts/common/inet/ip/ip_ndp.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
@@ -371,24 +371,37 @@ nce_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
 		 * until we are done.
 		 */
 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
-		if (IS_IN_SAME_ILLGRP(in_ill, ill)) {
-			under_nce = nce_fastpath_create(in_ill,
-			    nce->nce_common);
-			upper_nce = nce;
-			if ((nce = under_nce) == NULL)
-				err = EINVAL;
+		if (!IS_IN_SAME_ILLGRP(in_ill, ill)) {
+			DTRACE_PROBE2(ill__not__in__group, nce_t *, nce,
+			    ill_t *, ill);
+			rw_exit(&ipst->ips_ill_g_lock);
+			err = ENXIO;
+			nce_refrele(nce);
+			nce = NULL;
+			goto bail;
+		}
+		under_nce = nce_fastpath_create(in_ill, nce->nce_common);
+		if (under_nce == NULL) {
+			rw_exit(&ipst->ips_ill_g_lock);
+			err = EINVAL;
+			nce_refrele(nce);
+			nce = NULL;
+			goto bail;
 		}
 		rw_exit(&ipst->ips_ill_g_lock);
-		if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common))
+		upper_nce = nce;
+		nce = under_nce;	/* will be returned to caller */
+		if (NCE_ISREACHABLE(nce->nce_common))
 			nce_fastpath_trigger(under_nce);
 	}
+	/* nce_refrele is deferred until the lock is dropped */
 	if (nce != NULL) {
 		if (newnce != NULL)
 			*newnce = nce;
 		else
 			nce_refrele(nce);
 	}
-	/* nce_refrele is deferred until the lock is dropped */
+bail:
 	if (upper_nce != NULL)
 		nce_refrele(upper_nce);
 	if (need_ill_refrele)
@@ -3605,15 +3618,27 @@ nce_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
 		 * until we are done.
 		 */
 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
-		if (IS_IN_SAME_ILLGRP(in_ill, ill)) {
-			under_nce = nce_fastpath_create(in_ill,
-			    nce->nce_common);
-			upper_nce = nce;
-			if ((nce = under_nce) == NULL)
-				err = EINVAL;
+		if (!IS_IN_SAME_ILLGRP(in_ill, ill)) {
+			DTRACE_PROBE2(ill__not__in__group, nce_t *, nce,
+			    ill_t *, ill);
+			rw_exit(&ipst->ips_ill_g_lock);
+			err = ENXIO;
+			nce_refrele(nce);
+			nce = NULL;
+			goto bail;
+		}
+		under_nce = nce_fastpath_create(in_ill, nce->nce_common);
+		if (under_nce == NULL) {
+			rw_exit(&ipst->ips_ill_g_lock);
+			err = EINVAL;
+			nce_refrele(nce);
+			nce = NULL;
+			goto bail;
 		}
 		rw_exit(&ipst->ips_ill_g_lock);
-		if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common))
+		upper_nce = nce;
+		nce = under_nce;	/* will be returned to caller */
+		if (NCE_ISREACHABLE(nce->nce_common))
 			nce_fastpath_trigger(under_nce);
 	}
 	if (nce != NULL) {
@@ -3622,13 +3647,11 @@ nce_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
 		else
 			nce_refrele(nce);
 	}
-
+bail:
 	if (under != NULL)
 		ill_refrele(under);
-
 	if (upper_nce != NULL)
 		nce_refrele(upper_nce);
-
 	if (need_ill_refrele)
 		ill_refrele(ill);
@@ -4395,22 +4418,31 @@ nce_add_common(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
 	for (; ncec != NULL; ncec = ncec->ncec_next) {
 		if (ncec->ncec_ill == ill) {
 			if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) {
+				/*
+				 * We should never find *retnce to be
+				 * MYADDR, since the caller may then
+				 * incorrectly restart a DAD timer that's
+				 * already running. However, if we are in
+				 * forwarding mode, and the interface is
+				 * moving in/out of groups, the data
+				 * path ire lookup (e.g., ire_revalidate_nce)
+				 * may have determined that some destination
+				 * is offlink while the control path is adding
+				 * that address as a local address.
+				 * Recover from this case by failing the
+				 * lookup
+				 */
+				if (NCE_MYADDR(ncec))
+					return (ENXIO);
 				*retnce = nce_ill_lookup_then_add(ill, ncec);
 				if (*retnce != NULL)
 					break;
 			}
 		}
 	}
-	if (*retnce != NULL) {
-		/*
-		 * We should never find *retnce to be MYADDR, since the caller
-		 * may then incorrectly restart a DAD timer that's already
-		 * running.
-		 */
-		ASSERT(!NCE_MYADDR(ncec));
-		/* caller must trigger fastpath on nce */
+	if (*retnce != NULL) /* caller must trigger fastpath on nce */
 		return (0);
-	}
+
 	ncec = kmem_cache_alloc(ncec_cache, KM_NOSLEEP);
 	if (ncec == NULL)
 		return (ENOMEM);
diff --git a/usr/src/uts/common/inet/ip/ipmp.c b/usr/src/uts/common/inet/ip/ipmp.c
index b89171ed2b..f36be5a373 100644
--- a/usr/src/uts/common/inet/ip/ipmp.c
+++ b/usr/src/uts/common/inet/ip/ipmp.c
@@ -18,7 +18,7 @@
  *
  * CDDL HEADER END
  *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
@@ -1149,7 +1149,12 @@ ipmp_ill_join_illgrp(ill_t *ill, ipmp_illgrp_t *illg)
 	 * Blow away all multicast memberships that currently exist on `ill'.
 	 * This may seem odd, but it's consistent with the application view
 	 * that `ill' no longer exists (e.g., due to ipmp_ill_rtsaddrmsg()).
+	 * The ill_grp_pending bit prevents multicast group joins after
+	 * update_conn_ill() and before ill_grp assignment.
 	 */
+	mutex_enter(&ill->ill_mcast_serializer);
+	ill->ill_grp_pending = 1;
+	mutex_exit(&ill->ill_mcast_serializer);
 	update_conn_ill(ill, ill->ill_ipst);
 	if (ill->ill_isv6) {
 		reset_mrt_ill(ill);
@@ -1204,6 +1209,10 @@ ipmp_ill_join_illgrp(ill_t *ill, ipmp_illgrp_t *illg)
 	ill->ill_grp = illg;
 	rw_exit(&ipst->ips_ill_g_lock);
 
+	mutex_enter(&ill->ill_mcast_serializer);
+	ill->ill_grp_pending = 0;
+	mutex_exit(&ill->ill_mcast_serializer);
+
 	/*
 	 * Hide the IREs on `ill' so that we don't accidentally find them when
 	 * sending data traffic.
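To summarize the interlock that the ipmp.c and ip_multi.c pieces establish: ipmp_ill_join_illgrp() raises ill_grp_pending under ill_mcast_serializer before update_conn_ill() blows away the memberships, and clears it once ill_grp has been assigned; ip_opt_add_group() tests the flag (or IS_UNDER_IPMP()) under the same serializer and fails a racing join with EADDRNOTAVAIL. A hedged sketch of the handshake, with hypothetical fake_* names rather than the actual illumos code:

/*
 * Illustrative sketch of the ill_grp_pending interlock. The join side
 * brackets the IPMP group move with the flag; the group-add side tests
 * it under the same serializer. Names are hypothetical.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t	mcast_serializer;	/* cf. ill_mcast_serializer */
	int		grp_pending;		/* cf. ill_grp_pending */
	int		under_ipmp;		/* cf. IS_UNDER_IPMP(ill) */
} fake_ill_t;

/* ipmp_ill_join_illgrp() analogue: bracket the group join. */
static void
fake_join_illgrp(fake_ill_t *ill)
{
	pthread_mutex_lock(&ill->mcast_serializer);
	ill->grp_pending = 1;
	pthread_mutex_unlock(&ill->mcast_serializer);

	/* ... blow away memberships, assign ill_grp ... */

	pthread_mutex_lock(&ill->mcast_serializer);
	ill->grp_pending = 0;
	ill->under_ipmp = 1;
	pthread_mutex_unlock(&ill->mcast_serializer);
}

/* ip_opt_add_group() analogue: refuse joins during/after the move. */
static int
fake_add_group(fake_ill_t *ill)
{
	pthread_mutex_lock(&ill->mcast_serializer);
	if (ill->grp_pending || ill->under_ipmp) {
		pthread_mutex_unlock(&ill->mcast_serializer);
		return (EADDRNOTAVAIL);
	}
	/* ... the ilg_add() work would go here ... */
	pthread_mutex_unlock(&ill->mcast_serializer);
	return (0);
}

int
main(void)
{
	fake_ill_t ill = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	fake_join_illgrp(&ill);
	/* A join racing with, or arriving after, the move fails cleanly. */
	printf("fake_add_group: %d\n", fake_add_group(&ill));
	return (0);
}

Because both sides use the same mutex, a joiner either completes before the flag is raised (and its membership is then removed by update_conn_ill()) or observes the flag and bails out.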