summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Mustacchi <rm@joyent.com>2016-04-15 23:19:12 +0000
committerRobert Mustacchi <rm@joyent.com>2016-04-20 00:31:55 +0000
commit95de36ea31f74a899f58298494b15663ae0ae52d (patch)
tree8e00859f4ba4381c45f4b98053e7bfaf8d1202c4
parent807de683fa8af3b1a537c5de4066fd884e3a7743 (diff)
downloadillumos-joyent-95de36ea31f74a899f58298494b15663ae0ae52d.tar.gz
OS-5279 vnd_s_close()/ds_rx race can induce panic
Reviewed by: Cody Mello <melloc@joyent.com> Reviewed by: Bryan Cantrill <bryan@joyent.com>
-rw-r--r--usr/src/uts/common/io/dld/dld_proto.c4
-rw-r--r--usr/src/uts/common/io/dls/dls.c35
-rw-r--r--usr/src/uts/common/io/vnd/vnd.c223
3 files changed, 238 insertions, 24 deletions
diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c
index a51b958d77..661d8b2f4f 100644
--- a/usr/src/uts/common/io/dld/dld_proto.c
+++ b/usr/src/uts/common/io/dld/dld_proto.c
@@ -725,10 +725,6 @@ proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
goto failed;
}
- /* DLS_PROMISC_RX_ONLY can't be a solo flag */
- if (new_flags == DLS_PROMISC_RX_ONLY)
- new_flags = 0;
-
/*
* Adjust channel promiscuity.
*/
diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c
index 23529c9c70..0f8dbcb57a 100644
--- a/usr/src/uts/common/io/dls/dls.c
+++ b/usr/src/uts/common/io/dls/dls.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -252,18 +252,25 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS);
mac_client_promisc_type_t mptype = MAC_CLIENT_PROMISC_ALL;
uint16_t mac_flags = 0;
+ boolean_t doremove = B_FALSE;
ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
ASSERT(!(new_flags & ~(DLS_PROMISC_SAP | DLS_PROMISC_MULTI |
DLS_PROMISC_PHYS | DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)));
/*
- * Asking us just to turn on DLS_PROMISC_RX_ONLY and DLS_PROMISC_FIXUPS
- * is not valid.
+ * If we only have the non-data receive flags set or are only changing
+ * them, then there's nothing to do other than update the flags here.
+ * Basically when we only have something in the set of
+ * DLS_PROMISC_RX_ONLY and DLS_PROMISC_FIXUPS around, then there's
+ * nothing else for us to do other than toggle it, as there's no need to
+ * talk to MAC and we don't have to do anything else.
*/
- if ((new_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0 &&
- new_flags != 0)
- return (EINVAL);
+ if ((old_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0 &&
+ (new_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0) {
+ dsp->ds_promisc = new_flags;
+ return (0);
+ }
/*
* If the user has only requested DLS_PROMISC_MULTI then we need to make
@@ -285,6 +292,19 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
mac_flags |= MAC_PROMISC_FLAGS_NO_PHYS;
/*
+ * If we're coming in and we're being asked to transition to a state
+ * where the only DLS flags would be enabled are flags that change what
+ * we do with promiscuous packets (DLS_PROMISC_RX_ONLY and
+ * DLS_PROMISC_FIXUPS) and not which packets we should receive, then we
+ * need to remove the MAC layer promiscuous handler.
+ */
+ if ((new_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0 &&
+ (old_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) != 0 &&
+ new_flags != 0) {
+ doremove = B_TRUE;
+ }
+
+ /*
* There are three cases we care about here with respect to MAC. Going
* from nothing to something, something to nothing, something to
* something where we need to change how we're getting stuff from mac.
@@ -309,7 +329,8 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
mac_promisc_remove(dsp->ds_vlan_mph);
dsp->ds_vlan_mph = NULL;
}
- } else if (dsp->ds_promisc != 0 && new_flags == 0) {
+ } else if (dsp->ds_promisc != 0 &&
+ (new_flags == 0 || doremove == B_TRUE)) {
ASSERT(dsp->ds_mph != NULL);
mac_promisc_remove(dsp->ds_mph);
diff --git a/usr/src/uts/common/io/vnd/vnd.c b/usr/src/uts/common/io/vnd/vnd.c
index 58e60bbbc5..2abb6f9464 100644
--- a/usr/src/uts/common/io/vnd/vnd.c
+++ b/usr/src/uts/common/io/vnd/vnd.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -216,7 +216,7 @@
* | | we become a zombie.
* | v
* | +--------------+
- * +--<-| VNS_S_ONLINE | This is a vnd STREAMS device's
+ * | | VNS_S_ONLINE | This is a vnd STREAMS device's
* | +--------------+ steady state. It will normally
* | | reside in this state while it is in
* | | active use. It will only transition
@@ -226,15 +226,53 @@
* | | flows over the dld fast path.
* | v
* | +---------------------+
- * +--<-| VNS_S_SHUTTING_DOWN | This vnd state takes care of
- * | +---------------------+ disabling capabilities and then
- * | | transitions to zombie state to
- * v | indicate that it is finished.
+ * +--->| VNS_S_SHUTTING_DOWN | This vnd state takes care of
+ * | +---------------------+ disabling capabilities and
+ * | | flushing all data. At this point
+ * | | any additional data that we receive
+ * | | will be dropped. We leave this
+ * v | state by trying to remove multicast
+ * | | promiscuity.
+ * | |
+ * | v
+ * | +---------------------------------+
+ * +-->| VNS_S_MULTICAST_PROMISCOFF_SENT | In this state, we check if we have
+ * | +---------------------------------+ successfully removed multicast
+ * | | promiscuous mode. If we have
+ * | | failed, we still carry on but only
+ * | | warn. We leave this state by trying
+ * | | to disable SAP level promiscuous
+ * | | mode.
+ * | v
+ * | +---------------------------+
+ * +-->| VNS_S_SAP_PROMISCOFF_SENT | In this state, we check if we have
+ * | +---------------------------+ successfully removed SAP level
+ * | | promiscuous mode. If we have
+ * | | failed, we still carry on but only
+ * | | warn. Note that we don't worry
+ * | | about either of
+ * | | DL_PROMISC_FIXUPS or
+ * | | DL_PROMISC_RX_ONLY. If these are
+ * | | the only two entries left, then we
+ * | | should have anything that MAC is
+ * | | doing for us at this point,
+ * | | therefore it's safe for us to
+ * | | proceed to unbind, which is how we
+ * | | leave this state via a
+ * | v DL_UNBIND_REQ.
+ * | +-------------------+
+ * +--->| VNS_S_UNBIND_SENT | Here, we check how the unbind
+ * | +-------------------+ request went. Regardless of its
+ * | | success, we always transition to
+ * | | a zombie state.
* | v
* | +--------------+
* +--->| VNS_S_ZOMBIE | In this state, the vnd STREAMS
- * +--------------+ device is waiting to finished being
- * reaped.
+ * +--------------+ device is waiting to finish being
+ * reaped. Because we have no more
+ * ways to receive data it should be
+ * safe to destroy all remaining data
+ * structures.
*
* If the stream association fails for any reason the state machine reaches
* VNS_S_ZOMBIE. A more detailed vnd_errno_t will propagate back through the
@@ -246,6 +284,19 @@
* finished, which will occur after the original user attach ioctl to the
* character device returns.
*
+ * It's quite important that we end up sending the full series of STREAMS
+ * messages when tearing down. While it's tempting to say that we should just
+ * rely on the STREAMS device being closed to properly ensure that we have no
+ * more additional data, that's not sufficient due to our use of direct
+ * callbacks. DLS does not ensure that by the time we change the direct
+ * callback (vnd_mac_input) that all callers to it will have been quiesced.
+ * However, it does guarantee that if we disable promiscuous mode ourselves and
+ * we turn off the main data path via DL_UNBIND_REQ that it will work.
+ * Therefore, we make sure to do this ourselves rather than letting DLS/DLD do
+ * it as part of tearing down the STREAMS device. This ensures that we'll
+ * quiesce all data before we destroy our data structures and thus we should
+ * eliminate the race in changing the data function.
+ *
* --------------------
* General Architecture
* --------------------
@@ -255,7 +306,7 @@
* consumers see, the internal STREAMS device state, and the data queues
* themselves. The following ASCII art picture describes their relationships and
* some of the major pieces of data that contain them. These are not exhaustive,
- * eg. synchronization primitives are left out.
+ * e.g. synchronization primitives are left out.
*
* +----------------+ +-----------------+
* | global | | global |
@@ -946,6 +997,9 @@ typedef enum vnd_str_state {
VNS_S_CAPAB_E_SENT,
VNS_S_ONLINE,
VNS_S_SHUTTING_DOWN,
+ VNS_S_MULTICAST_PROMISCOFF_SENT,
+ VNS_S_SAP_PROMISCOFF_SENT,
+ VNS_S_UNBIND_SENT,
VNS_S_ZOMBIE
} vnd_str_state_t;
@@ -2094,7 +2148,7 @@ vnd_mac_input(vnd_str_t *vsp, mac_resource_t *unused, mblk_t *mp_chain,
/*
* If we were operating in a traditional dlpi context then we
- * would have enabled DLIOCRAW and rather than the fast path we
+ * would have enabled DLIOCRAW and rather than the fast path, we
* would come through dld_str_rx_raw. That function does two
* things that we have to consider doing ourselves. The first is
* that it adjusts the b_rptr back to account for dld bumping us
@@ -2405,11 +2459,129 @@ vnd_st_shutdown(vnd_str_t *vsp)
NULL, DLD_DISABLE);
vnd_mac_exit(vsp, mph);
}
+}
+
+static boolean_t
+vnd_st_spromiscoff(vnd_str_t *vsp, int type, vnd_str_state_t next)
+{
+ boolean_t ret = B_TRUE;
+ mblk_t *mp;
+ dl_promiscoff_req_t *dprp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_promiscon_req_t), DL_PROMISCOFF_REQ);
+ if (mp == NULL) {
+ cmn_err(CE_NOTE, "!vnd failed to allocate mblk_t for "
+ "promiscoff request");
+ ret = B_FALSE;
+ goto next;
+ }
+
+ dprp = (dl_promiscoff_req_t *)mp->b_rptr;
+ dprp->dl_level = type;
+
+ putnext(vsp->vns_wq, mp);
+next:
+ vsp->vns_state = next;
+ cv_broadcast(&vsp->vns_stcv);
+ return (ret);
+}
+
+static void
+vnd_st_promiscoff(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
/*
- * We could send an unbind, but dld also does that for us. As we add
- * more capabilities and the like, we should revisit this.
+ * Unlike other cases where we guard against the incoming packet being
+ * NULL, during tear down we try to keep driving and therefore we may
+ * have gotten here due to an earlier failure, so there's nothing to do.
*/
+ mp = vnd_dlpi_inc_pop(vsp);
+ if (mp == NULL)
+ return;
+
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_promiscoff");
+ return;
+ }
+
+ if (cprim != DL_PROMISCOFF_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_promiscoff: Got ack/nack for wrong primitive");
+ return;
+ }
+
+ if (prim == DL_ERROR_ACK) {
+ cmn_err(CE_WARN, "!failed to disable promiscuos mode during "
+ "vnd teardown");
+ }
+}
+
+static boolean_t
+vnd_st_sunbind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ boolean_t ret = B_TRUE;
+
+ mp = vnd_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ);
+ if (mp == NULL) {
+ cmn_err(CE_NOTE, "!vnd failed to allocate mblk_t for "
+ "unbind request");
+ ret = B_FALSE;
+ goto next;
+ }
+
+ putnext(vsp->vns_wq, mp);
+next:
+ vsp->vns_state = VNS_S_UNBIND_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ return (ret);
+}
+
+static void
+vnd_st_unbind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ /*
+ * Unlike other cases where we guard against the incoming packet being
+ * NULL, during tear down we try to keep driving and therefore we may
+ * have gotten here due to an earlier failure, so there's nothing to do.
+ */
+ mp = vnd_dlpi_inc_pop(vsp);
+ if (mp == NULL)
+ goto next;
+
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_unbind");
+ goto next;
+ }
+
+ if (cprim != DL_UNBIND_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_unbind: Got ack/nack for wrong primitive");
+ goto next;
+ }
+
+ if (prim == DL_ERROR_ACK) {
+ cmn_err(CE_WARN, "!failed to unbind stream during vnd "
+ "teardown");
+ }
+
+next:
vsp->vns_state = VNS_S_ZOMBIE;
cv_broadcast(&vsp->vns_stcv);
}
@@ -2431,6 +2603,14 @@ vnd_str_state_transition(void *arg)
mutex_exit(&vsp->vns_lock);
return;
}
+
+ /*
+ * When trying to shut down, or unwinding from a failed enabling, rather
+ * than immediately entering the ZOMBIE state, we may instead opt to try
+ * and enter the next state in the progression. This is especially
+ * important when trying to tear everything down.
+ */
+loop:
DTRACE_PROBE2(vnd__state__transition, uintptr_t, vsp,
vnd_str_state_t, vsp->vns_state);
switch (vsp->vns_state) {
@@ -2528,10 +2708,27 @@ vnd_str_state_transition(void *arg)
break;
case VNS_S_SHUTTING_DOWN:
vnd_st_shutdown(vsp);
+ if (vnd_st_spromiscoff(vsp, DL_PROMISC_MULTI,
+ VNS_S_MULTICAST_PROMISCOFF_SENT) == B_FALSE)
+ goto loop;
+ break;
+ case VNS_S_MULTICAST_PROMISCOFF_SENT:
+ vnd_st_promiscoff(vsp);
+ if (vnd_st_spromiscoff(vsp, DL_PROMISC_SAP,
+ VNS_S_SAP_PROMISCOFF_SENT) == B_FALSE)
+ goto loop;
+ break;
+ case VNS_S_SAP_PROMISCOFF_SENT:
+ vnd_st_promiscoff(vsp);
+ if (vnd_st_sunbind(vsp) == B_FALSE)
+ goto loop;
+ break;
+ case VNS_S_UNBIND_SENT:
+ vnd_st_unbind(vsp);
break;
case VNS_S_ZOMBIE:
while ((mp = vnd_dlpi_inc_pop(vsp)) != NULL)
- vnd_drop_ctl(vsp, mp, "vsp committed suicide");
+ vnd_drop_ctl(vsp, mp, "vsp received data as a zombie");
break;
default:
panic("vnd_str_t entered an unknown state");