summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorCathy Zhou <Cathy.Zhou@Sun.COM>2009-03-17 20:14:50 -0700
committerCathy Zhou <Cathy.Zhou@Sun.COM>2009-03-17 20:14:50 -0700
commit5d460eafffba936e81c4dd5ebe0f59b238f09121 (patch)
treeec942dd0b37946b807039b9f42e69a8f54c30b7d /usr/src
parentf91909144addd198e09d1842e5354bfa62d96691 (diff)
downloadillumos-joyent-5d460eafffba936e81c4dd5ebe0f59b238f09121.tar.gz
PSARC/2008/242 Data Fast-Path for Softmac
6649224 fast-path needed to improve legacy network interface performance after UV 6649898 the smac_lock and smac_mutex fields in softmac_t should be given a more descriptive name 6799767 DLD capability is not correctly updated if it is renegotiated
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c5
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/inet/arp/arp.c214
-rw-r--r--usr/src/uts/common/inet/arp_impl.h3
-rw-r--r--usr/src/uts/common/inet/ip.h2
-rw-r--r--usr/src/uts/common/inet/ip/ip.c60
-rw-r--r--usr/src/uts/common/inet/ip/ip6_if.c3
-rw-r--r--usr/src/uts/common/inet/ip/ip_if.c194
-rw-r--r--usr/src/uts/common/inet/ip/ip_multi.c221
-rw-r--r--usr/src/uts/common/inet/ip_if.h1
-rw-r--r--usr/src/uts/common/io/dld/dld_drv.c11
-rw-r--r--usr/src/uts/common/io/dld/dld_proto.c49
-rw-r--r--usr/src/uts/common/io/dld/dld_str.c75
-rw-r--r--usr/src/uts/common/io/dls/dls.c51
-rw-r--r--usr/src/uts/common/io/mac/mac.c86
-rw-r--r--usr/src/uts/common/io/mac/mac_client.c121
-rw-r--r--usr/src/uts/common/io/mac/mac_flow.c22
-rw-r--r--usr/src/uts/common/io/mac/mac_provider.c16
-rw-r--r--usr/src/uts/common/io/softmac/softmac_ctl.c96
-rw-r--r--usr/src/uts/common/io/softmac/softmac_dev.c355
-rw-r--r--usr/src/uts/common/io/softmac/softmac_fp.c1252
-rw-r--r--usr/src/uts/common/io/softmac/softmac_main.c345
-rw-r--r--usr/src/uts/common/io/softmac/softmac_pkt.c19
-rw-r--r--usr/src/uts/common/io/sundlpi.c1
-rw-r--r--usr/src/uts/common/sys/dld.h3
-rw-r--r--usr/src/uts/common/sys/dld_impl.h8
-rw-r--r--usr/src/uts/common/sys/dlpi.h19
-rw-r--r--usr/src/uts/common/sys/dls_impl.h2
-rw-r--r--usr/src/uts/common/sys/mac.h18
-rw-r--r--usr/src/uts/common/sys/mac_client_priv.h1
-rw-r--r--usr/src/uts/common/sys/mac_impl.h5
-rw-r--r--usr/src/uts/common/sys/mac_provider.h18
-rw-r--r--usr/src/uts/common/sys/softmac_impl.h244
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.debug641
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.obj641
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.debug641
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.obj641
37 files changed, 2897 insertions, 629 deletions
diff --git a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c
index 9adc72162b..fec3ff52e0 100644
--- a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c
+++ b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Mdb kernel support module. This module is loaded automatically when the
* kvm target is initialized. Any global functions declared here are exported
@@ -1578,6 +1576,7 @@ mdb_dlpi_prim(int prim)
case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ");
case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK");
case DL_NOTIFY_IND: return ("DL_NOTIFY_IND");
+ case DL_NOTIFY_CONF: return ("DL_NOTIFY_CONF");
case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ");
case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK");
case DL_CONTROL_REQ: return ("DL_CONTROL_REQ");
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 318a39a906..f97b615a4d 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -596,7 +596,7 @@ AGGR_OBJS += aggr_dev.o aggr_ctl.o aggr_grp.o aggr_port.o \
aggr_send.o aggr_recv.o aggr_lacp.o
SOFTMAC_OBJS += softmac_main.o softmac_ctl.o softmac_capab.o \
- softmac_dev.o softmac_stat.o softmac_pkt.o
+ softmac_dev.o softmac_stat.o softmac_pkt.o softmac_fp.o
NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \
net80211_output.o net80211_node.o net80211_crypto.o \
diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c
index e52655dd47..abdbc39a47 100644
--- a/usr/src/uts/common/inet/arp/arp.c
+++ b/usr/src/uts/common/inet/arp/arp.c
@@ -208,6 +208,7 @@ static void ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *),
static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code);
static int ar_close(queue_t *q);
static int ar_cmd_dispatch(queue_t *q, mblk_t *mp, boolean_t from_wput);
+static void ar_cmd_drain(arl_t *arl);
static void ar_cmd_done(arl_t *arl);
static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size);
static void ar_dlpi_send(arl_t *, mblk_t *);
@@ -1331,6 +1332,53 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size)
return (mp);
}
+static void
+ar_dlpi_dispatch(arl_t *arl)
+{
+ mblk_t *mp;
+ t_uscalar_t primitive = DL_PRIM_INVAL;
+
+ while (((mp = arl->arl_dlpi_deferred) != NULL) &&
+ (arl->arl_dlpi_pending == DL_PRIM_INVAL)) {
+ union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
+
+ DTRACE_PROBE2(dlpi_dispatch, arl_t *, arl, mblk_t *, mp);
+
+ ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
+ arl->arl_dlpi_deferred = mp->b_next;
+ mp->b_next = NULL;
+
+ /*
+ * If this is a DL_NOTIFY_CONF, no ack is expected.
+ */
+ if ((primitive = dlp->dl_primitive) != DL_NOTIFY_CONF)
+ arl->arl_dlpi_pending = dlp->dl_primitive;
+ putnext(arl->arl_wq, mp);
+ }
+
+ if (arl->arl_dlpi_pending == DL_PRIM_INVAL) {
+ /*
+ * No pending DLPI operation.
+ */
+ ASSERT(mp == NULL);
+ DTRACE_PROBE1(dlpi_idle, arl_t *, arl);
+
+ /*
+ * If the last DLPI message dispatched is DL_NOTIFY_CONF,
+ * it is not associated with any pending cmd request, drain
+ * the rest of pending cmd requests, otherwise call
+ * ar_cmd_done() to finish up the current pending cmd
+ * operation.
+ */
+ if (primitive == DL_NOTIFY_CONF)
+ ar_cmd_drain(arl);
+ else
+ ar_cmd_done(arl);
+ } else if (mp != NULL) {
+ DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp);
+ }
+}
+
/*
* The following two functions serialize DLPI messages to the driver, much
* along the lines of ill_dlpi_send and ill_dlpi_done in IP. Basically,
@@ -1341,26 +1389,18 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size)
static void
ar_dlpi_send(arl_t *arl, mblk_t *mp)
{
+ mblk_t **mpp;
+
ASSERT(arl != NULL);
ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
- if (arl->arl_dlpi_pending != DL_PRIM_INVAL) {
- mblk_t **mpp;
-
- /* Must queue message. Tail insertion */
- mpp = &arl->arl_dlpi_deferred;
- while (*mpp != NULL)
- mpp = &((*mpp)->b_next);
- *mpp = mp;
+ /* Always queue the message. Tail insertion */
+ mpp = &arl->arl_dlpi_deferred;
+ while (*mpp != NULL)
+ mpp = &((*mpp)->b_next);
+ *mpp = mp;
- DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp);
- return;
- }
-
- arl->arl_dlpi_pending =
- ((union DL_primitives *)mp->b_rptr)->dl_primitive;
- DTRACE_PROBE2(dlpi_send, arl_t *, arl, mblk_t *, mp);
- putnext(arl->arl_wq, mp);
+ ar_dlpi_dispatch(arl);
}
/*
@@ -1372,30 +1412,71 @@ ar_dlpi_send(arl_t *arl, mblk_t *mp)
static void
ar_dlpi_done(arl_t *arl, t_uscalar_t prim)
{
- mblk_t *mp;
-
if (arl->arl_dlpi_pending != prim) {
DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl,
t_uscalar_t, prim);
return;
}
- if ((mp = arl->arl_dlpi_deferred) == NULL) {
- DTRACE_PROBE2(dlpi_done_idle, arl_t *, arl, t_uscalar_t, prim);
- arl->arl_dlpi_pending = DL_PRIM_INVAL;
- ar_cmd_done(arl);
- return;
- }
+ DTRACE_PROBE2(dlpi_done, arl_t *, arl, t_uscalar_t, prim);
+ arl->arl_dlpi_pending = DL_PRIM_INVAL;
+ ar_dlpi_dispatch(arl);
+}
- arl->arl_dlpi_deferred = mp->b_next;
- mp->b_next = NULL;
+/*
+ * Send a DL_NOTE_REPLUMB_DONE message down to the driver to indicate
+ * the replumb process has already been done. Note that mp is either a
+ * DL_NOTIFY_IND message or an AR_INTERFACE_DOWN message (comes from IP).
+ */
+static void
+arp_replumb_done(arl_t *arl, mblk_t *mp)
+{
+ ASSERT(arl->arl_state == ARL_S_DOWN && arl->arl_replumbing);
- ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
+ mp = mexchange(NULL, mp, sizeof (dl_notify_conf_t), M_PROTO,
+ DL_NOTIFY_CONF);
+ ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification =
+ DL_NOTE_REPLUMB_DONE;
+ arl->arl_replumbing = B_FALSE;
+ ar_dlpi_send(arl, mp);
+}
+
+static void
+ar_cmd_drain(arl_t *arl)
+{
+ mblk_t *mp;
+ queue_t *q;
+
+ /*
+ * Run the commands that have been enqueued while we were waiting
+ * for the last command (AR_INTERFACE_UP or AR_INTERFACE_DOWN)
+ * to complete.
+ */
+ while ((mp = arl->arl_queue) != NULL) {
+ if (((uintptr_t)mp->b_prev & CMD_IN_PROGRESS) != 0) {
+ /*
+ * The current command is an AR_INTERFACE_UP or
+ * AR_INTERFACE_DOWN and is waiting for a DLPI ack
+ * from the driver. Return. We can't make progress now.
+ */
+ break;
+ }
+
+ mp = ar_cmd_dequeue(arl);
+ mp->b_prev = AR_DRAINING;
+ q = mp->b_queue;
+ mp->b_queue = NULL;
- arl->arl_dlpi_pending =
- ((union DL_primitives *)mp->b_rptr)->dl_primitive;
- DTRACE_PROBE2(dlpi_done_next, arl_t *, arl, mblk_t *, mp);
- putnext(arl->arl_wq, mp);
+ /*
+ * Don't call put(q, mp) since it can lead to reorder of
+ * messages by sending the current messages to the end of
+ * arp's syncq
+ */
+ if (q->q_flag & QREADR)
+ ar_rput(q, mp);
+ else
+ ar_wput(q, mp);
+ }
}
static void
@@ -1409,7 +1490,6 @@ ar_cmd_done(arl_t *arl)
queue_t *dlpi_op_done_q;
ar_t *ar_arl;
ar_t *ar_ip;
- queue_t *q;
ASSERT(arl->arl_state == ARL_S_UP || arl->arl_state == ARL_S_DOWN);
@@ -1458,44 +1538,24 @@ ar_cmd_done(arl_t *arl)
ar_arl->ar_arl_ip_assoc = ar_ip;
ar_ip->ar_arl_ip_assoc = ar_arl;
}
- }
- inet_freemsg(mp);
- }
- /*
- * Run the commands that have been enqueued while we were waiting
- * for the last command (AR_INTERFACE_UP or AR_INTERFACE_DOWN)
- * to complete.
- */
- while ((mp = ar_cmd_dequeue(arl)) != NULL) {
- mp->b_prev = AR_DRAINING;
- q = mp->b_queue;
- mp->b_queue = NULL;
-
- /*
- * Don't call put(q, mp) since it can lead to reorder of
- * messages by sending the current messages to the end of
- * arp's syncq
- */
- if (q->q_flag & QREADR)
- ar_rput(q, mp);
- else
- ar_wput(q, mp);
-
- if ((mp = arl->arl_queue) == NULL)
- goto done; /* no work to do */
-
- if ((cmd = (uintptr_t)mp->b_prev) & CMD_IN_PROGRESS) {
+ inet_freemsg(mp);
+ } else if (cmd == AR_INTERFACE_DOWN && arl->arl_replumbing) {
/*
- * The current command is an AR_INTERFACE_UP or
- * AR_INTERFACE_DOWN and is waiting for a DLPI ack
- * from the driver. Return. We can't make progress now.
+ * The arl is successfully brought down and this is
+ * a result of the DL_NOTE_REPLUMB process. Reset
+ * mp->b_prev first (it keeps the 'cmd' information
+ * at this point).
*/
- goto done;
+ mp->b_prev = NULL;
+ arp_replumb_done(arl, mp);
+ } else {
+ inet_freemsg(mp);
}
}
-done:
+ ar_cmd_drain(arl);
+
if (dlpi_op_done_mp != NULL) {
DTRACE_PROBE3(cmd_done_next, arl_t *, arl,
queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp);
@@ -2136,8 +2196,18 @@ ar_interface_down(queue_t *q, mblk_t *mp)
* The arl is already down, no work to do.
*/
if (arl->arl_state == ARL_S_DOWN) {
- /* ar_rput frees the mp */
- return (0);
+ if (arl->arl_replumbing) {
+ /*
+ * The arl is already down and this is a result of
+ * the DL_NOTE_REPLUMB process. Return EINPROGRESS
+ * so this mp won't be freed by ar_rput().
+ */
+ arp_replumb_done(arl, mp);
+ return (EINPROGRESS);
+ } else {
+ /* ar_rput frees the mp */
+ return (0);
+ }
}
/*
@@ -2672,7 +2742,7 @@ ar_ll_up(arl_t *arl)
if (notify_mp == NULL)
goto bad;
((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications =
- DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN;
+ DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_REPLUMB;
arl->arl_state = ARL_S_PENDING;
if (arl->arl_provider_style == DL_STYLE2) {
@@ -3852,6 +3922,16 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp)
case DL_NOTIFY_IND:
DTRACE_PROBE2(rput_dl_notify_ind, arl_t *, arl,
dl_notify_ind_t *, &dlp->notify_ind);
+
+ if (dlp->notify_ind.dl_notification == DL_NOTE_REPLUMB) {
+ arl->arl_replumbing = B_TRUE;
+ if (arl->arl_state == ARL_S_DOWN) {
+ arp_replumb_done(arl, mp);
+ return;
+ }
+ break;
+ }
+
if (ap != NULL) {
switch (dlp->notify_ind.dl_notification) {
case DL_NOTE_LINK_UP:
diff --git a/usr/src/uts/common/inet/arp_impl.h b/usr/src/uts/common/inet/arp_impl.h
index f16fdc97a0..38d0d1ab65 100644
--- a/usr/src/uts/common/inet/arp_impl.h
+++ b/usr/src/uts/common/inet/arp_impl.h
@@ -64,7 +64,8 @@ typedef struct arl_s {
t_uscalar_t arl_dlpi_pending; /* pending DLPI request */
mblk_t *arl_dlpi_deferred; /* Deferred DLPI messages */
uint_t arl_state; /* lower interface state */
- uint_t arl_closing : 1; /* stream is closing */
+ uint_t arl_closing : 1, /* stream is closing */
+ arl_replumbing : 1; /* Wait for IP to bring down */
uint32_t arl_index; /* instance number */
struct arlphy_s *arl_phy; /* physical info, if any */
struct arl_s *arl_ipmp_arl; /* pointer to group arl_t */
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index 4be3138778..d6faecb3a5 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -1953,6 +1953,7 @@ typedef struct ill_s {
mblk_t *ill_promiscoff_mp; /* for ill_leave_allmulti() */
mblk_t *ill_dlpi_deferred; /* b_next chain of control messages */
mblk_t *ill_ardeact_mp; /* deact mp from ipmp_ill_activate() */
+ mblk_t *ill_replumb_mp; /* replumb mp from ill_replumb() */
mblk_t *ill_phys_addr_mp; /* mblk which holds ill_phys_addr */
#define ill_last_mp_to_free ill_phys_addr_mp
@@ -1977,7 +1978,6 @@ typedef struct ill_s {
ill_dl_up : 1,
ill_up_ipifs : 1,
ill_note_link : 1, /* supports link-up notification */
-
ill_capab_reneg : 1, /* capability renegotiation to be done */
ill_dld_capab_inprog : 1, /* direct dld capab call in prog */
ill_need_recover_multicast : 1,
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index ad161476aa..b26c090aad 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -15825,8 +15825,6 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
switch (dlea->dl_error_primitive) {
case DL_DISABMULTI_REQ:
- if (!ill->ill_isv6)
- ipsq_current_finish(ipsq);
ill_dlpi_done(ill, dlea->dl_error_primitive);
break;
case DL_PROMISCON_REQ:
@@ -15902,18 +15900,17 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
mp1 = ipsq_pending_mp_get(ipsq, &connp);
if (mp1 != NULL) {
/*
- * This operation (SIOCSLIFFLAGS) must have
- * happened from a conn.
+ * This might be a result of a DL_NOTE_REPLUMB
+ * notification. In that case, connp is NULL.
*/
- ASSERT(connp != NULL);
- q = CONNP_TO_WQ(connp);
+ if (connp != NULL)
+ q = CONNP_TO_WQ(connp);
+
(void) ipif_down(ipif, NULL, NULL);
/* error is set below the switch */
}
break;
case DL_ENABMULTI_REQ:
- if (!ill->ill_isv6)
- ipsq_current_finish(ipsq);
ill_dlpi_done(ill, DL_ENABMULTI_REQ);
if (ill->ill_dlpi_multicast_state == IDS_INPROGRESS)
@@ -16030,11 +16027,11 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
if (mp1 == NULL)
break;
/*
- * Because mp1 was added by ill_dl_up(), and it always
- * passes a valid connp, connp must be valid here.
+ * mp1 was added by ill_dl_up(). If that is a result of
+ * a DL_NOTE_REPLUMB notification, connp could be NULL.
*/
- ASSERT(connp != NULL);
- q = CONNP_TO_WQ(connp);
+ if (connp != NULL)
+ q = CONNP_TO_WQ(connp);
/*
* We are exclusive. So nothing can change even after
@@ -16056,12 +16053,14 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
*/
if (ill->ill_isv6) {
if (ill->ill_flags & ILLF_XRESOLV) {
- mutex_enter(&connp->conn_lock);
+ if (connp != NULL)
+ mutex_enter(&connp->conn_lock);
mutex_enter(&ill->ill_lock);
success = ipsq_pending_mp_add(connp, ipif, q,
mp1, 0);
mutex_exit(&ill->ill_lock);
- mutex_exit(&connp->conn_lock);
+ if (connp != NULL)
+ mutex_exit(&connp->conn_lock);
if (success) {
err = ipif_resolver_up(ipif,
Res_act_initial);
@@ -16087,11 +16086,13 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
* Leave the pending mblk intact so that
* the ioctl completes in ip_rput().
*/
- mutex_enter(&connp->conn_lock);
+ if (connp != NULL)
+ mutex_enter(&connp->conn_lock);
mutex_enter(&ill->ill_lock);
success = ipsq_pending_mp_add(connp, ipif, q, mp1, 0);
mutex_exit(&ill->ill_lock);
- mutex_exit(&connp->conn_lock);
+ if (connp != NULL)
+ mutex_exit(&connp->conn_lock);
if (success) {
err = ipif_resolver_up(ipif, Res_act_initial);
if (err == EINPROGRESS) {
@@ -16153,6 +16154,15 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
err = ill_set_phys_addr(ill, mp);
break;
+ case DL_NOTE_REPLUMB:
+ /*
+ * Directly return after calling ill_replumb().
+ * Note that we should not free mp as it is reused
+ * in the ill_replumb() function.
+ */
+ err = ill_replumb(ill, mp);
+ return;
+
case DL_NOTE_FASTPATH_FLUSH:
ill_fastpath_flush(ill);
break;
@@ -16462,8 +16472,6 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
switch (dloa->dl_correct_primitive) {
case DL_ENABMULTI_REQ:
case DL_DISABMULTI_REQ:
- if (!ill->ill_isv6)
- ipsq_current_finish(ipsq);
ill_dlpi_done(ill, dloa->dl_correct_primitive);
break;
case DL_PROMISCON_REQ:
@@ -27048,20 +27056,6 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
ipsq_current_start(ipsq, ci.ci_ipif, ipip->ipi_cmd);
/*
- * For most set ioctls that come here, this serves as a single point
- * where we set the IPIF_CHANGING flag. This ensures that there won't
- * be any new references to the ipif. This helps functions that go
- * through this path and end up trying to wait for the refcnts
- * associated with the ipif to go down to zero. The exception is
- * SIOCSLIFREMOVEIF, which sets IPIF_CONDEMNED internally after
- * identifying the right ipif to operate on.
- */
- mutex_enter(&(ci.ci_ipif)->ipif_ill->ill_lock);
- if (ipip->ipi_cmd != SIOCLIFREMOVEIF)
- (ci.ci_ipif)->ipif_state_flags |= IPIF_CHANGING;
- mutex_exit(&(ci.ci_ipif)->ipif_ill->ill_lock);
-
- /*
* A return value of EINPROGRESS means the ioctl is
* either queued and waiting for some reason or has
* already completed.
@@ -27321,7 +27315,7 @@ nak:
break;
switch (((arc_t *)mp->b_rptr)->arc_cmd) {
case AR_ENTRY_SQUERY:
- ip_wput_ctl(q, mp);
+ putnext(q, mp);
return;
case AR_CLIENT_NOTIFY:
ip_arp_news(q, mp);
diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c
index c729118fec..3dbc4559d8 100644
--- a/usr/src/uts/common/inet/ip/ip6_if.c
+++ b/usr/src/uts/common/inet/ip/ip6_if.c
@@ -2825,7 +2825,8 @@ ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q)
goto bad;
((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications =
(DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH |
- DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG);
+ DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG |
+ DL_NOTE_REPLUMB);
phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index 3628dd4f56..ed7ae7b2b1 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -193,6 +193,8 @@ static void ill_glist_delete(ill_t *);
static void ill_phyint_reinit(ill_t *ill);
static void ill_set_nce_router_flags(ill_t *, boolean_t);
static void ill_set_phys_addr_tail(ipsq_t *, queue_t *, mblk_t *, void *);
+static void ill_replumb_tail(ipsq_t *, queue_t *, mblk_t *, void *);
+
static ip_v6intfid_func_t ip_ether_v6intfid, ip_ib_v6intfid;
static ip_v6intfid_func_t ip_ipmp_v6intfid, ip_nodef_v6intfid;
static ip_v6mapinfo_func_t ip_ether_v6mapinfo, ip_ib_v6mapinfo;
@@ -1587,18 +1589,24 @@ conn_cleanup_ill(conn_t *connp, caddr_t arg)
mutex_exit(&connp->conn_lock);
}
-/* ARGSUSED */
-void
-ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
+static void
+ill_down_ipifs_tail(ill_t *ill)
{
- ill_t *ill = q->q_ptr;
ipif_t *ipif;
- ASSERT(IAM_WRITER_IPSQ(ipsq));
+ ASSERT(IAM_WRITER_ILL(ill));
for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
ipif_non_duplicate(ipif);
ipif_down_tail(ipif);
}
+}
+
+/* ARGSUSED */
+void
+ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
+{
+ ASSERT(IAM_WRITER_IPSQ(ipsq));
+ ill_down_ipifs_tail(q->q_ptr);
freemsg(mp);
ipsq_current_finish(ipsq);
}
@@ -3007,10 +3015,10 @@ ill_capability_dld_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *isub)
ill->ill_name);
return;
}
- idc->idc_capab_df = (ip_capab_func_t)dld.dld_capab;
- idc->idc_capab_dh = (void *)dld.dld_capab_handle;
ill->ill_dld_capab = idc;
}
+ idc->idc_capab_df = (ip_capab_func_t)dld.dld_capab;
+ idc->idc_capab_dh = (void *)dld.dld_capab_handle;
ip1dbg(("ill_capability_dld_ack: interface %s "
"supports DLD version %d\n", ill->ill_name, DLD_CURRENT_VERSION));
@@ -6317,6 +6325,10 @@ ipif_ill_refrele_tail(ill_t *ill)
qwriter_ip(ill, ill->ill_rq, mp,
ill_set_phys_addr_tail, CUR_OP, B_TRUE);
return;
+ case DL_NOTE_REPLUMB:
+ qwriter_ip(ill, ill->ill_rq, mp,
+ ill_replumb_tail, CUR_OP, B_TRUE);
+ return;
default:
ASSERT(0);
ill_refrele(ill);
@@ -8021,6 +8033,7 @@ ipsq_exit(ipsq_t *ipsq)
void
ipsq_current_start(ipsq_t *ipsq, ipif_t *ipif, int ioccmd)
{
+ ill_t *ill = ipif->ipif_ill;
ipxop_t *ipx = ipsq->ipsq_xop;
ASSERT(IAM_WRITER_IPSQ(ipsq));
@@ -8032,6 +8045,39 @@ ipsq_current_start(ipsq_t *ipsq, ipif_t *ipif, int ioccmd)
mutex_enter(&ipx->ipx_lock);
ipx->ipx_current_ipif = ipif;
mutex_exit(&ipx->ipx_lock);
+
+ /*
+ * Set IPIF_CHANGING on one or more ipifs associated with the
+ * current exclusive operation. IPIF_CHANGING prevents any new
+ * references to the ipif (so that the references will eventually
+ * drop to zero) and also prevents any "get" operations (e.g.,
+ * SIOCGLIFFLAGS) from being able to access the ipif until the
+ * operation has completed and the ipif is again in a stable state.
+ *
+ * For ioctls, IPIF_CHANGING is set on the ipif associated with the
+ * ioctl. For internal operations (where ioccmd is zero), all ipifs
+ * on the ill are marked with IPIF_CHANGING since it's unclear which
+ * ipifs will be affected.
+ *
+ * Note that SIOCLIFREMOVEIF is a special case as it sets
+ * IPIF_CONDEMNED internally after identifying the right ipif to
+ * operate on.
+ */
+ switch (ioccmd) {
+ case SIOCLIFREMOVEIF:
+ break;
+ case 0:
+ mutex_enter(&ill->ill_lock);
+ ipif = ipif->ipif_ill->ill_ipif;
+ for (; ipif != NULL; ipif = ipif->ipif_next)
+ ipif->ipif_state_flags |= IPIF_CHANGING;
+ mutex_exit(&ill->ill_lock);
+ break;
+ default:
+ mutex_enter(&ill->ill_lock);
+ ipif->ipif_state_flags |= IPIF_CHANGING;
+ mutex_exit(&ill->ill_lock);
+ }
}
/*
@@ -8061,7 +8107,13 @@ ipsq_current_finish(ipsq_t *ipsq)
mutex_enter(&ill->ill_lock);
dlpi_pending = ill->ill_dlpi_pending;
- ipif->ipif_state_flags &= ~IPIF_CHANGING;
+ if (ipx->ipx_current_ioctl == 0) {
+ ipif = ill->ill_ipif;
+ for (; ipif != NULL; ipif = ipif->ipif_next)
+ ipif->ipif_state_flags &= ~IPIF_CHANGING;
+ } else {
+ ipif->ipif_state_flags &= ~IPIF_CHANGING;
+ }
mutex_exit(&ill->ill_lock);
}
@@ -14010,20 +14062,9 @@ ill_up_ipifs_on_ill(ill_t *ill, queue_t *q, mblk_t *mp)
if (ill == NULL)
return (0);
- /*
- * Except for ipif_state_flags and ill_state_flags the other
- * fields of the ipif/ill that are modified below are protected
- * implicitly since we are a writer. We would have tried to down
- * even an ipif that was already down, in ill_down_ipifs. So we
- * just blindly clear the IPIF_CHANGING flag here on all ipifs.
- */
ASSERT(IAM_WRITER_ILL(ill));
-
ill->ill_up_ipifs = B_TRUE;
for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
- mutex_enter(&ill->ill_lock);
- ipif->ipif_state_flags &= ~IPIF_CHANGING;
- mutex_exit(&ill->ill_lock);
if (ipif->ipif_was_up) {
if (!(ipif->ipif_flags & IPIF_UP))
err = ipif_up(ipif, q, mp);
@@ -14060,19 +14101,16 @@ ill_up_ipifs(ill_t *ill, queue_t *q, mblk_t *mp)
}
/*
- * Bring down any IPIF_UP ipifs on ill.
+ * Bring down any IPIF_UP ipifs on ill. If "logical" is B_TRUE, we bring
+ * down the ipifs without sending DL_UNBIND_REQ to the driver.
*/
static void
-ill_down_ipifs(ill_t *ill)
+ill_down_ipifs(ill_t *ill, boolean_t logical)
{
ipif_t *ipif;
ASSERT(IAM_WRITER_ILL(ill));
- /*
- * Except for ipif_state_flags the other fields of the ipif/ill that
- * are modified below are protected implicitly since we are a writer
- */
for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
/*
* We go through the ipif_down logic even if the ipif
@@ -14083,19 +14121,19 @@ ill_down_ipifs(ill_t *ill)
if (ipif->ipif_flags & IPIF_UP)
ipif->ipif_was_up = B_TRUE;
- mutex_enter(&ill->ill_lock);
- ipif->ipif_state_flags |= IPIF_CHANGING;
- mutex_exit(&ill->ill_lock);
-
/*
* Need to re-create net/subnet bcast ires if
* they are dependent on ipif.
*/
if (!ipif->ipif_isv6)
ipif_check_bcast_ires(ipif);
- (void) ipif_logical_down(ipif, NULL, NULL);
- ipif_non_duplicate(ipif);
- ipif_down_tail(ipif);
+ if (logical) {
+ (void) ipif_logical_down(ipif, NULL, NULL);
+ ipif_non_duplicate(ipif);
+ ipif_down_tail(ipif);
+ } else {
+ (void) ipif_down(ipif, NULL, NULL);
+ }
}
}
@@ -14408,6 +14446,7 @@ ill_dlpi_dispatch(ill_t *ill, mblk_t *mp)
{
union DL_primitives *dlp;
t_uscalar_t prim;
+ boolean_t waitack = B_FALSE;
ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
@@ -14437,11 +14476,20 @@ ill_dlpi_dispatch(ill_t *ill, mblk_t *mp)
* we only wait for the ACK of the DL_UNBIND_REQ.
*/
mutex_enter(&ill->ill_lock);
- if (!(ill->ill_state_flags & ILL_CONDEMNED) || (prim == DL_UNBIND_REQ))
+ if (!(ill->ill_state_flags & ILL_CONDEMNED) ||
+ (prim == DL_UNBIND_REQ)) {
ill->ill_dlpi_pending = prim;
+ waitack = B_TRUE;
+ }
mutex_exit(&ill->ill_lock);
putnext(ill->ill_wq, mp);
+
+ /*
+ * There is no ack for DL_NOTIFY_CONF messages
+ */
+ if (waitack && prim == DL_NOTIFY_CONF)
+ ill_dlpi_done(ill, prim);
}
/*
@@ -16165,14 +16213,13 @@ ill_dl_up(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q)
* Record state needed to complete this operation when the
* DL_BIND_ACK shows up. Also remember the pre-allocated mblks.
*/
- ASSERT(WR(q)->q_next == NULL);
- connp = Q_TO_CONN(q);
-
- mutex_enter(&connp->conn_lock);
+ connp = CONN_Q(q) ? Q_TO_CONN(q) : NULL;
+ ASSERT(connp != NULL || !CONN_Q(q));
+ GRAB_CONN_LOCK(q);
mutex_enter(&ipif->ipif_ill->ill_lock);
success = ipsq_pending_mp_add(connp, ipif, q, mp, 0);
mutex_exit(&ipif->ipif_ill->ill_lock);
- mutex_exit(&connp->conn_lock);
+ RELEASE_CONN_LOCK(q);
if (!success)
goto bad;
@@ -19981,7 +20028,7 @@ ill_set_phys_addr(ill_t *ill, mblk_t *mp)
* If we can quiesce the ill, then set the address. If not, then
* ill_set_phys_addr_tail() will be called from ipif_ill_refrele_tail().
*/
- ill_down_ipifs(ill);
+ ill_down_ipifs(ill, B_TRUE);
mutex_enter(&ill->ill_lock);
if (!ill_is_quiescent(ill)) {
/* call cannot fail since `conn_t *' argument is NULL */
@@ -20062,6 +20109,75 @@ ill_set_ndmp(ill_t *ill, mblk_t *ndmp, uint_t addroff, uint_t addrlen)
ill->ill_nd_lla_len = addrlen;
}
+/*
+ * Replumb the ill.
+ */
+int
+ill_replumb(ill_t *ill, mblk_t *mp)
+{
+ ipsq_t *ipsq = ill->ill_phyint->phyint_ipsq;
+
+ ASSERT(IAM_WRITER_IPSQ(ipsq));
+
+ ipsq_current_start(ipsq, ill->ill_ipif, 0);
+
+ /*
+ * If we can quiesce the ill, then continue. If not, then
+ * ill_replumb_tail() will be called from ipif_ill_refrele_tail().
+ */
+ ill_down_ipifs(ill, B_FALSE);
+
+ mutex_enter(&ill->ill_lock);
+ if (!ill_is_quiescent(ill)) {
+ /* call cannot fail since `conn_t *' argument is NULL */
+ (void) ipsq_pending_mp_add(NULL, ill->ill_ipif, ill->ill_rq,
+ mp, ILL_DOWN);
+ mutex_exit(&ill->ill_lock);
+ return (EINPROGRESS);
+ }
+ mutex_exit(&ill->ill_lock);
+
+ ill_replumb_tail(ipsq, ill->ill_rq, mp, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+ill_replumb_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy)
+{
+ ill_t *ill = q->q_ptr;
+
+ ASSERT(IAM_WRITER_IPSQ(ipsq));
+
+ ill_down_ipifs_tail(ill);
+
+ freemsg(ill->ill_replumb_mp);
+ ill->ill_replumb_mp = copyb(mp);
+
+ /*
+ * Successfully quiesced and brought down the interface, now we send
+ * the DL_NOTE_REPLUMB_DONE message down to the driver. Reuse the
+ * DL_NOTE_REPLUMB message.
+ */
+ mp = mexchange(NULL, mp, sizeof (dl_notify_conf_t), M_PROTO,
+ DL_NOTIFY_CONF);
+ ASSERT(mp != NULL);
+ ((dl_notify_conf_t *)mp->b_rptr)->dl_notification =
+ DL_NOTE_REPLUMB_DONE;
+ ill_dlpi_send(ill, mp);
+
+ /*
+ * If there are ipifs to bring up, ill_up_ipifs() will return
+ * EINPROGRESS, and ipsq_current_finish() will be called by
+ * ip_rput_dlpi_writer() or ip_arp_done() when the last ipif is
+ * brought up.
+ */
+ if (ill->ill_replumb_mp == NULL ||
+ ill_up_ipifs(ill, q, ill->ill_replumb_mp) != EINPROGRESS) {
+ ipsq_current_finish(ipsq);
+ }
+}
+
major_t IP_MAJ;
#define IP "ip"
diff --git a/usr/src/uts/common/inet/ip/ip_multi.c b/usr/src/uts/common/inet/ip/ip_multi.c
index 656080b769..d7be67cd26 100644
--- a/usr/src/uts/common/inet/ip/ip_multi.c
+++ b/usr/src/uts/common/inet/ip/ip_multi.c
@@ -81,8 +81,6 @@ static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive,
uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
-static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
- uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
static void conn_ilg_reap(conn_t *connp);
static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
@@ -676,6 +674,42 @@ ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
}
/*
+ * Map the given IP multicast address to the L2 multicast MAC address.
+ */
+static void
+ill_multicast_mapping(ill_t *ill, ipaddr_t ip_addr, uint8_t *hw_addr,
+ uint32_t hw_addrlen)
+{
+ dl_unitdata_req_t *dlur;
+ ipaddr_t proto_extract_mask;
+ uint8_t *from, *bcast_addr;
+ uint32_t hw_extract_start;
+ int len;
+
+ ASSERT(IN_CLASSD(ntohl(ip_addr)));
+ ASSERT(hw_addrlen == ill->ill_phys_addr_length);
+ ASSERT((ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) == 0);
+ ASSERT((ill->ill_flags & ILLF_MULTICAST) != 0);
+
+ /*
+ * Find the physical broadcast address.
+ */
+ dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr;
+ bcast_addr = (uint8_t *)dlur + dlur->dl_dest_addr_offset;
+ if (ill->ill_sap_length > 0)
+ bcast_addr += ill->ill_sap_length;
+
+ VERIFY(MEDIA_V4MINFO(ill->ill_media, hw_addrlen, bcast_addr,
+ hw_addr, &hw_extract_start, &proto_extract_mask));
+
+ len = MIN((int)hw_addrlen - hw_extract_start, IP_ADDR_LEN);
+ ip_addr &= proto_extract_mask;
+ from = (uint8_t *)&ip_addr;
+ while (len-- > 0)
+ hw_addr[hw_extract_start + len] |= from[len];
+}
+
+/*
* Send a multicast request to the driver for enabling multicast reception
* for v6groupp address. The caller has already checked whether it is
* appropriate to send one or not.
@@ -698,48 +732,30 @@ ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
return (0);
/*
- * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
- * on.
+ * Create a DL_ENABMULTI_REQ.
*/
mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
&addrlen, &addroff);
if (!mp)
return (ENOMEM);
+
if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
ipaddr_t v4group;
IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
- /*
- * NOTE!!!
- * The "addroff" passed in here was calculated by
- * ill_create_dl(), and will be used by ill_create_squery()
- * to perform some twisted coding magic. It is the offset
- * into the dl_xxx_req of the hw addr. Here, it will be
- * added to b_wptr - b_rptr to create a magic number that
- * is not an offset into this squery mblk.
- * The actual hardware address will be accessed only in the
- * dl_xxx_req, not in the squery. More importantly,
- * that hardware address can *only* be accessed in this
- * mblk chain by calling mi_offset_param_c(), which uses
- * the magic number in the squery hw offset field to go
- * to the *next* mblk (the dl_xxx_req), subtract the
- * (b_wptr - b_rptr), and find the actual offset into
- * the dl_xxx_req.
- * Any method that depends on using the
- * offset field in the dl_disabmulti_req or squery
- * to find either hardware address will similarly fail.
- *
- * Look in ar_entry_squery() in arp.c to see how this offset
- * is used.
- */
- mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
- if (!mp)
- return (ENOMEM);
- ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
+
+ ill_multicast_mapping(ill, v4group,
+ mp->b_rptr + addroff, addrlen);
+
+ ip1dbg(("ip_ll_send_enabmulti_req: IPv4 %s on %s\n",
inet_ntop(AF_INET6, v6groupp, group_buf,
sizeof (group_buf)),
ill->ill_name));
- putnext(ill->ill_rq, mp);
+
+ /* Track the state if this is the first enabmulti */
+ if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
+ ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
+ ill_dlpi_send(ill, mp);
} else {
ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
" %s\n",
@@ -934,7 +950,7 @@ ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
mblk_t *mp;
char group_buf[INET6_ADDRSTRLEN];
- uint32_t addrlen, addroff;
+ uint32_t addrlen, addroff;
ASSERT(IAM_WRITER_ILL(ill));
@@ -945,12 +961,10 @@ ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
return (0);
/*
- * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
- * on.
+ * Create a DL_DISABMULTI_REQ.
*/
mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
-
if (!mp)
return (ENOMEM);
@@ -958,29 +972,15 @@ ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
ipaddr_t v4group;
IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
- /*
- * NOTE!!!
- * The "addroff" passed in here was calculated by
- * ill_create_dl(), and will be used by ill_create_squery()
- * to perform some twisted coding magic. It is the offset
- * into the dl_xxx_req of the hw addr. Here, it will be
- * added to b_wptr - b_rptr to create a magic number that
- * is not an offset into this mblk.
- *
- * Please see the comment in ip_ll_send)enabmulti_req()
- * for a complete explanation.
- *
- * Look in ar_entry_squery() in arp.c to see how this offset
- * is used.
- */
- mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
- if (!mp)
- return (ENOMEM);
- ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
+
+ ill_multicast_mapping(ill, v4group,
+ mp->b_rptr + addroff, addrlen);
+
+ ip1dbg(("ip_ll_send_disabmulti_req: IPv4 %s on %s\n",
inet_ntop(AF_INET6, v6groupp, group_buf,
sizeof (group_buf)),
ill->ill_name));
- putnext(ill->ill_rq, mp);
+ ill_dlpi_send(ill, mp);
} else {
ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
" %s\n",
@@ -1296,58 +1296,6 @@ ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
fanout_flags, zoneid);
}
-static area_t ip_aresq_template = {
- AR_ENTRY_SQUERY, /* cmd */
- sizeof (area_t)+IP_ADDR_LEN, /* name offset */
- sizeof (area_t), /* name len (filled by ill_arp_alloc) */
- IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */
- sizeof (area_t), /* proto addr offset */
- IP_ADDR_LEN, /* proto addr_length */
- 0, /* proto mask offset */
- /* Rest is initialized when used */
- 0, /* flags */
- 0, /* hw addr offset */
- 0, /* hw addr length */
-};
-
-static mblk_t *
-ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
- uint32_t addroff, mblk_t *mp_tail)
-{
- mblk_t *mp;
- area_t *area;
-
- mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
- (caddr_t)&ipaddr);
- if (!mp) {
- freemsg(mp_tail);
- return (NULL);
- }
- area = (area_t *)mp->b_rptr;
- area->area_hw_addr_length = addrlen;
- area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
- /*
- * NOTE!
- *
- * The area_hw_addr_offset, as can be seen, does not hold the
- * actual hardware address offset. Rather, it holds the offset
- * to the hw addr in the dl_xxx_req in mp_tail, modified by
- * adding (mp->b_wptr - mp->b_rptr). This allows the function
- * mi_offset_paramc() to find the hardware address in the
- * *second* mblk (dl_xxx_req), not this mblk.
- *
- * Using mi_offset_paramc() is thus the *only* way to access
- * the dl_xxx_hw address.
- *
- * The squery hw address should *not* be accessed.
- *
- * See ar_entry_squery() in arp.c for an example of how all this works.
- */
-
- mp->b_cont = mp_tail;
- return (mp);
-}
-
/*
* Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
* the hardware address.
@@ -1425,63 +1373,6 @@ ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
}
/*
- * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
- * messages that had been delayed until we'd heard back from ARP. One catch:
- * we need to ensure that no one else becomes writer on the IPSQ before we've
- * received the replies, or they'll incorrectly process our replies as part of
- * their unrelated IPSQ operation. To do this, we start a new IPSQ operation,
- * which will complete when we process the reply in ip_rput_dlpi_writer().
- */
-/* ARGSUSED */
-static void
-ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
-{
- ill_t *ill = q->q_ptr;
- t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
-
- ASSERT(IAM_WRITER_ILL(ill));
- ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
- ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));
-
- if (prim == DL_ENABMULTI_REQ) {
- /* Track the state if this is the first enabmulti */
- if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
- ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
- }
-
- ipsq_current_start(ipsq, ill->ill_ipif, 0);
- ill_dlpi_send(ill, mp);
-}
-
-void
-ip_wput_ctl(queue_t *q, mblk_t *mp)
-{
- ill_t *ill = q->q_ptr;
- mblk_t *dlmp = mp->b_cont;
- area_t *area = (area_t *)mp->b_rptr;
- t_uscalar_t prim;
-
- /* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
- if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
- dlmp == NULL) {
- putnext(q, mp);
- return;
- }
-
- /* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
- prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
- if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
- putnext(q, mp);
- return;
- }
- freeb(mp);
-
- /* See comments above ip_wput_ctl_writer() for details */
- ill_refhold(ill);
- qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
-}
-
-/*
* Rejoin any groups which have been explicitly joined by the application (we
* left all explicitly joined groups as part of ill_leave_multicast() prior to
* bringing the interface down). Note that because groups can be joined and
diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h
index 80dc0a691b..b604c13252 100644
--- a/usr/src/uts/common/inet/ip_if.h
+++ b/usr/src/uts/common/inet/ip_if.h
@@ -188,6 +188,7 @@ extern void ill_refresh_bcast(ill_t *);
extern void ill_restart_dad(ill_t *, boolean_t);
extern boolean_t ill_setdefaulttoken(ill_t *);
extern int ill_set_phys_addr(ill_t *, mblk_t *);
+extern int ill_replumb(ill_t *, mblk_t *);
extern void ill_set_ndmp(ill_t *, mblk_t *, uint_t, uint_t);
extern mblk_t *ill_pending_mp_get(ill_t *, conn_t **, uint_t);
diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c
index f833adce01..57721bb2ed 100644
--- a/usr/src/uts/common/io/dld/dld_drv.c
+++ b/usr/src/uts/common/io/dld/dld_drv.c
@@ -824,16 +824,19 @@ drv_ioc_usagelog(void *karg, intptr_t arg, int mode, cred_t *cred,
int *rvalp)
{
dld_ioc_usagelog_t *log_info = (dld_ioc_usagelog_t *)karg;
+ int err = 0;
if (log_info->ul_type < MAC_LOGTYPE_LINK ||
log_info->ul_type > MAC_LOGTYPE_FLOW)
return (EINVAL);
- if (log_info->ul_onoff)
- mac_start_logusage(log_info->ul_type, log_info->ul_interval);
- else
+ if (log_info->ul_onoff) {
+ err = mac_start_logusage(log_info->ul_type,
+ log_info->ul_interval);
+ } else {
mac_stop_logusage(log_info->ul_type);
- return (0);
+ }
+ return (err);
}
/*
diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c
index b6faf7ada3..1f683c8591 100644
--- a/usr/src/uts/common/io/dld/dld_proto.c
+++ b/usr/src/uts/common/io/dld/dld_proto.c
@@ -430,8 +430,7 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp)
mac_perim_enter_by_mh(dsp->ds_mh, &mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
- ((err = dls_active_set(dsp)) != 0)) {
+ if ((err = dls_active_set(dsp)) != 0) {
dl_err = DL_SYSERR;
goto failed2;
}
@@ -460,8 +459,7 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp)
}
dsp->ds_dlstate = DL_UNBOUND;
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dls_active_clear(dsp);
+ dls_active_clear(dsp, B_FALSE);
goto failed2;
}
@@ -489,9 +487,6 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp)
dlsap_addr_length += sizeof (uint16_t);
dsp->ds_dlstate = DL_IDLE;
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dsp->ds_passivestate = DLD_ACTIVE;
-
dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
return;
@@ -557,6 +552,7 @@ proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
dsp->ds_mode = DLD_UNITDATA;
dsp->ds_dlstate = DL_UNBOUND;
+ dls_active_clear(dsp, B_FALSE);
mac_perim_exit(mph);
dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
return;
@@ -609,8 +605,7 @@ proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
mac_perim_enter_by_mh(dsp->ds_mh, &mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
- ((err = dls_active_set(dsp)) != 0)) {
+ if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
dsp->ds_promisc = promisc_saved;
dl_err = DL_SYSERR;
goto failed2;
@@ -624,15 +619,13 @@ proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
if (err != 0) {
dl_err = DL_SYSERR;
dsp->ds_promisc = promisc_saved;
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dls_active_clear(dsp);
+ if (promisc_saved == 0)
+ dls_active_clear(dsp, B_FALSE);
goto failed2;
}
mac_perim_exit(mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dsp->ds_passivestate = DLD_ACTIVE;
dlokack(q, mp, DL_PROMISCON_REQ);
return;
@@ -702,12 +695,18 @@ proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
* Adjust channel promiscuity.
*/
err = dls_promisc(dsp, promisc_saved);
- mac_perim_exit(mph);
if (err != 0) {
+ mac_perim_exit(mph);
dl_err = DL_SYSERR;
goto failed;
}
+
+ if (dsp->ds_promisc == 0)
+ dls_active_clear(dsp, B_FALSE);
+
+ mac_perim_exit(mph);
+
dlokack(q, mp, DL_PROMISCOFF_REQ);
return;
failed:
@@ -741,14 +740,12 @@ proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
mac_perim_enter_by_mh(dsp->ds_mh, &mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
- ((err = dls_active_set(dsp)) != 0)) {
+ if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
dl_err = DL_SYSERR;
goto failed2;
}
err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
-
if (err != 0) {
switch (err) {
case EINVAL:
@@ -763,16 +760,13 @@ proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
dl_err = DL_SYSERR;
break;
}
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dls_active_clear(dsp);
-
+ if (dsp->ds_dmap == NULL)
+ dls_active_clear(dsp, B_FALSE);
goto failed2;
}
mac_perim_exit(mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dsp->ds_passivestate = DLD_ACTIVE;
dlokack(q, mp, DL_ENABMULTI_REQ);
return;
@@ -809,6 +803,8 @@ proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
mac_perim_enter_by_mh(dsp->ds_mh, &mph);
err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
+ if ((err == 0) && (dsp->ds_dmap == NULL))
+ dls_active_clear(dsp, B_FALSE);
mac_perim_exit(mph);
if (err != 0) {
@@ -909,8 +905,7 @@ proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
mac_perim_enter_by_mh(dsp->ds_mh, &mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
- ((err = dls_active_set(dsp)) != 0)) {
+ if ((err = dls_active_set(dsp)) != 0) {
dl_err = DL_SYSERR;
goto failed2;
}
@@ -928,17 +923,13 @@ proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
dl_err = DL_SYSERR;
break;
}
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dls_active_clear(dsp);
-
+ dls_active_clear(dsp, B_FALSE);
goto failed2;
}
mac_perim_exit(mph);
- if (dsp->ds_passivestate == DLD_UNINITIALIZED)
- dsp->ds_passivestate = DLD_ACTIVE;
dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
return;
diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c
index f7f4266062..170e087a69 100644
--- a/usr/src/uts/common/io/dld/dld_str.c
+++ b/usr/src/uts/common/io/dld/dld_str.c
@@ -213,28 +213,20 @@ dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
return (rc);
}
-/*
- * qi_qopen: open(9e)
- */
-/*ARGSUSED*/
+void *
+dld_str_private(queue_t *q)
+{
+ return (((dld_str_t *)(q->q_ptr))->ds_private);
+}
+
int
-dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
+dld_str_open(queue_t *rq, dev_t *devp, void *private)
{
dld_str_t *dsp;
major_t major;
minor_t minor;
int err;
- if (sflag == MODOPEN)
- return (ENOTSUP);
-
- /*
- * This is a cloning driver and therefore each queue should only
- * ever get opened once.
- */
- if (rq->q_ptr != NULL)
- return (EBUSY);
-
major = getmajor(*devp);
minor = getminor(*devp);
@@ -249,12 +241,14 @@ dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
return (ENOSR);
ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
+ dsp->ds_private = private;
if (minor != 0) {
/*
* Style 1 open
*/
if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0)
goto failed;
+
ASSERT(dsp->ds_dlstate == DL_UNBOUND);
} else {
(void) qassociate(rq, -1);
@@ -276,11 +270,8 @@ failed:
return (err);
}
-/*
- * qi_qclose: close(9e)
- */
int
-dld_close(queue_t *rq)
+dld_str_close(queue_t *rq)
{
dld_str_t *dsp = rq->q_ptr;
@@ -298,11 +289,6 @@ dld_close(queue_t *rq)
cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock);
mutex_exit(&dsp->ds_lock);
- /*
- * Disable the queue srv(9e) routine.
- */
- qprocsoff(rq);
-
/*
* This stream was open to a provider node. Check to see
@@ -322,6 +308,40 @@ dld_close(queue_t *rq)
}
/*
+ * qi_qopen: open(9e)
+ */
+/*ARGSUSED*/
+int
+dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
+{
+ if (sflag == MODOPEN)
+ return (ENOTSUP);
+
+ /*
+ * This is a cloning driver and therefore each queue should only
+ * ever get opened once.
+ */
+ if (rq->q_ptr != NULL)
+ return (EBUSY);
+
+ return (dld_str_open(rq, devp, NULL));
+}
+
+/*
+ * qi_qclose: close(9e)
+ */
+int
+dld_close(queue_t *rq)
+{
+ /*
+ * Disable the queue srv(9e) routine.
+ */
+ qprocsoff(rq);
+
+ return (dld_str_close(rq));
+}
+
+/*
* qi_qputp: put(9e)
*/
void
@@ -603,6 +623,7 @@ dld_str_destroy(dld_str_t *dsp)
ASSERT(dsp->ds_direct == B_FALSE);
ASSERT(dsp->ds_lso == B_FALSE);
ASSERT(dsp->ds_lso_max == 0);
+ ASSERT(dsp->ds_passivestate != DLD_ACTIVE);
/*
* Reinitialize all the flags.
@@ -930,11 +951,10 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
dev_t dev;
int err;
const char *drvname;
- mac_perim_handle_t mph;
+ mac_perim_handle_t mph = NULL;
boolean_t qassociated = B_FALSE;
dls_link_t *dlp = NULL;
dls_dl_handle_t ddp = NULL;
- boolean_t entered_perim = B_FALSE;
if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
return (EINVAL);
@@ -959,7 +979,6 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
goto failed;
- entered_perim = B_TRUE;
/*
* Open a channel.
@@ -986,7 +1005,7 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
failed:
if (dlp != NULL)
dls_link_rele(dlp);
- if (entered_perim)
+ if (mph != NULL)
mac_perim_exit(mph);
if (ddp != NULL)
dls_devnet_rele(ddp);
diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c
index 72f1aecd6a..4cf2efcccf 100644
--- a/usr/src/uts/common/io/dls/dls.c
+++ b/usr/src/uts/common/io/dls/dls.c
@@ -46,8 +46,17 @@ dls_open(dls_link_t *dlp, dls_dl_handle_t ddh, dld_str_t *dsp)
if (zid != GLOBAL_ZONEID && dlp->dl_zid != zid)
return (ENOENT);
- if ((err = mac_start(dlp->dl_mh)) != 0)
+ /*
+ * mac_start() is required for non-legacy MACs to show accurate
+ * kstats even before the interface is brought up. For legacy
+ * drivers, this is not needed. Further, calling mac_start() for
+ * legacy drivers would make the shared-lower-stream to stay in
+ * the DL_IDLE state, which in turn causes performance regression.
+ */
+ if (!mac_capab_get(dlp->dl_mh, MAC_CAPAB_LEGACY, NULL) &&
+ ((err = mac_start(dlp->dl_mh)) != 0)) {
return (err);
+ }
local = (zid == dlp->dl_zid);
dlp->dl_zone_ref += (local ? 1 : 0);
@@ -96,7 +105,7 @@ dls_close(dld_str_t *dsp)
}
dsp->ds_dmap = NULL;
- dls_active_clear(dsp);
+ dls_active_clear(dsp, B_TRUE);
/*
* If the dld_str_t is bound then unbind it.
@@ -126,7 +135,8 @@ dls_close(dld_str_t *dsp)
dsp->ds_dlp = NULL;
- mac_stop(dsp->ds_mh);
+ if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_LEGACY, NULL))
+ mac_stop(dsp->ds_mh);
/*
* Release our reference to the dls_link_t allowing that to be
@@ -628,29 +638,50 @@ dls_active_set(dld_str_t *dsp)
ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
- /* If we're already active, then there's nothing more to do. */
- if (dsp->ds_active)
+ if (dsp->ds_passivestate == DLD_PASSIVE)
return (0);
- if ((err = dls_mac_active_set(dsp->ds_dlp)) != 0) {
+ /* If we're already active, then there's nothing more to do. */
+ if ((dsp->ds_nactive == 0) &&
+ ((err = dls_mac_active_set(dsp->ds_dlp)) != 0)) {
/* except for ENXIO all other errors are mapped to EBUSY */
if (err != ENXIO)
return (EBUSY);
return (err);
}
- dsp->ds_active = B_TRUE;
+ dsp->ds_passivestate = DLD_ACTIVE;
+ dsp->ds_nactive++;
return (0);
}
+/*
+ * Note that dls_active_set() is called whenever an active operation
+ * (DL_BIND_REQ, DL_ENABMULTI_REQ ...) is processed and
+ * dls_active_clear(dsp, B_FALSE) is called whenever the active operation
+ * is being undone (DL_UNBIND_REQ, DL_DISABMULTI_REQ ...). In some cases,
+ * a stream is closed without every active operation being undone and we
+ * need to clear all the "active" states by calling
+ * dls_active_clear(dsp, B_TRUE).
+ */
void
-dls_active_clear(dld_str_t *dsp)
+dls_active_clear(dld_str_t *dsp, boolean_t all)
{
ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
- if (!dsp->ds_active)
+ if (dsp->ds_passivestate == DLD_PASSIVE)
+ return;
+
+ if (all && dsp->ds_nactive == 0)
+ return;
+
+ ASSERT(dsp->ds_nactive > 0);
+
+ dsp->ds_nactive -= (all ? dsp->ds_nactive : 1);
+ if (dsp->ds_nactive != 0)
return;
+ ASSERT(dsp->ds_passivestate == DLD_ACTIVE);
dls_mac_active_clear(dsp->ds_dlp);
- dsp->ds_active = B_FALSE;
+ dsp->ds_passivestate = DLD_UNINITIALIZED;
}
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index 6e6f451ca9..c700e500fe 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -2158,7 +2158,9 @@ uint32_t
mac_no_notification(mac_handle_t mh)
{
mac_impl_t *mip = (mac_impl_t *)mh;
- return (mip->mi_unsup_note);
+
+ return (((mip->mi_state_flags & MIS_LEGACY) != 0) ?
+ mip->mi_capab_legacy.ml_unsup_note : 0);
}
/*
@@ -2842,6 +2844,28 @@ mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize,
return (err);
}
+int
+mac_fastpath_disable(mac_handle_t mh)
+{
+ mac_impl_t *mip = (mac_impl_t *)mh;
+
+ if ((mip->mi_state_flags & MIS_LEGACY) == 0)
+ return (0);
+
+ return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver));
+}
+
+void
+mac_fastpath_enable(mac_handle_t mh)
+{
+ mac_impl_t *mip = (mac_impl_t *)mh;
+
+ if ((mip->mi_state_flags & MIS_LEGACY) == 0)
+ return;
+
+ mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver);
+}
+
void
mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop)
{
@@ -4391,34 +4415,79 @@ mac_log_linkinfo(void *arg)
rw_exit(&i_mac_impl_lock);
}
+typedef struct i_mac_fastpath_state_s {
+ boolean_t mf_disable;
+ int mf_err;
+} i_mac_fastpath_state_t;
+
+/*ARGSUSED*/
+static uint_t
+i_mac_fastpath_disable_walker(mod_hash_key_t key, mod_hash_val_t *val,
+ void *arg)
+{
+ i_mac_fastpath_state_t *state = arg;
+ mac_handle_t mh = (mac_handle_t)val;
+
+ if (state->mf_disable)
+ state->mf_err = mac_fastpath_disable(mh);
+ else
+ mac_fastpath_enable(mh);
+
+ return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
+}
+
/*
* Start the logging timer.
*/
-void
+int
mac_start_logusage(mac_logtype_t type, uint_t interval)
{
+ i_mac_fastpath_state_t state = {B_TRUE, 0};
+ int err;
+
rw_enter(&i_mac_impl_lock, RW_WRITER);
switch (type) {
case MAC_LOGTYPE_FLOW:
if (mac_flow_log_enable) {
rw_exit(&i_mac_impl_lock);
- return;
+ return (0);
}
- mac_flow_log_enable = B_TRUE;
/* FALLTHRU */
case MAC_LOGTYPE_LINK:
if (mac_link_log_enable) {
rw_exit(&i_mac_impl_lock);
- return;
+ return (0);
}
- mac_link_log_enable = B_TRUE;
break;
default:
ASSERT(0);
}
+
+ /* Disable fastpath */
+ mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state);
+ if ((err = state.mf_err) != 0) {
+ /* Reenable fastpath */
+ state.mf_disable = B_FALSE;
+ state.mf_err = 0;
+ mod_hash_walk(i_mac_impl_hash,
+ i_mac_fastpath_disable_walker, &state);
+ rw_exit(&i_mac_impl_lock);
+ return (err);
+ }
+
+ switch (type) {
+ case MAC_LOGTYPE_FLOW:
+ mac_flow_log_enable = B_TRUE;
+ /* FALLTHRU */
+ case MAC_LOGTYPE_LINK:
+ mac_link_log_enable = B_TRUE;
+ break;
+ }
+
mac_logging_interval = interval;
rw_exit(&i_mac_impl_lock);
mac_log_linkinfo(NULL);
+ return (0);
}
/*
@@ -4428,6 +4497,7 @@ void
mac_stop_logusage(mac_logtype_t type)
{
i_mac_log_state_t lstate;
+ i_mac_fastpath_state_t state = {B_FALSE, 0};
rw_enter(&i_mac_impl_lock, RW_WRITER);
lstate.mi_fenable = mac_flow_log_enable;
@@ -4455,6 +4525,10 @@ mac_stop_logusage(mac_logtype_t type)
default:
ASSERT(0);
}
+
+ /* Reenable fastpath */
+ mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state);
+
rw_exit(&i_mac_impl_lock);
(void) untimeout(mac_logging_timer);
mac_logging_timer = 0;
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index 85614bdd9e..8b4006a805 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -410,6 +410,12 @@ mac_devinfo_get(mac_handle_t mh)
return (((mac_impl_t *)mh)->mi_dip);
}
+void *
+mac_driver(mac_handle_t mh)
+{
+ return (((mac_impl_t *)mh)->mi_driver);
+}
+
const char *
mac_name(mac_handle_t mh)
{
@@ -1637,10 +1643,9 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
boolean_t bcast_added = B_FALSE;
boolean_t nactiveclients_added = B_FALSE;
boolean_t mac_started = B_FALSE;
+ boolean_t fastpath_disabled = B_FALSE;
mac_resource_props_t mrp;
- ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
-
/* when VID is non-zero, the underlying MAC can not be VNIC */
ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0)));
@@ -1708,19 +1713,39 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
}
/*
- * Return EBUSY if:
- * - this is an exclusive active mac client and there already exist
- * active mac clients, or
- * - there already exist an exclusively active mac client.
+ * If this is a VNIC/VLAN, disable softmac fast-path.
*/
- if ((mcip->mci_state_flags & MCIS_EXCLUSIVE) &&
- (mip->mi_nactiveclients != 0) || (mip->mi_state_flags &
- MIS_EXCLUSIVE)) {
+ if (mcip->mci_state_flags & MCIS_IS_VNIC) {
+ err = mac_fastpath_disable((mac_handle_t)mip);
+ if (err != 0)
+ return (err);
+ fastpath_disabled = B_TRUE;
+ }
+
+ /*
+ * Return EBUSY if:
+ * - an exclusively active mac client already exists.
+ * - this is an exclusive active mac client but
+ *    a. active mac clients already exist, or
+ * b. fastpath streams are already plumbed on this legacy device
+ */
+ if (mip->mi_state_flags & MIS_EXCLUSIVE) {
+ if (fastpath_disabled)
+ mac_fastpath_enable((mac_handle_t)mip);
return (EBUSY);
}
- if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
+ if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
+ ASSERT(!fastpath_disabled);
+ if (mip->mi_nactiveclients != 0)
+ return (EBUSY);
+
+ if ((mip->mi_state_flags & MIS_LEGACY) &&
+ !(mip->mi_capab_legacy.ml_active_set(mip->mi_driver))) {
+ return (EBUSY);
+ }
mip->mi_state_flags |= MIS_EXCLUSIVE;
+ }
bzero(&mrp, sizeof (mac_resource_props_t));
if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC |
@@ -1970,8 +1995,15 @@ bail:
if (nactiveclients_added)
mip->mi_nactiveclients--;
- if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
+ if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
mip->mi_state_flags &= ~MIS_EXCLUSIVE;
+ if (mip->mi_state_flags & MIS_LEGACY)
+ mip->mi_capab_legacy.ml_active_clear(mip->mi_driver);
+ }
+
+ if (fastpath_disabled)
+ mac_fastpath_enable((mac_handle_t)mip);
+
kmem_free(muip, sizeof (mac_unicast_impl_t));
return (err);
}
@@ -2087,10 +2119,10 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
muip->mui_vid);
}
- mac_stop((mac_handle_t)mip);
+
FLOW_FINAL_REFRELE(flent);
- i_mac_perim_exit(mip);
- return (0);
+ ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE));
+ goto done;
}
/*
@@ -2170,8 +2202,14 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
mac_capab_update((mac_handle_t)mip);
mac_virtual_link_update(mip);
}
- if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
+
+ if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
mip->mi_state_flags &= ~MIS_EXCLUSIVE;
+
+ if (mip->mi_state_flags & MIS_LEGACY)
+ mip->mi_capab_legacy.ml_active_clear(mip->mi_driver);
+ }
+
mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
@@ -2183,10 +2221,16 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
- mac_stop((mac_handle_t)mip);
+ kmem_free(muip, sizeof (mac_unicast_impl_t));
+done:
+ /*
+ * Disable fastpath if this is a VNIC or a VLAN.
+ */
+ if (mcip->mci_state_flags & MCIS_IS_VNIC)
+ mac_fastpath_enable((mac_handle_t)mip);
+ mac_stop((mac_handle_t)mip);
i_mac_perim_exit(mip);
- kmem_free(muip, sizeof (mac_unicast_impl_t));
return (0);
}
@@ -3149,16 +3193,17 @@ mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
mac_impl_t *mip = (mac_impl_t *)mh;
/*
- * if mi_nactiveclients > 1, only MAC_CAPAB_HCKSUM,
- * MAC_CAPAB_NO_NATIVEVLAN, MAC_CAPAB_NO_ZCOPY can be advertised.
+ * if mi_nactiveclients > 1, only MAC_CAPAB_LEGACY, MAC_CAPAB_HCKSUM,
+ * MAC_CAPAB_NO_NATIVEVLAN and MAC_CAPAB_NO_ZCOPY can be advertised.
*/
if (mip->mi_nactiveclients > 1) {
switch (cap) {
- case MAC_CAPAB_HCKSUM:
- return (i_mac_capab_get(mh, cap, cap_data));
case MAC_CAPAB_NO_NATIVEVLAN:
case MAC_CAPAB_NO_ZCOPY:
return (B_TRUE);
+ case MAC_CAPAB_LEGACY:
+ case MAC_CAPAB_HCKSUM:
+ break;
default:
return (B_FALSE);
}
@@ -3303,7 +3348,8 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
mac_impl_t *mip = (mac_impl_t *)mh;
mac_client_impl_t *mcip;
int err = 0;
- mac_resource_props_t tmrp;
+ uint32_t resmask, newresmask;
+ mac_resource_props_t tmrp, umrp;
ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
@@ -3311,6 +3357,20 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
if (err != 0)
return (err);
+ bcopy(&mip->mi_resource_props, &umrp, sizeof (mac_resource_props_t));
+ resmask = umrp.mrp_mask;
+ mac_update_resources(mrp, &umrp, B_FALSE);
+ newresmask = umrp.mrp_mask;
+
+ if (resmask == 0 && newresmask != 0) {
+ /*
+ * Bandwidth, priority or cpu link properties configured,
+ * must disable fastpath.
+ */
+ if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0)
+ return (err);
+ }
+
/*
* Since bind_cpu may be modified by mac_client_set_resources()
* we use a copy of bind_cpu and finally cache bind_cpu in mip.
@@ -3322,9 +3382,20 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
err =
mac_client_set_resources((mac_client_handle_t)mcip, &tmrp);
}
- /* if mac_client_set_resources failed, do not update the values */
- if (err == 0)
- mac_update_resources(mrp, &mip->mi_resource_props, B_FALSE);
+
+ /* Only update the values if mac_client_set_resources succeeded */
+ if (err == 0) {
+ bcopy(&umrp, &mip->mi_resource_props,
+ sizeof (mac_resource_props_t));
+ /*
+ * If bandwidth, priority or cpu link properties are cleared,
+ * re-enable fastpath.
+ */
+ if (resmask != 0 && newresmask == 0)
+ mac_fastpath_enable((mac_handle_t)mip);
+ } else if (resmask == 0 && newresmask != 0) {
+ mac_fastpath_enable((mac_handle_t)mip);
+ }
return (err);
}
diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c
index cb6560b1f7..fd4d13cf1b 100644
--- a/usr/src/uts/common/io/mac/mac_flow.c
+++ b/usr/src/uts/common/io/mac/mac_flow.c
@@ -335,8 +335,9 @@ mac_flow_rem_subflow(flow_entry_t *flent)
{
flow_tab_t *ft = flent->fe_flow_tab;
mac_client_impl_t *mcip = ft->ft_mcip;
+ mac_handle_t mh = (mac_handle_t)ft->ft_mip;
- ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
+ ASSERT(MAC_PERIM_HELD(mh));
mac_flow_remove(ft, flent, B_FALSE);
if (flent->fe_mcip == NULL) {
@@ -348,10 +349,11 @@ mac_flow_rem_subflow(flow_entry_t *flent)
mac_flow_tab_destroy(ft);
mcip->mci_subflow_tab = NULL;
}
- return;
+ } else {
+ mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
+ mac_link_flow_clean((mac_client_handle_t)mcip, flent);
}
- mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
- mac_link_flow_clean((mac_client_handle_t)mcip, flent);
+ mac_fastpath_enable(mh);
}
/*
@@ -363,13 +365,17 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
boolean_t instantiate_flow)
{
mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
+ mac_handle_t mh = (mac_handle_t)mcip->mci_mip;
flow_tab_info_t *ftinfo;
flow_mask_t mask;
flow_tab_t *ft;
int err;
boolean_t ft_created = B_FALSE;
- ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
+ ASSERT(MAC_PERIM_HELD(mh));
+
+ if ((err = mac_fastpath_disable(mh)) != 0)
+ return (err);
/*
* If the subflow table exists already just add the new subflow
@@ -382,8 +388,10 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
* Try to create a new table and then add the subflow to the
* newly created subflow table
*/
- if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL)
+ if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) {
+ mac_fastpath_enable(mh);
return (EOPNOTSUPP);
+ }
mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size,
mcip->mci_mip, &ft);
@@ -394,6 +402,7 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
if (err != 0) {
if (ft_created)
mac_flow_tab_destroy(ft);
+ mac_fastpath_enable(mh);
return (err);
}
@@ -405,6 +414,7 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
mac_flow_remove(ft, flent, B_FALSE);
if (ft_created)
mac_flow_tab_destroy(ft);
+ mac_fastpath_enable(mh);
return (err);
}
} else {
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 4d9d590457..0c9d6fddf2 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -131,7 +131,6 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp)
uint_t instance;
boolean_t style1_created = B_FALSE;
boolean_t style2_created = B_FALSE;
- mac_capab_legacy_t legacy;
char *driver;
minor_t minor = 0;
@@ -298,14 +297,11 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp)
}
mip->mi_callbacks = mregp->m_callbacks;
- if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY, &legacy))
+ if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
+ &mip->mi_capab_legacy)) {
mip->mi_state_flags |= MIS_LEGACY;
-
- if (mip->mi_state_flags & MIS_LEGACY) {
- mip->mi_unsup_note = legacy.ml_unsup_note;
- mip->mi_phy_dev = legacy.ml_dev;
+ mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
} else {
- mip->mi_unsup_note = 0;
mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
ddi_get_instance(mip->mi_dip) + 1);
}
@@ -505,6 +501,12 @@ mac_unregister(mac_handle_t mh)
i_mac_perim_enter(mip);
+ /*
+ * There are still resource properties configured over this mac.
+ */
+ if (mip->mi_resource_props.mrp_mask != 0)
+ mac_fastpath_enable((mac_handle_t)mip);
+
if (mip->mi_minor < MAC_MAX_MINOR + 1) {
ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
ddi_remove_minor_node(mip->mi_dip,
diff --git a/usr/src/uts/common/io/softmac/softmac_ctl.c b/usr/src/uts/common/io/softmac/softmac_ctl.c
index 99c665aae6..d4c8afa8ce 100644
--- a/usr/src/uts/common/io/softmac/softmac_ctl.c
+++ b/usr/src/uts/common/io/softmac/softmac_ctl.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -70,6 +70,22 @@ softmac_send_bind_req(softmac_lower_t *slp, uint_t sap)
}
int
+softmac_send_unbind_req(softmac_lower_t *slp)
+{
+ mblk_t *reqmp;
+
+ /*
+ * create unbind req message and send it down
+ */
+ reqmp = mexchange(NULL, NULL, DL_UNBIND_REQ_SIZE, M_PROTO,
+ DL_UNBIND_REQ);
+ if (reqmp == NULL)
+ return (ENOMEM);
+
+ return (softmac_proto_tx(slp, reqmp, NULL));
+}
+
+int
softmac_send_promisc_req(softmac_lower_t *slp, t_uscalar_t level, boolean_t on)
{
mblk_t *reqmp;
@@ -105,6 +121,7 @@ softmac_m_promisc(void *arg, boolean_t on)
softmac_t *softmac = arg;
softmac_lower_t *slp = softmac->smac_lower;
+ ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
ASSERT(slp != NULL);
return (softmac_send_promisc_req(slp, DL_PROMISC_PHYS, on));
}
@@ -120,6 +137,7 @@ softmac_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
t_uscalar_t dl_prim;
uint32_t size, addr_length;
+ ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
/*
* create multicst message and send it down
*/
@@ -162,6 +180,7 @@ softmac_m_unicst(void *arg, const uint8_t *macaddr)
mblk_t *reqmp;
size_t size;
+ ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
/*
* create set_phys_addr message and send it down
*/
@@ -425,16 +444,72 @@ runt:
}
void
-softmac_rput_process_notdata(queue_t *rq, mblk_t *mp)
+softmac_rput_process_notdata(queue_t *rq, softmac_upper_t *sup, mblk_t *mp)
{
- softmac_lower_t *slp = rq->q_ptr;
+ softmac_lower_t *slp = rq->q_ptr;
+ union DL_primitives *dlp;
+ ssize_t len = MBLKL(mp);
switch (DB_TYPE(mp)) {
case M_PROTO:
case M_PCPROTO:
- softmac_rput_process_proto(rq, mp);
- break;
+ /*
+ * If this is a shared-lower-stream, pass it to softmac to
+ * process.
+ */
+ if (sup == NULL) {
+ softmac_rput_process_proto(rq, mp);
+ break;
+ }
+ /*
+ * Dedicated-lower-stream.
+ */
+ dlp = (union DL_primitives *)mp->b_rptr;
+ ASSERT(len >= sizeof (dlp->dl_primitive));
+ switch (dlp->dl_primitive) {
+ case DL_OK_ACK:
+ if (len < DL_OK_ACK_SIZE)
+ goto runt;
+
+ /*
+ * If this is a DL_OK_ACK for a DL_UNBIND_REQ, pass it
+ * to softmac to process, otherwise directly pass it to
+ * the upper stream.
+ */
+ if (dlp->ok_ack.dl_correct_primitive == DL_UNBIND_REQ) {
+ softmac_rput_process_proto(rq, mp);
+ break;
+ }
+
+ putnext(sup->su_rq, mp);
+ break;
+ case DL_ERROR_ACK:
+ if (len < DL_ERROR_ACK_SIZE)
+ goto runt;
+
+ /*
+ * If this is a DL_ERROR_ACK for a DL_UNBIND_REQ, pass
+ * it to softmac to process, otherwise directly pass it
+ * to the upper stream.
+ */
+ if (dlp->error_ack.dl_error_primitive ==
+ DL_UNBIND_REQ) {
+ softmac_rput_process_proto(rq, mp);
+ break;
+ }
+
+ putnext(sup->su_rq, mp);
+ break;
+ case DL_BIND_ACK:
+ case DL_CAPABILITY_ACK:
+ softmac_rput_process_proto(rq, mp);
+ break;
+ default:
+ putnext(sup->su_rq, mp);
+ break;
+ }
+ break;
case M_FLUSH:
if (*mp->b_rptr & FLUSHR)
flushq(rq, FLUSHDATA);
@@ -447,6 +522,11 @@ softmac_rput_process_notdata(queue_t *rq, mblk_t *mp)
case M_IOCNAK:
case M_COPYIN:
case M_COPYOUT:
+ if (sup != NULL) {
+ putnext(sup->su_rq, mp);
+ break;
+ }
+
mutex_enter(&slp->sl_mutex);
if (!slp->sl_pending_ioctl) {
mutex_exit(&slp->sl_mutex);
@@ -460,7 +540,7 @@ softmac_rput_process_notdata(queue_t *rq, mblk_t *mp)
slp->sl_ack_mp = mp;
cv_broadcast(&slp->sl_cv);
mutex_exit(&slp->sl_mutex);
- return;
+ break;
default:
cmn_err(CE_NOTE, "softmac: got unsupported mblk type 0x%x",
@@ -468,4 +548,8 @@ softmac_rput_process_notdata(queue_t *rq, mblk_t *mp)
freemsg(mp);
break;
}
+ return;
+runt:
+ cmn_err(CE_WARN, "softmac: got runt %s", dl_primstr(dlp->dl_primitive));
+ freemsg(mp);
}
diff --git a/usr/src/uts/common/io/softmac/softmac_dev.c b/usr/src/uts/common/io/softmac/softmac_dev.c
index f548df055d..37c5740846 100644
--- a/usr/src/uts/common/io/softmac/softmac_dev.c
+++ b/usr/src/uts/common/io/softmac/softmac_dev.c
@@ -19,28 +19,45 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
-#include <sys/dld.h>
#include <inet/common.h>
#include <sys/stropts.h>
#include <sys/modctl.h>
-#include <sys/avl.h>
+#include <sys/dld.h>
#include <sys/softmac_impl.h>
-#include <sys/softmac.h>
dev_info_t *softmac_dip = NULL;
+static kmem_cache_t *softmac_upper_cachep;
+
+/*
+ * This function is a generic open(9E) entry point into the softmac for
+ * both the softmac module and the softmac driver.
+ */
+static int softmac_cmn_open(queue_t *, dev_t *, int, int, cred_t *);
+
+/*
+ * The following softmac_mod_xxx() functions are (9E) entry point functions for
+ * the softmac module.
+ */
+static int softmac_mod_close(queue_t *);
+static void softmac_mod_rput(queue_t *, mblk_t *);
+static void softmac_mod_wput(queue_t *, mblk_t *);
+static void softmac_mod_wsrv(queue_t *);
+
+/*
+ * The following softmac_drv_xxx() functions are (9E) entry point functions for
+ * the softmac driver.
+ */
+static int softmac_drv_open(queue_t *, dev_t *, int, int, cred_t *);
+static int softmac_drv_close(queue_t *);
+static void softmac_drv_wput(queue_t *, mblk_t *);
+static void softmac_drv_wsrv(queue_t *);
-static int softmac_open(queue_t *, dev_t *, int, int, cred_t *);
-static int softmac_close(queue_t *);
-static void softmac_rput(queue_t *, mblk_t *);
-static void softmac_rsrv(queue_t *);
-static void softmac_wput(queue_t *, mblk_t *);
-static void softmac_wsrv(queue_t *);
static int softmac_attach(dev_info_t *, ddi_attach_cmd_t);
static int softmac_detach(dev_info_t *, ddi_detach_cmd_t);
static int softmac_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
@@ -68,21 +85,21 @@ static struct module_info softmac_dld_modinfo = {
};
static struct qinit softmac_urinit = {
- (pfi_t)softmac_rput, /* qi_putp */
- (pfi_t)softmac_rsrv, /* qi_srvp */
- softmac_open, /* qi_qopen */
- softmac_close, /* qi_qclose */
- NULL, /* qi_qadmin */
- &softmac_modinfo /* qi_minfo */
+ (pfi_t)softmac_mod_rput, /* qi_putp */
+ (pfi_t)NULL, /* qi_srvp */
+ softmac_cmn_open, /* qi_qopen */
+ softmac_mod_close, /* qi_qclose */
+ NULL, /* qi_qadmin */
+ &softmac_modinfo /* qi_minfo */
};
static struct qinit softmac_uwinit = {
- (pfi_t)softmac_wput, /* qi_putp */
- (pfi_t)softmac_wsrv, /* qi_srvp */
- NULL, /* qi_qopen */
- NULL, /* qi_qclose */
- NULL, /* qi_qadmin */
- &softmac_modinfo /* qi_minfo */
+ (pfi_t)softmac_mod_wput, /* qi_putp */
+ (pfi_t)softmac_mod_wsrv, /* qi_srvp */
+ NULL, /* qi_qopen */
+ NULL, /* qi_qclose */
+ NULL, /* qi_qadmin */
+ &softmac_modinfo /* qi_minfo */
};
static struct streamtab softmac_tab = {
@@ -95,11 +112,12 @@ DDI_DEFINE_STREAM_OPS(softmac_ops, nulldev, nulldev, softmac_attach,
ddi_quiesce_not_supported);
static struct qinit softmac_dld_r_qinit = {
- NULL, NULL, dld_open, dld_close, NULL, &softmac_dld_modinfo
+ NULL, NULL, softmac_drv_open, softmac_drv_close, NULL,
+ &softmac_dld_modinfo
};
static struct qinit softmac_dld_w_qinit = {
- (pfi_t)dld_wput, (pfi_t)dld_wsrv, NULL, NULL, NULL,
+ (pfi_t)softmac_drv_wput, (pfi_t)softmac_drv_wsrv, NULL, NULL, NULL,
&softmac_dld_modinfo
};
@@ -128,6 +146,49 @@ static struct modlinkage softmac_modlinkage = {
NULL
};
+/*ARGSUSED*/
+static int
+softmac_upper_constructor(void *buf, void *arg, int kmflag)
+{
+ softmac_upper_t *sup = buf;
+
+ bzero(buf, sizeof (softmac_upper_t));
+
+ mutex_init(&sup->su_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&sup->su_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&sup->su_disp_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&sup->su_disp_cv, NULL, CV_DEFAULT, NULL);
+ list_create(&sup->su_req_list, sizeof (softmac_switch_req_t),
+ offsetof(softmac_switch_req_t, ssq_req_list_node));
+ return (0);
+}
+
+/*ARGSUSED*/
+static void
+softmac_upper_destructor(void *buf, void *arg)
+{
+ softmac_upper_t *sup = buf;
+
+ ASSERT(sup->su_slp == NULL);
+ ASSERT(sup->su_pending_head == NULL && sup->su_pending_tail == NULL);
+ ASSERT(!sup->su_dlpi_pending);
+ ASSERT(!sup->su_active);
+ ASSERT(!sup->su_closing);
+ ASSERT(sup->su_tx_flow_mp == NULL);
+ ASSERT(sup->su_tx_inprocess == 0);
+ ASSERT(sup->su_mode == SOFTMAC_UNKNOWN);
+ ASSERT(!sup->su_tx_busy);
+ ASSERT(!sup->su_bound);
+ ASSERT(!sup->su_taskq_scheduled);
+ ASSERT(list_is_empty(&sup->su_req_list));
+
+ list_destroy(&sup->su_req_list);
+ mutex_destroy(&sup->su_mutex);
+ cv_destroy(&sup->su_cv);
+ mutex_destroy(&sup->su_disp_mutex);
+ cv_destroy(&sup->su_disp_cv);
+}
+
int
_init(void)
{
@@ -135,6 +196,11 @@ _init(void)
softmac_init();
+ softmac_upper_cachep = kmem_cache_create("softmac_upper_cache",
+ sizeof (softmac_upper_t), 0, softmac_upper_constructor,
+ softmac_upper_destructor, NULL, NULL, NULL, 0);
+ ASSERT(softmac_upper_cachep != NULL);
+
if ((err = mod_install(&softmac_modlinkage)) != 0) {
softmac_fini();
return (err);
@@ -154,6 +220,7 @@ _fini(void)
if ((err = mod_remove(&softmac_modlinkage)) != 0)
return (err);
+ kmem_cache_destroy(softmac_upper_cachep);
softmac_fini();
return (0);
@@ -166,7 +233,7 @@ _info(struct modinfo *modinfop)
}
static int
-softmac_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
+softmac_cmn_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
{
softmac_lower_t *slp;
/*
@@ -198,16 +265,15 @@ softmac_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
/*
* Regular device open of a softmac DLPI node. We modify
* the queues' q_qinfo pointer such that all future STREAMS
- * operations will go through dld's entry points (including
- * dld_close()).
+ * operations will go through another set of entry points
*/
rq->q_qinfo = &softmac_dld_r_qinit;
WR(rq)->q_qinfo = &softmac_dld_w_qinit;
- return (dld_open(rq, devp, flag, sflag, credp));
+ return (softmac_drv_open(rq, devp, flag, sflag, credp));
}
static int
-softmac_close(queue_t *rq)
+softmac_mod_close(queue_t *rq)
{
softmac_lower_t *slp = rq->q_ptr;
@@ -237,10 +303,11 @@ softmac_close(queue_t *rq)
}
static void
-softmac_rput(queue_t *rq, mblk_t *mp)
+softmac_mod_rput(queue_t *rq, mblk_t *mp)
{
- softmac_lower_t *slp = rq->q_ptr;
- union DL_primitives *dlp;
+ softmac_lower_t *slp = rq->q_ptr;
+ softmac_lower_rxinfo_t *rxinfo;
+ union DL_primitives *dlp;
/*
* This is the softmac module.
@@ -249,11 +316,21 @@ softmac_rput(queue_t *rq, mblk_t *mp)
ASSERT((mp->b_next == NULL) && (mp->b_prev == NULL));
switch (DB_TYPE(mp)) {
- case M_DATA:
+ case M_DATA: {
+
+ /*
+ * If sl_rxinfo is non-NULL, this is a dedicated-lower-stream
+ * created for fastpath. Directly call the rx callback.
+ */
+ if ((rxinfo = slp->sl_rxinfo) != NULL) {
+ rxinfo->slr_rx(rxinfo->slr_arg, NULL, mp, NULL);
+ break;
+ }
+
/*
- * Some drivers start to send up packets even if not in the
- * DL_IDLE state, where sl_softmac is not set yet. Drop the
- * packet in this case.
+ * A shared-lower-stream. Some drivers start to send up
+ * packets even if not in the DL_IDLE state, where
+ * sl_softmac is not set yet. Drop the packet in this case.
*/
if (slp->sl_softmac == NULL) {
freemsg(mp);
@@ -275,18 +352,13 @@ softmac_rput(queue_t *rq, mblk_t *mp)
*/
if (DB_REF(mp) == 1) {
ASSERT(slp->sl_softmac != NULL);
- /*
- * We don't need any locks to protect sl_handle
- * because ip_input() can tolerate if sl_handle
- * is reset to NULL when DL_CAPAB_POLL is
- * disabled.
- */
mac_rx(slp->sl_softmac->smac_mh, NULL, mp);
return;
} else {
softmac_rput_process_data(slp, mp);
}
break;
+ }
case M_PROTO:
case M_PCPROTO:
if (MBLKL(mp) < sizeof (dlp->dl_primitive)) {
@@ -295,6 +367,12 @@ softmac_rput(queue_t *rq, mblk_t *mp)
}
dlp = (union DL_primitives *)mp->b_rptr;
if (dlp->dl_primitive == DL_UNITDATA_IND) {
+
+ if ((rxinfo = slp->sl_rxinfo) != NULL) {
+ rxinfo->slr_rx(rxinfo->slr_arg, NULL, mp, NULL);
+ break;
+ }
+
cmn_err(CE_WARN, "got unexpected %s message",
dl_primstr(DL_UNITDATA_IND));
freemsg(mp);
@@ -302,19 +380,13 @@ softmac_rput(queue_t *rq, mblk_t *mp)
}
/*FALLTHROUGH*/
default:
- softmac_rput_process_notdata(rq, mp);
+ softmac_rput_process_notdata(rq, slp->sl_sup, mp);
break;
}
}
-/* ARGSUSED */
-static void
-softmac_rsrv(queue_t *rq)
-{
-}
-
static void
-softmac_wput(queue_t *wq, mblk_t *mp)
+softmac_mod_wput(queue_t *wq, mblk_t *mp)
{
/*
* This is the softmac module
@@ -342,7 +414,6 @@ softmac_wput(queue_t *wq, mblk_t *mp)
*/
arg = (smac_ioc_start_t *)mp->b_cont->b_rptr;
arg->si_slp = slp;
-
miocack(wq, mp, sizeof (*arg), 0);
break;
}
@@ -359,7 +430,7 @@ softmac_wput(queue_t *wq, mblk_t *mp)
}
static void
-softmac_wsrv(queue_t *wq)
+softmac_mod_wsrv(queue_t *wq)
{
softmac_lower_t *slp = wq->q_ptr;
@@ -372,7 +443,9 @@ softmac_wsrv(queue_t *wq)
* Inform that the tx resource is available; mac_tx_update() will
* inform all the upper streams sharing this lower stream.
*/
- if (slp->sl_softmac != NULL)
+ if (slp->sl_sup != NULL)
+ qenable(slp->sl_sup->su_wq);
+ else if (slp->sl_softmac != NULL)
mac_tx_update(slp->sl_softmac->smac_mh);
}
@@ -420,3 +493,179 @@ softmac_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
return (DDI_FAILURE);
}
+
+/*ARGSUSED*/
+static void
+softmac_dedicated_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ mac_header_info_t *mhip)
+{
+ queue_t *rq = ((softmac_upper_t *)arg)->su_rq;
+
+ if (canputnext(rq))
+ putnext(rq, mp);
+ else
+ freemsg(mp);
+}
+
+/*ARGSUSED*/
+static int
+softmac_drv_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
+{
+ softmac_upper_t *sup = NULL;
+ softmac_t *softmac;
+ int err = 0;
+
+ /*
+ * This is a softmac device created for a legacy device, find the
+ * associated softmac and initialize the softmac_upper_t structure.
+ */
+ if ((err = softmac_hold(*devp, &softmac)) != 0)
+ return (err);
+
+ sup = kmem_cache_alloc(softmac_upper_cachep, KM_NOSLEEP);
+ if (sup == NULL) {
+ err = ENOMEM;
+ goto fail;
+ }
+
+ ASSERT(list_is_empty(&sup->su_req_list));
+
+ if ((sup->su_tx_flow_mp = allocb(1, BPRI_HI)) == NULL) {
+ err = ENOMEM;
+ goto fail;
+ }
+
+ sup->su_rq = rq;
+ sup->su_wq = WR(rq);
+ sup->su_softmac = softmac;
+ sup->su_mode = SOFTMAC_UNKNOWN;
+
+ sup->su_rxinfo.slr_arg = sup;
+ sup->su_rxinfo.slr_rx = softmac_dedicated_rx;
+ sup->su_direct_rxinfo.slr_arg = sup;
+ sup->su_direct_rxinfo.slr_rx = softmac_dedicated_rx;
+
+ if ((err = dld_str_open(rq, devp, sup)) != 0) {
+ freeb(sup->su_tx_flow_mp);
+ sup->su_tx_flow_mp = NULL;
+ goto fail;
+ }
+
+ return (0);
+
+fail:
+ if (sup != NULL)
+ kmem_cache_free(softmac_upper_cachep, sup);
+ softmac_rele(softmac);
+ return (err);
+}
+
+static int
+softmac_drv_close(queue_t *rq)
+{
+ softmac_upper_t *sup = dld_str_private(rq);
+ softmac_t *softmac = sup->su_softmac;
+
+ ASSERT(WR(rq)->q_next == NULL);
+
+ qprocsoff(rq);
+
+ ASSERT(sup->su_tx_inprocess == 0);
+
+ /*
+ * Wait until the pending requests are processed by the worker thread.
+ */
+ mutex_enter(&sup->su_disp_mutex);
+ sup->su_closing = B_TRUE;
+ while (sup->su_dlpi_pending)
+ cv_wait(&sup->su_disp_cv, &sup->su_disp_mutex);
+ mutex_exit(&sup->su_disp_mutex);
+
+ softmac_upperstream_close(sup);
+
+ if (sup->su_tx_flow_mp != NULL) {
+ freeb(sup->su_tx_flow_mp);
+ sup->su_tx_flow_mp = NULL;
+ }
+
+ if (sup->su_active) {
+ mutex_enter(&softmac->smac_active_mutex);
+ softmac->smac_nactive--;
+ mutex_exit(&softmac->smac_active_mutex);
+ sup->su_active = B_FALSE;
+ }
+
+ sup->su_bound = B_FALSE;
+ sup->su_softmac = NULL;
+ sup->su_closing = B_FALSE;
+
+ kmem_cache_free(softmac_upper_cachep, sup);
+
+ softmac_rele(softmac);
+ return (dld_str_close(rq));
+}
+
+static void
+softmac_drv_wput(queue_t *wq, mblk_t *mp)
+{
+ softmac_upper_t *sup = dld_str_private(wq);
+ t_uscalar_t prim;
+
+ ASSERT(wq->q_next == NULL);
+
+ switch (DB_TYPE(mp)) {
+ case M_DATA:
+ case M_MULTIDATA:
+ softmac_wput_data(sup, mp);
+ break;
+ case M_PROTO:
+ case M_PCPROTO:
+
+ if (MBLKL(mp) < sizeof (t_uscalar_t)) {
+ freemsg(mp);
+ return;
+ }
+
+ prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
+ if (prim == DL_UNITDATA_REQ) {
+ softmac_wput_data(sup, mp);
+ return;
+ }
+
+ softmac_wput_nondata(sup, mp);
+ break;
+ default:
+ softmac_wput_nondata(sup, mp);
+ break;
+ }
+}
+
+static void
+softmac_drv_wsrv(queue_t *wq)
+{
+ softmac_upper_t *sup = dld_str_private(wq);
+
+ ASSERT(wq->q_next == NULL);
+
+ mutex_enter(&sup->su_mutex);
+ if (sup->su_mode != SOFTMAC_FASTPATH) {
+ /*
+ * Bump su_tx_inprocess so that su_mode won't change.
+ */
+ sup->su_tx_inprocess++;
+ mutex_exit(&sup->su_mutex);
+ dld_wsrv(wq);
+ mutex_enter(&sup->su_mutex);
+ if (--sup->su_tx_inprocess == 0)
+ cv_signal(&sup->su_cv);
+ } else if (sup->su_tx_busy && SOFTMAC_CANPUTNEXT(sup->su_slp->sl_wq)) {
+ /*
+ * The flow-control of the dedicated-lower-stream is
+ * relieved, relieve the flow-control of the
+ * upper-stream too.
+ */
+ sup->su_tx_flow_mp = getq(wq);
+ sup->su_tx_busy = B_FALSE;
+ }
+ mutex_exit(&sup->su_mutex);
+}
diff --git a/usr/src/uts/common/io/softmac/softmac_fp.c b/usr/src/uts/common/io/softmac/softmac_fp.c
new file mode 100644
index 0000000000..a012aa32a4
--- /dev/null
+++ b/usr/src/uts/common/io/softmac/softmac_fp.c
@@ -0,0 +1,1252 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Softmac data-path switching:
+ *
+ * - Fast-path model
+ *
+ * When the softmac fast-path is used, a dedicated lower-stream
+ * will be opened over the legacy device for each IP/ARP (upper-)stream
+ * over the softMAC, and all DLPI messages (including control messages
+ * and data messages) will be exchanged between the upper-stream and
+ * the corresponding lower-stream directly. Therefore, the data
+ * demultiplexing, filtering and classification processing will be done
+ * by the lower-stream, and the GLDv3 DLS/MAC layer processing will be
+ * no longer needed.
+ *
+ * - Slow-path model
+ *
+ * Some GLDv3 features require the GLDv3 DLS/MAC layer processing to
+ * not be bypassed to assure its function correctness. For example,
+ * softmac fast-path must be disabled to support GLDv3 VNIC functionality.
+ * In this case, a shared lower-stream will be opened over the legacy
+ * device, which is responsible for implementing the GLDv3 callbacks
+ * and passing RAW data messages between the legacy devices and the GLDv3
+ * framework.
+ *
+ * By default, the softmac fast-path mode will be used to assure the
+ * performance; MAC clients will be able to request to disable the softmac
+ * fast-path mode to support certain features, and if that succeeds,
+ * the system will fallback to the slow-path softmac data-path model.
+ *
+ *
+ * The details of the softmac data fast-path model is stated as below
+ *
+ * 1. When a stream is opened on a softMAC, the softmac module will take
+ * over the DLPI processing on this stream;
+ *
+ * 2. For IP/ARP streams over a softMAC, softmac data fast-path will be
+ * used by default, unless fast-path is disabled by any MAC client
+ * explicitly. The softmac module first identifies an IP/ARP stream
+ * by seeing whether there is a SIOCSLIFNAME ioctl sent from upstream,
+ * if there is one, this stream is either an IP or an ARP stream
+ * and will use fast-path potentially;
+ *
+ * 3. When the softmac fast-path is used, a dedicated lower-stream will
+ * be setup for each IP/ARP stream (1-1 mapping). From that point on,
+ * all control and data messages will be exchanged between the IP/ARP
+ * upper-stream and the legacy device through this dedicated
+ * lower-stream. As a result, the DLS/MAC layer processing in GLDv3
+ * will be skipped, and this greatly improves the performance;
+ *
+ * 4. When the softmac data fast-path is disabled by a MAC client (e.g.,
+ * by a VNIC), all the IP/ARP upper streams will try to switch from
+ * the fast-path to the slow-path. The dedicated lower-stream will be
+ * destroyed, and all the control and data-messages will go through the
+ * existing GLDv3 code path and (in the end) the shared lower-stream;
+ *
+ * 5. On the other hand, when the last MAC client cancels its fast-path
+ * disable request, all the IP/ARP streams will try to switch back to
+ * the fast-path mode;
+ *
+ * Steps 4 and 5 both rely on the data-path mode switching process
+ * described below:
+ *
+ * 1) To switch the softmac data-path mode (between fast-path and slow-path),
+ * softmac will first send a DL_NOTE_REPLUMB DL_NOTIFY_IND message
+ * upstream over each IP/ARP streams that needs data-path mode switching;
+ *
+ * 2) When IP receives this DL_NOTE_REPLUMB message, it will bring down
+ * all the IP interfaces on the corresponding ill (IP Lower level
+ * structure), and bring up those interfaces over again; this will in
+ * turn cause the ARP to "replumb" the interface.
+ *
+ * During the replumb process, both IP and ARP will send downstream the
+ * necessary DL_DISABMULTI_REQ and DL_UNBIND_REQ messages and cleanup
+ * the old state of the underlying softMAC, following with the necessary
+ * DL_BIND_REQ and DL_ENABMULTI_REQ messages to setup the new state.
+ * Between the cleanup and re-setup process, IP/ARP will also send down
+ * a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF messages to the softMAC to
+ * indicate the *switching point*;
+ *
+ * 3) When softmac receives the DL_NOTE_REPLUMB_DONE message, it either
+ * creates or destroys the dedicated lower-stream (depending on which
+ * data-path mode the softMAC switches to), and change the softmac
+ * data-path mode. From then on, softmac will process all the succeeding
+ * control messages (including the DL_BIND_REQ and DL_ENABMULTI_REQ
+ * messages) and data messages based on new data-path mode.
+ */
+
+#include <sys/types.h>
+#include <sys/disp.h>
+#include <sys/callb.h>
+#include <sys/sysmacros.h>
+#include <sys/file.h>
+#include <sys/vlan.h>
+#include <sys/dld.h>
+#include <sys/sockio.h>
+#include <sys/softmac_impl.h>
+
+static kmutex_t softmac_taskq_lock;
+static kcondvar_t softmac_taskq_cv;
+static list_t softmac_taskq_list; /* List of softmac_upper_t */
+boolean_t softmac_taskq_quit;
+boolean_t softmac_taskq_done;
+
+static void softmac_taskq_dispatch();
+static int softmac_fastpath_setup(softmac_upper_t *);
+static mac_tx_cookie_t softmac_fastpath_wput_data(softmac_upper_t *, mblk_t *,
+ uintptr_t, uint16_t);
+static void softmac_datapath_switch_done(softmac_upper_t *);
+
+void
+softmac_fp_init()
+{
+ mutex_init(&softmac_taskq_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&softmac_taskq_cv, NULL, CV_DRIVER, NULL);
+
+ softmac_taskq_quit = B_FALSE;
+ softmac_taskq_done = B_FALSE;
+ list_create(&softmac_taskq_list, sizeof (softmac_upper_t),
+ offsetof(softmac_upper_t, su_taskq_list_node));
+ (void) thread_create(NULL, 0, softmac_taskq_dispatch, NULL, 0,
+ &p0, TS_RUN, minclsyspri);
+}
+
+void
+softmac_fp_fini()
+{
+ /*
+ * Request the softmac_taskq thread to quit and wait for it to be done.
+ */
+ mutex_enter(&softmac_taskq_lock);
+ softmac_taskq_quit = B_TRUE;
+ cv_signal(&softmac_taskq_cv);
+ while (!softmac_taskq_done)
+ cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
+ mutex_exit(&softmac_taskq_lock);
+ list_destroy(&softmac_taskq_list);
+
+ mutex_destroy(&softmac_taskq_lock);
+ cv_destroy(&softmac_taskq_cv);
+}
+
+static boolean_t
+check_ip_above(queue_t *q)
+{
+ queue_t *next_q;
+ boolean_t ret = B_TRUE;
+
+ claimstr(q);
+ next_q = q->q_next;
+ if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
+ ret = B_FALSE;
+ releasestr(q);
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+softmac_capab_perim(softmac_upper_t *sup, void *data, uint_t flags)
+{
+ switch (flags) {
+ case DLD_ENABLE:
+ mutex_enter(&sup->su_mutex);
+ break;
+ case DLD_DISABLE:
+ mutex_exit(&sup->su_mutex);
+ break;
+ case DLD_QUERY:
+ return (MUTEX_HELD(&sup->su_mutex));
+ }
+ return (0);
+}
+
+/* ARGSUSED */
+static mac_tx_notify_handle_t
+softmac_client_tx_notify(void *txcb, mac_tx_notify_t func, void *arg)
+{
+ return (NULL);
+}
+
+static int
+softmac_capab_direct(softmac_upper_t *sup, void *data, uint_t flags)
+{
+ dld_capab_direct_t *direct = data;
+ softmac_lower_t *slp = sup->su_slp;
+
+ ASSERT(MUTEX_HELD(&sup->su_mutex));
+
+ ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
+
+ switch (flags) {
+ case DLD_ENABLE:
+ if (sup->su_direct)
+ return (0);
+
+ sup->su_direct_rxinfo.slr_rx = (softmac_rx_t)direct->di_rx_cf;
+ sup->su_direct_rxinfo.slr_arg = direct->di_rx_ch;
+ slp->sl_rxinfo = &sup->su_direct_rxinfo;
+ direct->di_tx_df = (uintptr_t)softmac_fastpath_wput_data;
+ direct->di_tx_dh = sup;
+
+ /*
+ * We rely on the STREAM flow-control to backenable
+ * the IP stream. Therefore, no notify callback needs to
+ * be registered. But IP requires this to be a valid function
+ * pointer.
+ */
+ direct->di_tx_cb_df = (uintptr_t)softmac_client_tx_notify;
+ direct->di_tx_cb_dh = NULL;
+ sup->su_direct = B_TRUE;
+ return (0);
+
+ case DLD_DISABLE:
+ if (!sup->su_direct)
+ return (0);
+
+ slp->sl_rxinfo = &sup->su_rxinfo;
+ sup->su_direct = B_FALSE;
+ return (0);
+ }
+ return (ENOTSUP);
+}
+
+static int
+softmac_dld_capab(softmac_upper_t *sup, uint_t type, void *data, uint_t flags)
+{
+ int err;
+
+ /*
+ * Don't enable direct callback capabilities unless the caller is
+ * the IP client. When a module is inserted in a stream (_I_INSERT)
+ * the stack initiates capability disable, but due to races, the
+ * module insertion may complete before the capability disable
+ * completes. So we limit the check to DLD_ENABLE case.
+ */
+ if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
+ !check_ip_above(sup->su_rq)) {
+ return (ENOTSUP);
+ }
+
+ switch (type) {
+ case DLD_CAPAB_DIRECT:
+ err = softmac_capab_direct(sup, data, flags);
+ break;
+
+ case DLD_CAPAB_PERIM:
+ err = softmac_capab_perim(sup, data, flags);
+ break;
+
+ default:
+ err = ENOTSUP;
+ break;
+ }
+ return (err);
+}
+
+static void
+softmac_capability_advertise(softmac_upper_t *sup, mblk_t *mp)
+{
+ dl_capability_ack_t *dlap;
+ dl_capability_sub_t *dlsp;
+ t_uscalar_t subsize;
+ uint8_t *ptr;
+ queue_t *q = sup->su_wq;
+ mblk_t *mp1;
+ softmac_t *softmac = sup->su_softmac;
+ boolean_t dld_capable = B_FALSE;
+ boolean_t hcksum_capable = B_FALSE;
+ boolean_t zcopy_capable = B_FALSE;
+ boolean_t mdt_capable = B_FALSE;
+
+ ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
+
+ /*
+ * Initially assume no capabilities.
+ */
+ subsize = 0;
+
+ /*
+ * Direct capability negotiation interface between IP and softmac
+ */
+ if (check_ip_above(sup->su_rq)) {
+ dld_capable = B_TRUE;
+ subsize += sizeof (dl_capability_sub_t) +
+ sizeof (dl_capab_dld_t);
+ }
+
+ /*
+ * Check if checksum offload is supported on this MAC.
+ */
+ if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) {
+ hcksum_capable = B_TRUE;
+ subsize += sizeof (dl_capability_sub_t) +
+ sizeof (dl_capab_hcksum_t);
+ }
+
+ /*
+ * Check if zerocopy is supported on this interface.
+ */
+ if (!(softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY)) {
+ zcopy_capable = B_TRUE;
+ subsize += sizeof (dl_capability_sub_t) +
+ sizeof (dl_capab_zerocopy_t);
+ }
+
+ if (softmac->smac_mdt) {
+ mdt_capable = B_TRUE;
+ subsize += sizeof (dl_capability_sub_t) +
+ sizeof (dl_capab_mdt_t);
+ }
+
+ /*
+ * If there are no capabilities to advertise or if we
+ * can't allocate a response, send a DL_ERROR_ACK.
+ */
+ if ((subsize == 0) || (mp1 = reallocb(mp,
+ sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
+ dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
+ return;
+ }
+
+ mp = mp1;
+ DB_TYPE(mp) = M_PROTO;
+ mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
+ bzero(mp->b_rptr, MBLKL(mp));
+ dlap = (dl_capability_ack_t *)mp->b_rptr;
+ dlap->dl_primitive = DL_CAPABILITY_ACK;
+ dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
+ dlap->dl_sub_length = subsize;
+ ptr = (uint8_t *)&dlap[1];
+
+ /*
+ * IP polling interface.
+ */
+ if (dld_capable) {
+ dl_capab_dld_t dld;
+
+ dlsp = (dl_capability_sub_t *)ptr;
+ dlsp->dl_cap = DL_CAPAB_DLD;
+ dlsp->dl_length = sizeof (dl_capab_dld_t);
+ ptr += sizeof (dl_capability_sub_t);
+
+ bzero(&dld, sizeof (dl_capab_dld_t));
+ dld.dld_version = DLD_CURRENT_VERSION;
+ dld.dld_capab = (uintptr_t)softmac_dld_capab;
+ dld.dld_capab_handle = (uintptr_t)sup;
+
+ dlcapabsetqid(&(dld.dld_mid), sup->su_rq);
+ bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
+ ptr += sizeof (dl_capab_dld_t);
+ }
+
+ /*
+ * TCP/IP checksum offload.
+ */
+ if (hcksum_capable) {
+ dl_capab_hcksum_t hcksum;
+
+ dlsp = (dl_capability_sub_t *)ptr;
+
+ dlsp->dl_cap = DL_CAPAB_HCKSUM;
+ dlsp->dl_length = sizeof (dl_capab_hcksum_t);
+ ptr += sizeof (dl_capability_sub_t);
+
+ bzero(&hcksum, sizeof (dl_capab_hcksum_t));
+ hcksum.hcksum_version = HCKSUM_VERSION_1;
+ hcksum.hcksum_txflags = softmac->smac_hcksum_txflags;
+ dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
+ bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
+ ptr += sizeof (dl_capab_hcksum_t);
+ }
+
+ /*
+ * Zero copy
+ */
+ if (zcopy_capable) {
+ dl_capab_zerocopy_t zcopy;
+
+ dlsp = (dl_capability_sub_t *)ptr;
+
+ dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
+ dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
+ ptr += sizeof (dl_capability_sub_t);
+
+ bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
+ zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
+ zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
+ dlcapabsetqid(&(zcopy.zerocopy_mid), sup->su_rq);
+ bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
+ ptr += sizeof (dl_capab_zerocopy_t);
+ }
+
+ /*
+ * MDT
+ */
+ if (mdt_capable) {
+ dl_capab_mdt_t mdt;
+
+ dlsp = (dl_capability_sub_t *)ptr;
+
+ dlsp->dl_cap = DL_CAPAB_MDT;
+ dlsp->dl_length = sizeof (dl_capab_mdt_t);
+ ptr += sizeof (dl_capability_sub_t);
+
+ bzero(&mdt, sizeof (dl_capab_mdt_t));
+ mdt.mdt_version = MDT_VERSION_2;
+ mdt.mdt_flags = DL_CAPAB_MDT_ENABLE;
+ mdt.mdt_hdr_head = softmac->smac_mdt_capab.mdt_hdr_head;
+ mdt.mdt_hdr_tail = softmac->smac_mdt_capab.mdt_hdr_tail;
+ mdt.mdt_max_pld = softmac->smac_mdt_capab.mdt_max_pld;
+ mdt.mdt_span_limit = softmac->smac_mdt_capab.mdt_span_limit;
+ dlcapabsetqid(&(mdt.mdt_mid), sup->su_rq);
+ bcopy(&mdt, ptr, sizeof (dl_capab_mdt_t));
+ ptr += sizeof (dl_capab_mdt_t);
+ }
+
+ ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
+ qreply(q, mp);
+}
+
+/*
+ * Handle a DL_CAPABILITY_REQ from the upper IP/ARP stream while this
+ * stream is in fast-path mode.  An empty request (dl_sub_length == 0) is
+ * answered by advertising all supported capabilities; otherwise the
+ * listed capabilities are enabled in place and the modified request is
+ * echoed back upstream as a DL_CAPABILITY_ACK.
+ */
+static void
+softmac_capability_req(softmac_upper_t *sup, mblk_t *mp)
+{
+ dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
+ dl_capability_sub_t *sp;
+ size_t size, len;
+ offset_t off, end;
+ t_uscalar_t dl_err;
+ queue_t *q = sup->su_wq;
+
+ ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
+ if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
+ dl_err = DL_BADPRIM;
+ goto failed;
+ }
+
+ if (!sup->su_bound) {
+ dl_err = DL_OUTSTATE;
+ goto failed;
+ }
+
+ /*
+ * This request is overloaded. If there are no requested capabilities
+ * then we just want to acknowledge with all the capabilities we
+ * support. Otherwise we enable the set of capabilities requested.
+ */
+ if (dlp->dl_sub_length == 0) {
+ softmac_capability_advertise(sup, mp);
+ return;
+ }
+
+ if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
+ dl_err = DL_BADPRIM;
+ goto failed;
+ }
+
+ /* Reuse the request mblk as the DL_CAPABILITY_ACK sent upstream. */
+ dlp->dl_primitive = DL_CAPABILITY_ACK;
+
+ off = dlp->dl_sub_offset;
+ len = dlp->dl_sub_length;
+
+ /*
+ * Walk the list of capabilities to be enabled.
+ */
+ for (end = off + len; off < end; ) {
+ sp = (dl_capability_sub_t *)(mp->b_rptr + off);
+ size = sizeof (dl_capability_sub_t) + sp->dl_length;
+
+ /* Reject truncated or misaligned sub-capability entries. */
+ if (off + size > end ||
+ !IS_P2ALIGNED(off, sizeof (uint32_t))) {
+ dl_err = DL_BADPRIM;
+ goto failed;
+ }
+
+ switch (sp->dl_cap) {
+ /*
+ * TCP/IP checksum offload to hardware.
+ */
+ case DL_CAPAB_HCKSUM: {
+ dl_capab_hcksum_t *hcksump;
+ dl_capab_hcksum_t hcksum;
+
+ hcksump = (dl_capab_hcksum_t *)&sp[1];
+ /*
+ * Copy for alignment.
+ */
+ bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
+ dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
+ bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ off += size;
+ }
+ qreply(q, mp);
+ return;
+failed:
+ dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
+}
+
+/*
+ * Process a DL_BIND_REQ from the upper IP/ARP stream while in fast-path
+ * mode.  The request is relayed to the dedicated-lower-stream; on success
+ * the driver capabilities are (re)enabled and this stream is accounted as
+ * an active user of the softmac.
+ */
+static void
+softmac_bind_req(softmac_upper_t *sup, mblk_t *mp)
+{
+ softmac_lower_t *slp = sup->su_slp;
+ softmac_t *softmac = sup->su_softmac;
+ mblk_t *ackmp, *mp1;
+ int err;
+
+ if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
+ freemsg(mp);
+ return;
+ }
+
+ /*
+ * Allocate ackmp in case the underlying driver does not ack timely.
+ */
+ if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
+ dlerrorack(sup->su_wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
+ return;
+ }
+
+ err = softmac_output(slp, mp, DL_BIND_REQ, DL_BIND_ACK, &ackmp);
+ if (ackmp != NULL) {
+ freemsg(mp1);
+ } else {
+ /*
+ * The driver does not ack timely.
+ */
+ ASSERT(err == ENOMSG);
+ ackmp = mp1;
+ }
+ if (err != 0)
+ goto failed;
+
+ /*
+ * Enable capabilities the underlying driver claims to support.
+ */
+ if ((err = softmac_capab_enable(slp)) != 0)
+ goto failed;
+
+ /*
+ * Check whether this softmac is already marked as exclusively used,
+ * e.g., an aggregation is created over it. Fail the BIND_REQ if so.
+ */
+ mutex_enter(&softmac->smac_active_mutex);
+ if (softmac->smac_active) {
+ mutex_exit(&softmac->smac_active_mutex);
+ err = EBUSY;
+ goto failed;
+ }
+ softmac->smac_nactive++;
+ sup->su_active = B_TRUE;
+ mutex_exit(&softmac->smac_active_mutex);
+ sup->su_bound = B_TRUE;
+
+ qreply(sup->su_wq, ackmp);
+ return;
+failed:
+ /*
+ * Every jump to this label carries a non-zero err; reply with an
+ * error ack (reusing ackmp) so ackmp is never leaked.  The previous
+ * "if (err != 0)" guard here was dead code and, had err somehow been
+ * zero, the function would have returned without replying at all.
+ */
+ ASSERT(err != 0);
+ dlerrorack(sup->su_wq, ackmp, DL_BIND_REQ, DL_SYSERR, err);
+}
+
+/*
+ * Process a DL_UNBIND_REQ from the upper IP/ARP stream while in
+ * fast-path mode.  The request is relayed to the dedicated-lower-stream;
+ * on success the stream is marked unbound and, if it was counted as an
+ * active user of the softmac, that accounting is dropped.
+ */
+static void
+softmac_unbind_req(softmac_upper_t *sup, mblk_t *mp)
+{
+ softmac_lower_t *slp = sup->su_slp;
+ softmac_t *softmac = sup->su_softmac;
+ mblk_t *ackmp, *mp1;
+ int err;
+
+ if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
+ freemsg(mp);
+ return;
+ }
+
+ if (!sup->su_bound) {
+ dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
+ return;
+ }
+
+ /*
+ * Allocate ackmp in case the underlying driver does not ack timely.
+ */
+ if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
+ dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
+ return;
+ }
+
+ err = softmac_output(slp, mp, DL_UNBIND_REQ, DL_OK_ACK, &ackmp);
+ if (ackmp != NULL) {
+ freemsg(mp1);
+ } else {
+ /*
+ * The driver does not ack timely.
+ */
+ ASSERT(err == ENOMSG);
+ ackmp = mp1;
+ }
+ if (err != 0) {
+ dlerrorack(sup->su_wq, ackmp, DL_UNBIND_REQ, DL_SYSERR, err);
+ return;
+ }
+
+ sup->su_bound = B_FALSE;
+
+ mutex_enter(&softmac->smac_active_mutex);
+ if (sup->su_active) {
+ ASSERT(!softmac->smac_active);
+ softmac->smac_nactive--;
+ sup->su_active = B_FALSE;
+ }
+ mutex_exit(&softmac->smac_active_mutex);
+
+ /*
+ * The unused "done:" label that used to precede this qreply() was
+ * never the target of any goto; it has been removed to avoid the
+ * unused-label compiler warning.
+ */
+ qreply(sup->su_wq, ackmp);
+}
+
+/*
+ * Process the non-data mblk.  Called from the serializing worker thread
+ * (softmac_wput_nondata_task()), one message at a time.  SIOCSLIFNAME
+ * identifies the stream as IP/ARP and triggers data-path setup; the
+ * softmac-internal DL_NOTE_REPLUMB / DL_NOTE_REPLUMB_DONE messages drive
+ * data-path mode switching; everything else is dispatched according to
+ * the stream's current mode (slow-path via dld, fast-path via the
+ * dedicated-lower-stream).
+ */
+static void
+softmac_wput_single_nondata(softmac_upper_t *sup, mblk_t *mp)
+{
+ softmac_t *softmac = sup->su_softmac;
+ softmac_lower_t *slp = sup->su_slp;
+ unsigned char dbtype;
+ t_uscalar_t prim;
+
+ dbtype = DB_TYPE(mp);
+ switch (dbtype) {
+ case M_IOCTL:
+ case M_CTL: {
+ uint32_t expected_mode;
+
+ if (((struct iocblk *)(mp->b_rptr))->ioc_cmd != SIOCSLIFNAME)
+ break;
+
+ /*
+ * Nak the M_IOCTL based on the STREAMS specification.
+ * NOTE(review): for the M_CTL case mp is neither freed nor
+ * forwarded on this path — confirm that ownership of the
+ * M_CTL message is handled elsewhere.
+ */
+ if (dbtype == M_IOCTL)
+ miocnak(sup->su_wq, mp, 0, EINVAL);
+
+ /*
+ * This stream is either IP or ARP. See whether
+ * we need to setup a dedicated-lower-stream for it.
+ */
+ mutex_enter(&softmac->smac_fp_mutex);
+
+ expected_mode = DATAPATH_MODE(softmac);
+ if (expected_mode == SOFTMAC_SLOWPATH)
+ sup->su_mode = SOFTMAC_SLOWPATH;
+ list_insert_head(&softmac->smac_sup_list, sup);
+ mutex_exit(&softmac->smac_fp_mutex);
+
+ /*
+ * Setup the fast-path dedicated lower stream if fast-path
+ * is expected. Note that no lock is held here, and if
+ * smac_expected_mode is changed from SOFTMAC_FASTPATH to
+ * SOFTMAC_SLOWPATH, the DL_NOTE_REPLUMB message used for
+ * data-path switching would already be queued and will
+ * be processed by softmac_wput_single_nondata() later.
+ */
+ if (expected_mode == SOFTMAC_FASTPATH)
+ (void) softmac_fastpath_setup(sup);
+ return;
+ }
+ case M_PROTO:
+ case M_PCPROTO:
+ if (MBLKL(mp) < sizeof (t_uscalar_t)) {
+ freemsg(mp);
+ return;
+ }
+ prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
+ switch (prim) {
+ case DL_NOTIFY_IND:
+ /* Only the softmac-generated REPLUMB note is valid. */
+ if (MBLKL(mp) < sizeof (dl_notify_ind_t) ||
+ ((dl_notify_ind_t *)mp->b_rptr)->dl_notification !=
+ DL_NOTE_REPLUMB) {
+ freemsg(mp);
+ return;
+ }
+ /*
+ * This DL_NOTE_REPLUMB message is initiated
+ * and queued by the softmac itself, when the
+ * sup is trying to switching its datapath mode
+ * between SOFTMAC_SLOWPATH and SOFTMAC_FASTPATH.
+ * Send this message upstream.
+ */
+ qreply(sup->su_wq, mp);
+ return;
+ case DL_NOTIFY_CONF:
+ if (MBLKL(mp) < sizeof (dl_notify_conf_t) ||
+ ((dl_notify_conf_t *)mp->b_rptr)->dl_notification !=
+ DL_NOTE_REPLUMB_DONE) {
+ freemsg(mp);
+ return;
+ }
+ /*
+ * This is an indication from IP/ARP that the
+ * fastpath->slowpath switch is done.
+ */
+ freemsg(mp);
+ softmac_datapath_switch_done(sup);
+ return;
+ }
+ break;
+ }
+
+ /*
+ * No need to hold a lock to check su_mode, since the su_mode updating
+ * operation is serialized by softmac_wput_nondata_task().
+ */
+ if (sup->su_mode != SOFTMAC_FASTPATH) {
+ dld_wput(sup->su_wq, mp);
+ return;
+ }
+
+ /*
+ * Fastpath non-data message processing. Most of non-data messages
+ * can be directly passed down to the dedicated-lower-stream, aside
+ * from the following M_PROTO/M_PCPROTO messages.
+ */
+ switch (dbtype) {
+ case M_PROTO:
+ case M_PCPROTO:
+ switch (prim) {
+ case DL_BIND_REQ:
+ softmac_bind_req(sup, mp);
+ break;
+ case DL_UNBIND_REQ:
+ softmac_unbind_req(sup, mp);
+ break;
+ case DL_CAPABILITY_REQ:
+ softmac_capability_req(sup, mp);
+ break;
+ default:
+ putnext(slp->sl_wq, mp);
+ break;
+ }
+ break;
+ default:
+ putnext(slp->sl_wq, mp);
+ break;
+ }
+}
+
+/*
+ * The worker thread which processes non-data messages. Note we only process
+ * one message at one time in order to be able to "flush" the queued message
+ * and serialize the processing.
+ */
+static void
+softmac_wput_nondata_task(void *arg)
+{
+ softmac_upper_t *sup = arg;
+ mblk_t *mp;
+
+ mutex_enter(&sup->su_disp_mutex);
+
+ /* Drain the pending queue, dropping the lock around each message. */
+ while (sup->su_pending_head != NULL) {
+ if (sup->su_closing)
+ break;
+
+ SOFTMAC_DQ_PENDING(sup, &mp);
+ mutex_exit(&sup->su_disp_mutex);
+ softmac_wput_single_nondata(sup, mp);
+ mutex_enter(&sup->su_disp_mutex);
+ }
+
+ /*
+ * If the stream is closing, flush all queued messages and inform
+ * the stream to be closed.
+ */
+ freemsgchain(sup->su_pending_head);
+ sup->su_pending_head = sup->su_pending_tail = NULL;
+ sup->su_dlpi_pending = B_FALSE;
+ cv_signal(&sup->su_disp_cv);
+ mutex_exit(&sup->su_disp_mutex);
+}
+
+/*
+ * Kernel thread to handle taskq dispatch failures in softmac_wput_nondata().
+ * This thread is started when the softmac module is first loaded.
+ * It sleeps (CPR-safe) on softmac_taskq_cv and, for each queued sup,
+ * retries the dispatch with TQ_SLEEP so it cannot fail.
+ */
+static void
+softmac_taskq_dispatch(void)
+{
+ callb_cpr_t cprinfo;
+ softmac_upper_t *sup;
+
+ CALLB_CPR_INIT(&cprinfo, &softmac_taskq_lock, callb_generic_cpr,
+ "softmac_taskq_dispatch");
+ mutex_enter(&softmac_taskq_lock);
+
+ while (!softmac_taskq_quit) {
+ sup = list_head(&softmac_taskq_list);
+ while (sup != NULL) {
+ list_remove(&softmac_taskq_list, sup);
+ sup->su_taskq_scheduled = B_FALSE;
+ mutex_exit(&softmac_taskq_lock);
+ /* TQ_SLEEP dispatch blocks until it succeeds. */
+ VERIFY(taskq_dispatch(system_taskq,
+ softmac_wput_nondata_task, sup, TQ_SLEEP) != NULL);
+ mutex_enter(&softmac_taskq_lock);
+ sup = list_head(&softmac_taskq_list);
+ }
+
+ CALLB_CPR_SAFE_BEGIN(&cprinfo);
+ cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
+ CALLB_CPR_SAFE_END(&cprinfo, &softmac_taskq_lock);
+ }
+
+ /* Acknowledge shutdown to whoever set softmac_taskq_quit. */
+ softmac_taskq_done = B_TRUE;
+ cv_signal(&softmac_taskq_cv);
+ CALLB_CPR_EXIT(&cprinfo);
+ thread_exit();
+}
+
+/*
+ * Enqueue a non-data message for the serialized worker.  Dispatches
+ * softmac_wput_nondata_task() on the system taskq; if that fails
+ * (TQ_NOSLEEP), the sup is handed to the softmac_taskq_dispatch()
+ * thread, which retries with TQ_SLEEP.
+ */
+void
+softmac_wput_nondata(softmac_upper_t *sup, mblk_t *mp)
+{
+ /*
+ * The processing of the message might block. Enqueue the
+ * message for later processing.
+ */
+ mutex_enter(&sup->su_disp_mutex);
+
+ if (sup->su_closing) {
+ mutex_exit(&sup->su_disp_mutex);
+ freemsg(mp);
+ return;
+ }
+
+ SOFTMAC_EQ_PENDING(sup, mp);
+
+ /* A worker is already scheduled; it will pick this message up. */
+ if (sup->su_dlpi_pending) {
+ mutex_exit(&sup->su_disp_mutex);
+ return;
+ }
+ sup->su_dlpi_pending = B_TRUE;
+ mutex_exit(&sup->su_disp_mutex);
+
+ if (taskq_dispatch(system_taskq, softmac_wput_nondata_task,
+ sup, TQ_NOSLEEP) != NULL) {
+ return;
+ }
+
+ /* Dispatch failed; defer to the dedicated retry thread. */
+ mutex_enter(&softmac_taskq_lock);
+ if (!sup->su_taskq_scheduled) {
+ list_insert_tail(&softmac_taskq_list, sup);
+ cv_signal(&softmac_taskq_cv);
+ }
+ sup->su_taskq_scheduled = B_TRUE;
+ mutex_exit(&softmac_taskq_lock);
+}
+
+/*
+ * Setup the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
+ * On failure the stream falls back to (stays in) SOFTMAC_SLOWPATH mode;
+ * the error is returned either way.
+ */
+static int
+softmac_fastpath_setup(softmac_upper_t *sup)
+{
+ softmac_t *softmac = sup->su_softmac;
+ softmac_lower_t *slp;
+ int err;
+
+ err = softmac_lower_setup(softmac, sup, &slp);
+
+ mutex_enter(&sup->su_mutex);
+ /*
+ * Wait for all data messages to be processed so that we can change
+ * the su_mode.
+ */
+ while (sup->su_tx_inprocess != 0)
+ cv_wait(&sup->su_cv, &sup->su_mutex);
+
+ ASSERT(sup->su_mode != SOFTMAC_FASTPATH);
+ ASSERT(sup->su_slp == NULL);
+ if (err != 0) {
+ sup->su_mode = SOFTMAC_SLOWPATH;
+ } else {
+ sup->su_slp = slp;
+ sup->su_mode = SOFTMAC_FASTPATH;
+ }
+ mutex_exit(&sup->su_mutex);
+ return (err);
+}
+
+/*
+ * Tear down the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
+ * Quiesces in-flight transmits, restores any flow-control mblk parked on
+ * the write queue, switches the stream to SOFTMAC_SLOWPATH, and closes
+ * the lower stream.
+ */
+static void
+softmac_fastpath_tear(softmac_upper_t *sup)
+{
+ mutex_enter(&sup->su_mutex);
+ /*
+ * Wait for all data messages in the dedicated-lower-stream
+ * to be processed.
+ */
+ while (sup->su_tx_inprocess != 0)
+ cv_wait(&sup->su_cv, &sup->su_mutex);
+
+ /* Reclaim the flow-control mblk if flow-control was engaged. */
+ if (sup->su_tx_busy) {
+ ASSERT(sup->su_tx_flow_mp == NULL);
+ sup->su_tx_flow_mp = getq(sup->su_wq);
+ sup->su_tx_busy = B_FALSE;
+ }
+
+ sup->su_mode = SOFTMAC_SLOWPATH;
+
+ /*
+ * Destroy the dedicated-lower-stream. Note that slp is destroyed
+ * when lh is closed.
+ */
+ (void) ldi_close(sup->su_slp->sl_lh, FREAD|FWRITE, kcred);
+ sup->su_slp = NULL;
+ mutex_exit(&sup->su_mutex);
+}
+
+/*
+ * Transmit entry point for data messages from the upper stream: route to
+ * dld (slow-path) or to the dedicated-lower-stream (fast-path).
+ */
+void
+softmac_wput_data(softmac_upper_t *sup, mblk_t *mp)
+{
+ /*
+ * No lock is required to access the su_mode field since the data
+ * traffic is quiesced by IP when the data-path mode is in the
+ * process of switching.
+ */
+ if (sup->su_mode != SOFTMAC_FASTPATH)
+ dld_wput(sup->su_wq, mp);
+ else
+ (void) softmac_fastpath_wput_data(sup, mp, NULL, 0);
+}
+
+/*
+ * Fast-path transmit: send mp down the dedicated-lower-stream, engaging
+ * STREAMS flow-control when the lower queue is congested.  Returns a
+ * non-NULL cookie (the lower write queue) when flow-control is in effect.
+ */
+/*ARGSUSED*/
+static mac_tx_cookie_t
+softmac_fastpath_wput_data(softmac_upper_t *sup, mblk_t *mp, uintptr_t f_hint,
+ uint16_t flag)
+{
+ queue_t *wq = sup->su_slp->sl_wq;
+
+ /*
+ * This function is called from IP, only the MAC_DROP_ON_NO_DESC
+ * flag can be specified.
+ */
+ ASSERT((flag & ~MAC_DROP_ON_NO_DESC) == 0);
+ ASSERT(mp->b_next == NULL);
+
+ /*
+ * Check whether the dedicated-lower-stream is able to handle more
+ * messages, and enable the flow-control if it is not.
+ *
+ * Note that in order not to introduce any packet reordering, we
+ * always send the message down to the dedicated-lower-stream:
+ *
+ * If the flow-control is already enabled, but we still get
+ * the messages from the upper-stream, it means that the upper
+ * stream does not respect STREAMS flow-control (e.g., TCP). Simply
+ * pass the message down to the lower-stream in that case.
+ */
+ if (SOFTMAC_CANPUTNEXT(wq)) {
+ putnext(wq, mp);
+ return (NULL);
+ }
+
+ /* Congested: the caller asked us to drop rather than queue. */
+ if ((flag & MAC_DROP_ON_NO_DESC) != 0) {
+ freemsg(mp);
+ return ((mac_tx_cookie_t)wq);
+ }
+
+ /* Flow-control already engaged; keep ordering, pass it down. */
+ if (sup->su_tx_busy) {
+ putnext(wq, mp);
+ return ((mac_tx_cookie_t)wq);
+ }
+
+ /* Engage flow-control: park the flow mblk on our write queue. */
+ mutex_enter(&sup->su_mutex);
+ if (!sup->su_tx_busy) {
+ ASSERT(sup->su_tx_flow_mp != NULL);
+ (void) putq(sup->su_wq, sup->su_tx_flow_mp);
+ sup->su_tx_flow_mp = NULL;
+ sup->su_tx_busy = B_TRUE;
+ qenable(wq);
+ }
+ mutex_exit(&sup->su_mutex);
+ putnext(wq, mp);
+ return ((mac_tx_cookie_t)wq);
+}
+
+/*
+ * Try to mark the softmac as exclusively active (e.g., on behalf of an
+ * aggregation).  Fails if any upper stream is currently active on it.
+ * Returns B_TRUE on success, B_FALSE otherwise.
+ */
+boolean_t
+softmac_active_set(void *arg)
+{
+ softmac_t *softmac = arg;
+ boolean_t success = B_FALSE;
+
+ mutex_enter(&softmac->smac_active_mutex);
+ if (softmac->smac_nactive == 0) {
+ softmac->smac_active = B_TRUE;
+ success = B_TRUE;
+ }
+ mutex_exit(&softmac->smac_active_mutex);
+ return (success);
+}
+
+/*
+ * Clear the exclusive-active mark set by softmac_active_set().  The
+ * caller must hold the exclusive claim and no upper stream may be active.
+ */
+void
+softmac_active_clear(void *arg)
+{
+ softmac_t *softmac = arg;
+
+ mutex_enter(&softmac->smac_active_mutex);
+ ASSERT(softmac->smac_active && (softmac->smac_nactive == 0));
+ softmac->smac_active = B_FALSE;
+ mutex_exit(&softmac->smac_active_mutex);
+}
+
+/*
+ * Disable/reenable fastpath on given softmac. This request could come from a
+ * MAC client or directly from administrators.
+ *
+ * Two-phase operation under smac_fp_mutex: first allocate one switch
+ * request and one DL_NOTE_REPLUMB message per IP/ARP upper stream, then
+ * queue them so each stream switches asynchronously.  On allocation
+ * failure the admin/client accounting is rolled back.
+ */
+int
+softmac_datapath_switch(softmac_t *softmac, boolean_t disable, boolean_t admin)
+{
+ softmac_upper_t *sup;
+ mblk_t *head = NULL, *tail = NULL, *mp;
+ list_t reqlist;
+ softmac_switch_req_t *req;
+ uint32_t current_mode, expected_mode;
+ int err = 0;
+
+ mutex_enter(&softmac->smac_fp_mutex);
+
+ current_mode = DATAPATH_MODE(softmac);
+ if (admin) {
+ if (softmac->smac_fastpath_admin_disabled == disable) {
+ mutex_exit(&softmac->smac_fp_mutex);
+ return (0);
+ }
+ softmac->smac_fastpath_admin_disabled = disable;
+ } else if (disable) {
+ softmac->smac_fp_disable_clients++;
+ } else {
+ ASSERT(softmac->smac_fp_disable_clients != 0);
+ softmac->smac_fp_disable_clients--;
+ }
+
+ /* If the effective mode did not change, there is nothing to do. */
+ expected_mode = DATAPATH_MODE(softmac);
+ if (current_mode == expected_mode) {
+ mutex_exit(&softmac->smac_fp_mutex);
+ return (0);
+ }
+
+ /*
+ * The expected mode is different from whatever datapath mode
+ * this softmac is expected from last request, enqueue the data-path
+ * switch request.
+ */
+ list_create(&reqlist, sizeof (softmac_switch_req_t),
+ offsetof(softmac_switch_req_t, ssq_req_list_node));
+
+ /*
+ * Allocate all DL_NOTIFY_IND messages and request structures that
+ * are required to switch each IP/ARP stream to the expected mode.
+ */
+ for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
+ sup = list_next(&softmac->smac_sup_list, sup)) {
+ dl_notify_ind_t *dlip;
+
+ req = kmem_alloc(sizeof (softmac_switch_req_t), KM_NOSLEEP);
+ if (req == NULL)
+ break;
+
+ req->ssq_expected_mode = expected_mode;
+
+ /*
+ * Allocate the DL_NOTE_REPLUMB message.
+ */
+ if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL) {
+ kmem_free(req, sizeof (softmac_switch_req_t));
+ break;
+ }
+
+ list_insert_tail(&reqlist, req);
+
+ mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t);
+ mp->b_datap->db_type = M_PROTO;
+ bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
+ dlip = (dl_notify_ind_t *)mp->b_rptr;
+ dlip->dl_primitive = DL_NOTIFY_IND;
+ dlip->dl_notification = DL_NOTE_REPLUMB;
+ if (head == NULL) {
+ head = tail = mp;
+ } else {
+ tail->b_next = mp;
+ tail = mp;
+ }
+ }
+
+ /*
+ * Note that it is fine if the expected data-path mode is fast-path
+ * and some of streams fails to switch. Only return failure if we
+ * are expected to switch to the slow-path.
+ */
+ if (sup != NULL && expected_mode == SOFTMAC_SLOWPATH) {
+ err = ENOMEM;
+ goto fail;
+ }
+
+ /*
+ * Start switching for each IP/ARP stream. The switching operation
+ * will eventually succeed and there is no need to wait for it
+ * to finish.
+ */
+ for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
+ sup = list_next(&softmac->smac_sup_list, sup)) {
+ mp = head->b_next;
+ head->b_next = NULL;
+
+ /*
+ * Add the switch request to the requests list of the stream.
+ */
+ req = list_head(&reqlist);
+ ASSERT(req != NULL);
+ list_remove(&reqlist, req);
+ list_insert_tail(&sup->su_req_list, req);
+ softmac_wput_nondata(sup, head);
+ head = mp;
+ }
+
+ mutex_exit(&softmac->smac_fp_mutex);
+ ASSERT(list_is_empty(&reqlist));
+ list_destroy(&reqlist);
+ return (0);
+fail:
+ /* Roll back the accounting change made above. */
+ if (admin) {
+ softmac->smac_fastpath_admin_disabled = !disable;
+ } else if (disable) {
+ softmac->smac_fp_disable_clients--;
+ } else {
+ softmac->smac_fp_disable_clients++;
+ }
+
+ mutex_exit(&softmac->smac_fp_mutex);
+ while ((req = list_head(&reqlist)) != NULL) {
+ list_remove(&reqlist, req);
+ kmem_free(req, sizeof (softmac_switch_req_t));
+ }
+ freemsgchain(head);
+ list_destroy(&reqlist);
+ return (err);
+}
+
+/*
+ * MAC-client callback: request that fast-path be disabled on this softmac.
+ */
+int
+softmac_fastpath_disable(void *arg)
+{
+ return (softmac_datapath_switch((softmac_t *)arg, B_TRUE, B_FALSE));
+}
+
+/*
+ * MAC-client callback: drop a previous fast-path-disable request.  The
+ * enable direction of softmac_datapath_switch() cannot fail, hence VERIFY.
+ */
+void
+softmac_fastpath_enable(void *arg)
+{
+ VERIFY(softmac_datapath_switch((softmac_t *)arg, B_FALSE,
+ B_FALSE) == 0);
+}
+
+/*
+ * Called when an IP/ARP upper stream closes: tear down its fast-path (if
+ * any), unlink it from the softmac's stream list, and free any pending
+ * data-path switch requests.
+ */
+void
+softmac_upperstream_close(softmac_upper_t *sup)
+{
+ softmac_t *softmac = sup->su_softmac;
+ softmac_switch_req_t *req;
+
+ mutex_enter(&softmac->smac_fp_mutex);
+
+ if (sup->su_mode == SOFTMAC_FASTPATH)
+ softmac_fastpath_tear(sup);
+
+ if (sup->su_mode != SOFTMAC_UNKNOWN) {
+ list_remove(&softmac->smac_sup_list, sup);
+ sup->su_mode = SOFTMAC_UNKNOWN;
+ }
+
+ /*
+ * Cleanup all the switch requests queued on this stream.
+ */
+ while ((req = list_head(&sup->su_req_list)) != NULL) {
+ list_remove(&sup->su_req_list, req);
+ kmem_free(req, sizeof (softmac_switch_req_t));
+ }
+ mutex_exit(&softmac->smac_fp_mutex);
+}
+
+/*
+ * Handle the DL_NOTE_REPLUMB_DONE indication from IP/ARP. Change the upper
+ * stream from the fastpath mode to the slowpath mode.
+ *
+ * NOTE(review): assumes su_req_list is non-empty here (a REPLUMB_DONE
+ * only arrives for a request we queued) — confirm against callers.
+ */
+static void
+softmac_datapath_switch_done(softmac_upper_t *sup)
+{
+ softmac_t *softmac = sup->su_softmac;
+ softmac_switch_req_t *req;
+ uint32_t expected_mode;
+
+ mutex_enter(&softmac->smac_fp_mutex);
+ req = list_head(&sup->su_req_list);
+ list_remove(&sup->su_req_list, req);
+ expected_mode = req->ssq_expected_mode;
+ kmem_free(req, sizeof (softmac_switch_req_t));
+
+ /* Already in the expected mode; nothing to do. */
+ if (expected_mode == sup->su_mode) {
+ mutex_exit(&softmac->smac_fp_mutex);
+ return;
+ }
+
+ ASSERT(!sup->su_bound);
+ mutex_exit(&softmac->smac_fp_mutex);
+
+ /*
+ * It is fine if the expected mode is fast-path and we fail
+ * to enable fastpath on this stream.
+ */
+ if (expected_mode == SOFTMAC_SLOWPATH)
+ softmac_fastpath_tear(sup);
+ else
+ (void) softmac_fastpath_setup(sup);
+}
diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c
index a44856c849..bfdf3ee851 100644
--- a/usr/src/uts/common/io/softmac/softmac_main.c
+++ b/usr/src/uts/common/io/softmac/softmac_main.c
@@ -69,6 +69,8 @@ static mod_hash_t *softmac_hash;
static kmutex_t smac_global_lock;
static kcondvar_t smac_global_cv;
+static kmem_cache_t *softmac_cachep;
+
#define SOFTMAC_HASHSZ 64
static void softmac_create_task(void *);
@@ -79,9 +81,14 @@ static void softmac_m_stop(void *);
static int softmac_m_open(void *);
static void softmac_m_close(void *);
static boolean_t softmac_m_getcapab(void *, mac_capab_t, void *);
+static int softmac_m_setprop(void *, const char *, mac_prop_id_t,
+ uint_t, const void *);
+static int softmac_m_getprop(void *, const char *, mac_prop_id_t,
+ uint_t, uint_t, void *, uint_t *);
+
#define SOFTMAC_M_CALLBACK_FLAGS \
- (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE)
+ (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | MC_GETPROP)
static mac_callbacks_t softmac_m_callbacks = {
SOFTMAC_M_CALLBACK_FLAGS,
@@ -95,9 +102,57 @@ static mac_callbacks_t softmac_m_callbacks = {
softmac_m_ioctl,
softmac_m_getcapab,
softmac_m_open,
- softmac_m_close
+ softmac_m_close,
+ softmac_m_setprop,
+ softmac_m_getprop
};
+/*
+ * kmem cache constructor for softmac_t: zero the object and initialize
+ * its locks, condition variables, and upper-stream list.
+ */
+/*ARGSUSED*/
+static int
+softmac_constructor(void *buf, void *arg, int kmflag)
+{
+ softmac_t *softmac = buf;
+
+ bzero(buf, sizeof (softmac_t));
+ mutex_init(&softmac->smac_mutex, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&softmac->smac_active_mutex, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&softmac->smac_fp_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&softmac->smac_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&softmac->smac_fp_cv, NULL, CV_DEFAULT, NULL);
+ list_create(&softmac->smac_sup_list, sizeof (softmac_upper_t),
+ offsetof(softmac_upper_t, su_list_node));
+ return (0);
+}
+
+/*
+ * kmem cache destructor for softmac_t: verify the object has been fully
+ * quiesced (no holds, no attached lowers, no active users) and tear down
+ * the synchronization primitives created by softmac_constructor().
+ */
+/*ARGSUSED*/
+static void
+softmac_destructor(void *buf, void *arg)
+{
+ softmac_t *softmac = buf;
+
+ ASSERT(softmac->smac_fp_disable_clients == 0);
+ ASSERT(!softmac->smac_fastpath_admin_disabled);
+
+ ASSERT(!(softmac->smac_flags & SOFTMAC_ATTACH_DONE));
+ ASSERT(softmac->smac_hold_cnt == 0);
+ ASSERT(softmac->smac_attachok_cnt == 0);
+ ASSERT(softmac->smac_mh == NULL);
+ ASSERT(softmac->smac_softmac[0] == NULL &&
+ softmac->smac_softmac[1] == NULL);
+ ASSERT(softmac->smac_state == SOFTMAC_INITIALIZED);
+ ASSERT(softmac->smac_lower == NULL);
+ ASSERT(softmac->smac_active == B_FALSE);
+ ASSERT(softmac->smac_nactive == 0);
+ ASSERT(list_is_empty(&softmac->smac_sup_list));
+
+ list_destroy(&softmac->smac_sup_list);
+ mutex_destroy(&softmac->smac_mutex);
+ mutex_destroy(&softmac->smac_active_mutex);
+ mutex_destroy(&softmac->smac_fp_mutex);
+ cv_destroy(&softmac->smac_cv);
+ cv_destroy(&softmac->smac_fp_cv);
+}
+
void
softmac_init()
{
@@ -108,11 +163,19 @@ softmac_init()
rw_init(&softmac_hash_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&smac_global_lock, NULL, MUTEX_DRIVER, NULL);
cv_init(&smac_global_cv, NULL, CV_DRIVER, NULL);
+
+ softmac_cachep = kmem_cache_create("softmac_cache",
+ sizeof (softmac_t), 0, softmac_constructor,
+ softmac_destructor, NULL, NULL, NULL, 0);
+ ASSERT(softmac_cachep != NULL);
+ softmac_fp_init();
}
void
softmac_fini()
{
+ softmac_fp_fini();
+ kmem_cache_destroy(softmac_cachep);
rw_destroy(&softmac_hash_lock);
mod_hash_destroy_hash(softmac_hash);
mutex_destroy(&smac_global_lock);
@@ -281,16 +344,12 @@ softmac_create(dev_info_t *dip, dev_t dev)
* Check whether the softmac for the specified device already exists
*/
rw_enter(&softmac_hash_lock, RW_WRITER);
- if ((err = mod_hash_find(softmac_hash, (mod_hash_key_t)devname,
+ if ((mod_hash_find(softmac_hash, (mod_hash_key_t)devname,
(mod_hash_val_t *)&softmac)) != 0) {
- softmac = kmem_zalloc(sizeof (softmac_t), KM_SLEEP);
- mutex_init(&softmac->smac_mutex, NULL, MUTEX_DRIVER, NULL);
- cv_init(&softmac->smac_cv, NULL, CV_DRIVER, NULL);
+ softmac = kmem_cache_alloc(softmac_cachep, KM_SLEEP);
(void) strlcpy(softmac->smac_devname, devname, MAXNAMELEN);
- /*
- * Insert the softmac into the hash table.
- */
+
err = mod_hash_insert(softmac_hash,
(mod_hash_key_t)softmac->smac_devname,
(mod_hash_val_t)softmac);
@@ -413,8 +472,18 @@ softmac_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
case MAC_CAPAB_LEGACY: {
mac_capab_legacy_t *legacy = cap_data;
+ /*
+ * The caller is not interested in the details.
+ */
+ if (legacy == NULL)
+ break;
+
legacy->ml_unsup_note = ~softmac->smac_notifications &
(DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_SPEED);
+ legacy->ml_active_set = softmac_active_set;
+ legacy->ml_active_clear = softmac_active_clear;
+ legacy->ml_fastpath_disable = softmac_fastpath_disable;
+ legacy->ml_fastpath_enable = softmac_fastpath_enable;
legacy->ml_dev = makedevice(softmac->smac_umajor,
softmac->smac_uppa + 1);
break;
@@ -816,22 +885,23 @@ softmac_mac_register(softmac_t *softmac)
* Try to create the datalink for this softmac.
*/
if ((err = softmac_create_datalink(softmac)) != 0) {
- if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) {
+ if (!(softmac->smac_flags & SOFTMAC_NOSUPP))
(void) mac_unregister(softmac->smac_mh);
- softmac->smac_mh = NULL;
- }
+ mutex_enter(&softmac->smac_mutex);
+ softmac->smac_mh = NULL;
+ goto done;
}
/*
* If succeed, create the thread which handles the DL_NOTIFY_IND from
* the lower stream.
*/
+ mutex_enter(&softmac->smac_mutex);
if (softmac->smac_mh != NULL) {
softmac->smac_notify_thread = thread_create(NULL, 0,
softmac_notify_thread, softmac, 0, &p0,
TS_RUN, minclsyspri);
}
- mutex_enter(&softmac->smac_mutex);
done:
ASSERT(softmac->smac_state == SOFTMAC_ATTACH_INPROG &&
softmac->smac_attachok_cnt == softmac->smac_cnt);
@@ -967,7 +1037,6 @@ softmac_destroy(dev_info_t *dip, dev_t dev)
rw_exit(&softmac_hash_lock);
return (0);
}
-
err = mod_hash_remove(softmac_hash,
(mod_hash_key_t)devname,
(mod_hash_val_t *)&hashval);
@@ -975,10 +1044,9 @@ softmac_destroy(dev_info_t *dip, dev_t dev)
mutex_exit(&softmac->smac_mutex);
rw_exit(&softmac_hash_lock);
-
- mutex_destroy(&softmac->smac_mutex);
- cv_destroy(&softmac->smac_cv);
- kmem_free(softmac, sizeof (softmac_t));
+ ASSERT(softmac->smac_fp_disable_clients == 0);
+ softmac->smac_fastpath_admin_disabled = B_FALSE;
+ kmem_cache_free(softmac_cachep, softmac);
return (0);
}
mutex_exit(&softmac->smac_mutex);
@@ -1119,27 +1187,84 @@ softmac_recreate()
} while (smw.smw_retry);
}
-/* ARGSUSED */
static int
softmac_m_start(void *arg)
{
- return (0);
+ softmac_t *softmac = arg;
+ softmac_lower_t *slp = softmac->smac_lower;
+ int err;
+
+ ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
+ /*
+ * Bind to SAP 2 on token ring, 0 on other interface types.
+ * (SAP 0 has special significance on token ring).
+ * Note that the receive-side packets could come anytime after bind.
+ */
+ err = softmac_send_bind_req(slp, softmac->smac_media == DL_TPR ? 2 : 0);
+ if (err != 0)
+ return (err);
+
+ /*
+ * Put the lower stream to the DL_PROMISC_SAP mode in order to receive
+ * all packets of interest.
+ *
+ * Some drivers (e.g. the old legacy eri driver) incorrectly pass up
+ * packets to the DL_PROMISC_SAP stream when the lower stream is not
+ * bound, so we send DL_PROMISCON_REQ after DL_BIND_REQ.
+ */
+ err = softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_TRUE);
+ if (err != 0) {
+ (void) softmac_send_unbind_req(slp);
+ return (err);
+ }
+
+ /*
+ * Enable capabilities the underlying driver claims to support.
+ * Some driver requires this being called after the stream is bound.
+ */
+ if ((err = softmac_capab_enable(slp)) != 0) {
+ (void) softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_FALSE);
+ (void) softmac_send_unbind_req(slp);
+ }
+
+ return (err);
}
/* ARGSUSED */
static void
softmac_m_stop(void *arg)
{
+ softmac_t *softmac = arg;
+ softmac_lower_t *slp = softmac->smac_lower;
+
+ ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
+
+ /*
+ * It is not needed to reset zerocopy, MDT or HCKSUM capabilities.
+ */
+ (void) softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_FALSE);
+ (void) softmac_send_unbind_req(slp);
}
/*
- * Set up the lower stream above the legacy device which is shared by
- * GLDv3 MAC clients. Put the lower stream into DLIOCRAW mode to send
- * and receive the raw data. Further, put the lower stream into
+ * Set up the lower stream above the legacy device. There are two different
+ * type of lower streams:
+ *
+ * - Shared lower-stream
+ *
+ * Shared by all GLDv3 MAC clients. Put the lower stream to the DLIOCRAW
+ * mode to send and receive the raw data. Further, put the lower stream into
* DL_PROMISC_SAP mode to receive all packets of interest.
+ *
+ * - Dedicated lower-stream
+ *
+ * The lower-stream which is dedicated to upper IP/ARP stream. This is used
+ * as fast-path for IP. In this case, the second argument is the pointer to
+ * the softmac upper-stream.
*/
-static int
-softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp)
+int
+softmac_lower_setup(softmac_t *softmac, softmac_upper_t *sup,
+ softmac_lower_t **slpp)
{
ldi_ident_t li;
dev_t dev;
@@ -1153,7 +1278,13 @@ softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp)
if ((err = ldi_ident_from_dip(softmac_dip, &li)) != 0)
return (err);
+ /*
+ * The GLDv3 framework makes sure that mac_unregister(), mac_open(),
+ * and mac_close() cannot be called at the same time. So we don't
+ * need any protection to access softmac here.
+ */
dev = softmac->smac_dev;
+
err = ldi_open_by_dev(&dev, OTYP_CHR, FREAD|FWRITE, kcred, &lh, li);
ldi_ident_release(li);
if (err != 0)
@@ -1172,10 +1303,13 @@ softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp)
}
/*
- * Put the lower stream into DLIOCRAW mode to send/receive raw data.
+ * If this is the shared-lower-stream, put the lower stream to
+ * the DLIOCRAW mode to send/receive raw data.
*/
- if ((err = ldi_ioctl(lh, DLIOCRAW, 0, FKIOCTL, kcred, &rval)) != 0)
+ if ((sup == NULL) && (err = ldi_ioctl(lh, DLIOCRAW, 0, FKIOCTL,
+ kcred, &rval)) != 0) {
goto done;
+ }
/*
* Then push the softmac shim layer atop the lower stream.
@@ -1198,50 +1332,25 @@ softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp)
goto done;
}
slp = start_arg.si_slp;
+ slp->sl_sup = sup;
slp->sl_lh = lh;
slp->sl_softmac = softmac;
*slpp = slp;
- /*
- * Bind to SAP 2 on token ring, 0 on other interface types.
- * (SAP 0 has special significance on token ring).
- * Note that the receive-side packets could come anytime after bind.
- */
- if (softmac->smac_media == DL_TPR)
- err = softmac_send_bind_req(slp, 2);
- else
- err = softmac_send_bind_req(slp, 0);
- if (err != 0)
- goto done;
-
- /*
- * Put the lower stream into DL_PROMISC_SAP mode to receive all
- * packets of interest.
- *
- * Some drivers (e.g. the old legacy eri driver) incorrectly pass up
- * packets to DL_PROMISC_SAP stream when the lower stream is not bound,
- * so we send DL_PROMISON_REQ after DL_BIND_REQ.
- */
- if ((err = softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_TRUE)) != 0)
- goto done;
-
- /*
- * Enable the capabilities the underlying driver claims to support.
- * Some drivers require this to be called after the stream is bound.
- */
- if ((err = softmac_capab_enable(slp)) != 0)
- goto done;
-
- /*
- * Send the DL_NOTIFY_REQ to enable certain DL_NOTIFY_IND.
- * We don't have to wait for the ack.
- */
- notifications = DL_NOTE_PHYS_ADDR | DL_NOTE_LINK_UP |
- DL_NOTE_LINK_DOWN | DL_NOTE_PROMISC_ON_PHYS |
- DL_NOTE_PROMISC_OFF_PHYS;
+ if (sup != NULL) {
+ slp->sl_rxinfo = &sup->su_rxinfo;
+ } else {
+ /*
+ * Send DL_NOTIFY_REQ to enable certain DL_NOTIFY_IND.
+ * We don't have to wait for the ack.
+ */
+ notifications = DL_NOTE_PHYS_ADDR | DL_NOTE_LINK_UP |
+ DL_NOTE_LINK_DOWN | DL_NOTE_PROMISC_ON_PHYS |
+ DL_NOTE_PROMISC_OFF_PHYS;
- (void) softmac_send_notify_req(slp,
- (notifications & softmac->smac_notifications));
+ (void) softmac_send_notify_req(slp,
+ (notifications & softmac->smac_notifications));
+ }
done:
if (err != 0)
@@ -1257,13 +1366,11 @@ softmac_m_open(void *arg)
int err;
ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
- ASSERT(softmac->smac_lower_state == SOFTMAC_INITIALIZED);
- if ((err = softmac_lower_setup(softmac, &slp)) != 0)
+ if ((err = softmac_lower_setup(softmac, NULL, &slp)) != 0)
return (err);
softmac->smac_lower = slp;
- softmac->smac_lower_state = SOFTMAC_READY;
return (0);
}
@@ -1274,7 +1381,6 @@ softmac_m_close(void *arg)
softmac_lower_t *slp;
ASSERT(MAC_PERIM_HELD(softmac->smac_mh));
- ASSERT(softmac->smac_lower_state == SOFTMAC_READY);
slp = softmac->smac_lower;
ASSERT(slp != NULL);
@@ -1282,10 +1388,74 @@ softmac_m_close(void *arg)
* Note that slp is destroyed when lh is closed.
*/
(void) ldi_close(slp->sl_lh, FREAD|FWRITE, kcred);
- softmac->smac_lower_state = SOFTMAC_INITIALIZED;
softmac->smac_lower = NULL;
}
+/*
+ * Softmac supports two private link properties:
+ *
+ * - "_fastpath"
+ *
+ * This is a read-only link property which points out the current data-path
+ * model of the given legacy link. The possible values are "disabled" and
+ * "enabled".
+ *
+ * - "_disable_fastpath"
+ *
+ * This is a read-write link property which can be used to disable or enable
+ * the fast-path of the given legacy link. The possible values are "true"
+ * and "false". Note that even when "_disable_fastpath" is set to
+ * "false", the fast-path may still not be enabled since there may be
+ * other mac clients that request the fast-path to be disabled.
+ */
+/* ARGSUSED */
+static int
+softmac_m_setprop(void *arg, const char *name, mac_prop_id_t id,
+ uint_t valsize, const void *val)
+{
+ softmac_t *softmac = arg;
+
+ if (id != MAC_PROP_PRIVATE || strcmp(name, "_disable_fastpath") != 0)
+ return (ENOTSUP);
+
+ if (strcmp(val, "true") == 0)
+ return (softmac_datapath_switch(softmac, B_TRUE, B_TRUE));
+ else if (strcmp(val, "false") == 0)
+ return (softmac_datapath_switch(softmac, B_FALSE, B_TRUE));
+ else
+ return (EINVAL);
+}
+
+static int
+softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags,
+ uint_t valsize, void *val, uint_t *perm)
+{
+ softmac_t *softmac = arg;
+ char *fpstr;
+
+ if (id != MAC_PROP_PRIVATE)
+ return (ENOTSUP);
+
+ if (strcmp(name, "_fastpath") == 0) {
+ if ((flags & MAC_PROP_DEFAULT) != 0)
+ return (ENOTSUP);
+
+ *perm = MAC_PROP_PERM_READ;
+ mutex_enter(&softmac->smac_fp_mutex);
+ fpstr = (DATAPATH_MODE(softmac) == SOFTMAC_SLOWPATH) ?
+ "disabled" : "enabled";
+ mutex_exit(&softmac->smac_fp_mutex);
+ } else if (strcmp(name, "_disable_fastpath") == 0) {
+ *perm = MAC_PROP_PERM_RW;
+ fpstr = ((flags & MAC_PROP_DEFAULT) != 0) ? "false" :
+ (softmac->smac_fastpath_admin_disabled ? "true" : "false");
+ } else {
+ return (ENOTSUP);
+ }
+
+ return (strlcpy(val, fpstr, valsize) >= valsize ? EINVAL : 0);
+}
+
int
softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp)
{
@@ -1367,12 +1537,39 @@ again:
void
softmac_rele_device(dls_dev_handle_t ddh)
{
+ if (ddh != NULL)
+ softmac_rele((softmac_t *)ddh);
+}
+
+int
+softmac_hold(dev_t dev, softmac_t **softmacp)
+{
softmac_t *softmac;
+ char *drv;
+ mac_handle_t mh;
+ char mac[MAXNAMELEN];
+ int err;
- if (ddh == NULL)
- return;
+ if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
+ return (EINVAL);
- softmac = (softmac_t *)ddh;
+ (void) snprintf(mac, MAXNAMELEN, "%s%d", drv, getminor(dev) - 1);
+ if ((err = mac_open(mac, &mh)) != 0)
+ return (err);
+
+ softmac = (softmac_t *)mac_driver(mh);
+
+ mutex_enter(&softmac->smac_mutex);
+ softmac->smac_hold_cnt++;
+ mutex_exit(&softmac->smac_mutex);
+ mac_close(mh);
+ *softmacp = softmac;
+ return (0);
+}
+
+void
+softmac_rele(softmac_t *softmac)
+{
mutex_enter(&softmac->smac_mutex);
softmac->smac_hold_cnt--;
mutex_exit(&softmac->smac_mutex);
diff --git a/usr/src/uts/common/io/softmac/softmac_pkt.c b/usr/src/uts/common/io/softmac/softmac_pkt.c
index 4a856f4f58..b0d613b9be 100644
--- a/usr/src/uts/common/io/softmac/softmac_pkt.c
+++ b/usr/src/uts/common/io/softmac/softmac_pkt.c
@@ -27,16 +27,6 @@
#include <inet/led.h>
#include <sys/softmac_impl.h>
-/*
- * Macro to check whether the write-queue of the lower stream is full.
- *
- * Because softmac is pushed right above the underlying device and
- * _I_INSERT/_I_REMOVE is not processed in the lower stream, it is
- * safe to directly access the q_next pointer.
- */
-#define CANPUTNEXT(q) \
- (!((q)->q_next->q_nfsrv->q_flag & QFULL) || canput((q)->q_next))
-
mblk_t *
softmac_m_tx(void *arg, mblk_t *mp)
{
@@ -46,7 +36,7 @@ softmac_m_tx(void *arg, mblk_t *mp)
* Optimize for the most common case.
*/
if (mp->b_next == NULL) {
- if (!CANPUTNEXT(wq))
+ if (!SOFTMAC_CANPUTNEXT(wq))
return (mp);
mp->b_flag |= MSGNOLOOP;
@@ -57,7 +47,7 @@ softmac_m_tx(void *arg, mblk_t *mp)
while (mp != NULL) {
mblk_t *next = mp->b_next;
- if (!CANPUTNEXT(wq))
+ if (!SOFTMAC_CANPUTNEXT(wq))
break;
mp->b_next = NULL;
mp->b_flag |= MSGNOLOOP;
@@ -67,7 +57,6 @@ softmac_m_tx(void *arg, mblk_t *mp)
return (mp);
}
-
void
softmac_rput_process_data(softmac_lower_t *slp, mblk_t *mp)
{
@@ -141,7 +130,7 @@ dlpi_get_errno(t_uscalar_t error, t_uscalar_t unix_errno)
return (error == DL_SYSERR ? unix_errno : EINVAL);
}
-static int
+int
softmac_output(softmac_lower_t *slp, mblk_t *mp, t_uscalar_t dl_prim,
t_uscalar_t ack, mblk_t **mpp)
{
@@ -227,7 +216,7 @@ softmac_ioctl_tx(softmac_lower_t *slp, mblk_t *mp, mblk_t **mpp)
softmac_serialize_exit(slp);
}
-static int
+int
softmac_mexchange_error_ack(mblk_t **mpp, t_uscalar_t error_primitive,
t_uscalar_t error, t_uscalar_t unix_errno)
{
diff --git a/usr/src/uts/common/io/sundlpi.c b/usr/src/uts/common/io/sundlpi.c
index d537f8127b..a322634fb6 100644
--- a/usr/src/uts/common/io/sundlpi.c
+++ b/usr/src/uts/common/io/sundlpi.c
@@ -491,6 +491,7 @@ dl_primstr(t_uscalar_t prim)
case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ");
case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK");
case DL_NOTIFY_IND: return ("DL_NOTIFY_IND");
+ case DL_NOTIFY_CONF: return ("DL_NOTIFY_CONF");
case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ");
case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK");
case DL_CONTROL_REQ: return ("DL_CONTROL_REQ");
diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h
index 3094fa1a09..5fede27bb2 100644
--- a/usr/src/uts/common/sys/dld.h
+++ b/usr/src/uts/common/sys/dld.h
@@ -411,6 +411,9 @@ int dld_open(queue_t *, dev_t *, int, int, cred_t *);
int dld_close(queue_t *);
void dld_wput(queue_t *, mblk_t *);
void dld_wsrv(queue_t *);
+int dld_str_open(queue_t *, dev_t *, void *);
+int dld_str_close(queue_t *);
+void *dld_str_private(queue_t *);
void dld_init_ops(struct dev_ops *, const char *);
void dld_fini_ops(struct dev_ops *);
int dld_autopush(dev_t *, struct dlautopush *);
diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h
index 79aa82ba75..68caa4f459 100644
--- a/usr/src/uts/common/sys/dld_impl.h
+++ b/usr/src/uts/common/sys/dld_impl.h
@@ -207,13 +207,19 @@ struct dld_str_s { /* Protected by */
dls_multicst_addr_t *ds_dmap; /* ds_rw_lock */
dls_rx_t ds_rx; /* ds_lock */
void *ds_rx_arg; /* ds_lock */
- boolean_t ds_active; /* SL */
+ uint_t ds_nactive; /* SL */
dld_str_t *ds_next; /* SL */
dls_head_t *ds_head;
dls_dl_handle_t ds_ddh;
list_node_t ds_tqlist;
+
+ /*
+ * driver private data set by the driver when calling dld_str_open().
+ */
+ void *ds_private;
};
+
#define DLD_DATATHR_INC(dsp) { \
ASSERT(MUTEX_HELD(&(dsp)->ds_lock)); \
dsp->ds_datathr_cnt++; \
diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h
index e67f604630..11293ac6d3 100644
--- a/usr/src/uts/common/sys/dlpi.h
+++ b/usr/src/uts/common/sys/dlpi.h
@@ -104,6 +104,7 @@ typedef struct dl_ipnetinfo {
#define DL_CONTROL_ACK 0x113 /* Device specific control ack */
#define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */
#define DL_INTR_MODE_REQ 0x115 /* Request Rx processing in INTR mode */
+#define DL_NOTIFY_CONF 0x116 /* Notification from upstream */
/*
* Primitives used for Connectionless Service
@@ -385,7 +386,7 @@ typedef struct dl_ipnetinfo {
#define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */
/*
- * DLPI notification codes for DL_NOTIFY primitives.
+ * DLPI notification codes for DL_NOTIFY_REQ primitives.
* Bit-wise distinct since DL_NOTIFY_REQ and DL_NOTIFY_ACK carry multiple
* notification codes.
*/
@@ -400,6 +401,12 @@ typedef struct dl_ipnetinfo {
#define DL_NOTE_SPEED 0x0100 /* Approximate link speed */
#define DL_NOTE_FASTPATH_FLUSH 0x0200 /* Fast Path info changes */
#define DL_NOTE_CAPAB_RENEG 0x0400 /* Initiate capability renegotiation */
+#define DL_NOTE_REPLUMB 0x0800 /* Inform the link to replumb */
+
+/*
+ * DLPI notification codes for DL_NOTIFY_CONF primitives.
+ */
+#define DL_NOTE_REPLUMB_DONE 0x0001 /* Indicate replumb is done */
/*
* DLPI Quality Of Service definition for use in QOS structure definitions.
@@ -1017,6 +1024,14 @@ typedef struct {
} dl_notify_ind_t;
/*
+ * DL_NOTIFY_CONF, M_PROTO type
+ */
+typedef struct {
+ t_uscalar_t dl_primitive; /* set to DL_NOTIFY_CONF */
+ uint32_t dl_notification; /* Which notification? */
+} dl_notify_conf_t;
+
+/*
* DL_AGGR_REQ, M_PROTO type
*/
typedef struct {
@@ -1507,6 +1522,7 @@ union DL_primitives {
dl_notify_req_t notify_req;
dl_notify_ack_t notify_ack;
dl_notify_ind_t notify_ind;
+ dl_notify_conf_t notify_conf;
dl_aggr_req_t aggr_req;
dl_aggr_ind_t aggr_ind;
dl_unaggr_req_t unaggr_req;
@@ -1574,6 +1590,7 @@ union DL_primitives {
#define DL_NOTIFY_REQ_SIZE sizeof (dl_notify_req_t)
#define DL_NOTIFY_ACK_SIZE sizeof (dl_notify_ack_t)
#define DL_NOTIFY_IND_SIZE sizeof (dl_notify_ind_t)
+#define DL_NOTIFY_CONF_SIZE sizeof (dl_notify_conf_t)
#define DL_AGGR_REQ_SIZE sizeof (dl_aggr_req_t)
#define DL_AGGR_IND_SIZE sizeof (dl_aggr_ind_t)
#define DL_UNAGGR_REQ_SIZE sizeof (dl_unaggr_req_t)
diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h
index 33162a4d5c..dafd451954 100644
--- a/usr/src/uts/common/sys/dls_impl.h
+++ b/usr/src/uts/common/sys/dls_impl.h
@@ -119,7 +119,7 @@ extern void dls_rx_promisc(void *, mac_resource_handle_t, mblk_t *,
extern void dls_rx_vlan_promisc(void *, mac_resource_handle_t,
mblk_t *, boolean_t);
extern int dls_active_set(dld_str_t *);
-extern void dls_active_clear(dld_str_t *);
+extern void dls_active_clear(dld_str_t *, boolean_t);
extern void dls_mgmt_init(void);
extern void dls_mgmt_fini(void);
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index 1756644b6c..2cfe7443e5 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -260,20 +260,6 @@ typedef struct mac_info_s {
} mac_info_t;
/*
- * Information for legacy devices.
- */
-typedef struct mac_capab_legacy_s {
- /*
- * Notifications that the legacy device does not support.
- */
- uint32_t ml_unsup_note;
- /*
- * dev_t of the legacy device; can be held to force attach.
- */
- dev_t ml_dev;
-} mac_capab_legacy_t;
-
-/*
* When VNICs are created on top of the NIC, there are two levels
* of MAC layer, a lower MAC, which is the MAC layer at the level of the
* physical NIC, and an upper MAC, which is the MAC layer at the level
@@ -569,13 +555,15 @@ extern void mac_margin_get(mac_handle_t, uint32_t *);
extern int mac_margin_remove(mac_handle_t, uint32_t);
extern int mac_margin_add(mac_handle_t, uint32_t *,
boolean_t);
+extern int mac_fastpath_disable(mac_handle_t);
+extern void mac_fastpath_enable(mac_handle_t);
extern mactype_register_t *mactype_alloc(uint_t);
extern void mactype_free(mactype_register_t *);
extern int mactype_register(mactype_register_t *);
extern int mactype_unregister(const char *);
-extern void mac_start_logusage(mac_logtype_t, uint_t);
+extern int mac_start_logusage(mac_logtype_t, uint_t);
extern void mac_stop_logusage(mac_logtype_t);
extern mac_handle_t mac_get_lower_mac_handle(mac_handle_t);
diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h
index c1b999bb31..20e3afa82a 100644
--- a/usr/src/uts/common/sys/mac_client_priv.h
+++ b/usr/src/uts/common/sys/mac_client_priv.h
@@ -63,6 +63,7 @@ extern void mac_ioctl(mac_handle_t, queue_t *, mblk_t *);
extern link_state_t mac_link_get(mac_handle_t);
extern void mac_resource_set(mac_client_handle_t, mac_resource_add_t, void *);
extern dev_info_t *mac_devinfo_get(mac_handle_t);
+extern void *mac_driver(mac_handle_t);
extern boolean_t mac_capab_get(mac_handle_t, mac_capab_t, void *);
extern boolean_t mac_sap_verify(mac_handle_t, uint32_t, uint32_t *);
extern mblk_t *mac_header(mac_handle_t, const uint8_t *, uint32_t, mblk_t *,
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index ee5557b113..9a02c07b54 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -457,9 +457,10 @@ struct mac_impl_s {
mac_resource_props_t mi_resource_props; /* SL */
minor_t mi_minor; /* WO */
- dev_t mi_phy_dev; /* WO */
uint32_t mi_oref; /* SL */
- uint32_t mi_unsup_note; /* WO */
+ mac_capab_legacy_t mi_capab_legacy; /* WO */
+ dev_t mi_phy_dev; /* WO */
+
/*
* List of margin value requests added by mac clients. This list is
* sorted: the first one has the greatest value.
diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h
index 5522a6c884..6713912b63 100644
--- a/usr/src/uts/common/sys/mac_provider.h
+++ b/usr/src/uts/common/sys/mac_provider.h
@@ -145,6 +145,24 @@ typedef struct mac_capab_multifactaddr_s {
} mac_capab_multifactaddr_t;
/*
+ * Info and callbacks of legacy devices.
+ */
+typedef struct mac_capab_legacy_s {
+ /*
+ * Notifications that the legacy device does not support.
+ */
+ uint32_t ml_unsup_note;
+ /*
+ * dev_t of the legacy device; can be held to force attach.
+ */
+ dev_t ml_dev;
+ boolean_t (*ml_active_set)(void *);
+ void (*ml_active_clear)(void *);
+ int (*ml_fastpath_disable)(void *);
+ void (*ml_fastpath_enable)(void *);
+} mac_capab_legacy_t;
+
+/*
* MAC driver entry point types.
*/
typedef int (*mac_getstat_t)(void *, uint_t, uint64_t *);
diff --git a/usr/src/uts/common/sys/softmac_impl.h b/usr/src/uts/common/sys/softmac_impl.h
index 9cdb49de31..83caa23c82 100644
--- a/usr/src/uts/common/sys/softmac_impl.h
+++ b/usr/src/uts/common/sys/softmac_impl.h
@@ -44,9 +44,20 @@
extern "C" {
#endif
+typedef void (*softmac_rx_t)(void *, mac_resource_handle_t, mblk_t *,
+ mac_header_info_t *);
+
+typedef struct softmac_lower_rxinfo_s {
+ softmac_rx_t slr_rx;
+ void *slr_arg;
+} softmac_lower_rxinfo_t;
+
typedef struct softmac_lower_s {
+ ldi_handle_t sl_lh;
struct softmac *sl_softmac;
queue_t *sl_wq;
+ struct softmac_upper_s *sl_sup;
+ softmac_lower_rxinfo_t *sl_rxinfo;
/*
* sl_ctl_inprogress is used to serialize the control path. It will
@@ -68,8 +79,6 @@ typedef struct softmac_lower_s {
t_uscalar_t sl_pending_prim;
boolean_t sl_pending_ioctl;
mblk_t *sl_ack_mp;
-
- ldi_handle_t sl_lh;
} softmac_lower_t;
typedef enum {
@@ -110,55 +119,53 @@ typedef struct softmac_dev_s {
* node, the other minor node can still be used to register the mac.
* (Specifically, an incorrect xxx_getinfo() implementation will cause style-2
* minor node mac registration to fail.)
+ *
+ * Locking description:
+ * WO: write once, valid for the lifetime.
*/
typedef struct softmac {
- /*
- * The following fields will be set when the softmac is created and
- * will not change. No lock is required.
- */
- char smac_devname[MAXNAMELEN];
- major_t smac_umajor;
- int smac_uppa;
- uint32_t smac_cnt; /* # of minor nodes for this device */
+ char smac_devname[MAXNAMELEN]; /* WO */
+ major_t smac_umajor; /* WO */
+ int smac_uppa; /* WO */
+ uint32_t smac_cnt; /* WO, # of minor nodes */
+ kmutex_t smac_mutex;
+ kcondvar_t smac_cv;
+ softmac_state_t smac_state; /* smac_mutex */
/*
- * The following fields are protected by smac_mutex.
- *
* The smac_hold_cnt field increases when softmac_hold_device() is
* called to force the dls_vlan_t of the device to be created. The
* device pre-detach fails if this counter is not 0.
*/
- softmac_state_t smac_state;
- uint32_t smac_hold_cnt;
- kmutex_t smac_mutex;
- kcondvar_t smac_cv;
- uint32_t smac_flags;
- int smac_attacherr;
+ uint32_t smac_hold_cnt; /* smac_mutex */
+ uint32_t smac_flags; /* smac_mutex */
+ int smac_attacherr; /* smac_mutex */
mac_handle_t smac_mh;
- softmac_dev_t *smac_softmac[2];
+ softmac_dev_t *smac_softmac[2]; /* smac_mutex */
+
/*
* Number of minor nodes whose post-attach routine has succeeded.
* This should be the same as the numbers of softmac_dev_t.
* Note that it does not imply SOFTMAC_ATTACH_DONE as the taskq might
* be still ongoing.
*/
- uint32_t smac_attachok_cnt;
+ uint32_t smac_attachok_cnt; /* smac_mutex */
/*
* Number of softmac_dev_t left when pre-detach fails. This is used
* to indicate whether postattach is called because of a failed
* pre-detach.
*/
- uint32_t smac_attached_left;
+ uint32_t smac_attached_left; /* smac_mutex */
/*
* Thread handles the DL_NOTIFY_IND message from the lower stream.
*/
- kthread_t *smac_notify_thread;
+ kthread_t *smac_notify_thread; /* smac_mutex */
/*
* Head and tail of the DL_NOTIFY_IND messsages.
*/
- mblk_t *smac_notify_head;
- mblk_t *smac_notify_tail;
+ mblk_t *smac_notify_head; /* smac_mutex */
+ mblk_t *smac_notify_tail; /* smac_mutex */
/*
* The remaining fields are used to register the MAC for a legacy
@@ -193,10 +200,34 @@ typedef struct softmac {
dl_capab_mdt_t smac_mdt_capab;
boolean_t smac_mdt;
- /* Following fields protected by the mac perimeter */
- softmac_lower_state_t smac_lower_state;
- /* Lower stream structure */
+ /*
+ * Lower stream structure, accessed by the MAC provider API. The GLDv3
+ * framework ensures its validity.
+ */
softmac_lower_t *smac_lower;
+
+ kmutex_t smac_active_mutex;
+ /*
+ * Set by xxx_active_set() when aggregation is created.
+ */
+ boolean_t smac_active; /* smac_active_mutex */
+ /*
+ * Number of bound streams in the fast-path mode.
+ */
+ uint32_t smac_nactive; /* smac_active_mutex */
+
+ kmutex_t smac_fp_mutex;
+ kcondvar_t smac_fp_cv;
+ /*
+ * Number of clients that request to disable fastpath.
+ */
+ uint32_t smac_fp_disable_clients; /* smac_fp_mutex */
+ boolean_t smac_fastpath_admin_disabled; /* smac_fp_mutex */
+
+ /*
+ * stream list over this softmac.
+ */
+ list_t smac_sup_list; /* smac_fp_mutex */
} softmac_t;
typedef struct smac_ioc_start_s {
@@ -206,20 +237,157 @@ typedef struct smac_ioc_start_s {
#define SMAC_IOC ('S' << 24 | 'M' << 16 | 'C' << 8)
#define SMAC_IOC_START (SMAC_IOC | 0x01)
+/*
+ * The su_mode of a non-IP/ARP stream is UNKNOWN, and the su_mode of an IP/ARP
+ * stream is either SLOWPATH or FASTPATH.
+ */
+#define SOFTMAC_UNKNOWN 0x00
+#define SOFTMAC_SLOWPATH 0x01
+#define SOFTMAC_FASTPATH 0x02
+
+typedef struct softmac_switch_req_s {
+ list_node_t ssq_req_list_node;
+ uint32_t ssq_expected_mode;
+} softmac_switch_req_t;
+
+#define DATAPATH_MODE(softmac) \
+ ((((softmac)->smac_fp_disable_clients != 0) || \
+ (softmac)->smac_fastpath_admin_disabled) ? SOFTMAC_SLOWPATH : \
+ SOFTMAC_FASTPATH)
+
+
+/*
+ * Locking description:
+ *
+ * WO: Set once and valid for life;
+ * SL: Serialized by the control path (softmac_wput_nondata_task())
+ */
+typedef struct softmac_upper_s {
+ softmac_t *su_softmac; /* WO */
+ queue_t *su_rq; /* WO */
+ queue_t *su_wq; /* WO */
+
+ /*
+ * List of upper streams that have pending DLPI messages to be processed.
+ */
+ list_node_t su_taskq_list_node; /* softmac_taskq_lock */
+
+ /*
+ * non-NULL for IP/ARP streams in the fast-path mode
+ */
+ softmac_lower_t *su_slp; /* SL & su_mutex */
+
+ /*
+ * List of all IP/ARP upperstreams on the same softmac (including
+ * the ones in both data-path modes).
+ */
+ list_node_t su_list_node; /* smac_fp_mutex */
+
+ /*
+ * List of datapath switch requests.
+ */
+ list_t su_req_list; /* smac_fp_mutex */
+
+ /*
+ * Placeholder for RX callbacks used to handle data messages coming
+ * from the dedicated-lower-stream associated with the IP/ARP stream.
+ * Another RX callback is softmac_drop_rxinfo, which is a global
+ * variable.
+ */
+ softmac_lower_rxinfo_t su_rxinfo; /* WO */
+ softmac_lower_rxinfo_t su_direct_rxinfo; /* WO */
+
+ /*
+ * Used to serialize the DLPI operation and fastpath<->slowpath
+ * switching over operation.
+ */
+ kmutex_t su_disp_mutex;
+ kcondvar_t su_disp_cv;
+ mblk_t *su_pending_head; /* su_disp_mutex */
+ mblk_t *su_pending_tail; /* su_disp_mutex */
+ boolean_t su_dlpi_pending; /* su_disp_mutex */
+ boolean_t su_closing; /* su_disp_mutex */
+
+ uint32_t su_bound : 1, /* SL */
+ su_active : 1, /* SL */
+ su_direct : 1; /* SL */
+
+ /*
+ * Used for fastpath data path.
+ */
+ kmutex_t su_mutex;
+ kcondvar_t su_cv;
+ mblk_t *su_tx_flow_mp; /* su_mutex */
+ boolean_t su_tx_busy; /* su_mutex */
+ /*
+ * Number of softmac_srv() operations in fastpath processing.
+ */
+ uint32_t su_tx_inprocess; /* su_mutex */
+ /*
+ * SOFTMAC_SLOWPATH or SOFTMAC_FASTPATH
+ */
+ uint32_t su_mode; /* SL & su_mutex */
+
+ /*
+ * Whether this stream is already scheduled in softmac_taskq_list.
+ */
+ boolean_t su_taskq_scheduled; /* softmac_taskq_lock */
+} softmac_upper_t;
+
+#define SOFTMAC_EQ_PENDING(sup, mp) { \
+ if ((sup)->su_pending_head == NULL) { \
+ (sup)->su_pending_head = (sup)->su_pending_tail = (mp); \
+ } else { \
+ (sup)->su_pending_tail->b_next = (mp); \
+ (sup)->su_pending_tail = (mp); \
+ } \
+}
+
+#define SOFTMAC_DQ_PENDING(sup, mpp) { \
+ if ((sup)->su_pending_head == NULL) { \
+ *(mpp) = NULL; \
+ } else { \
+ *(mpp) = (sup)->su_pending_head; \
+ if (((sup)->su_pending_head = (*(mpp))->b_next) == NULL)\
+ (sup)->su_pending_tail = NULL; \
+ (*(mpp))->b_next = NULL; \
+ } \
+}
+
+/*
+ * A macro to check whether the write-queue of the lower stream is full
+ * and packets need to be enqueued.
+ *
+ * Because softmac is pushed right above the underlying device and
+ * _I_INSERT/_I_REMOVE is not processed in the lower stream, it is
+ * safe to directly access the q_next pointer.
+ */
+#define SOFTMAC_CANPUTNEXT(q) \
+ (!((q)->q_next->q_nfsrv->q_flag & QFULL) || canput((q)->q_next))
+
+
extern dev_info_t *softmac_dip;
#define SOFTMAC_DEV_NAME "softmac"
extern int softmac_send_bind_req(softmac_lower_t *, uint_t);
+extern int softmac_send_unbind_req(softmac_lower_t *);
extern int softmac_send_notify_req(softmac_lower_t *, uint32_t);
extern int softmac_send_promisc_req(softmac_lower_t *, t_uscalar_t,
boolean_t);
-extern void softmac_init(void);
-extern void softmac_fini(void);
-extern boolean_t softmac_busy(void);
+extern void softmac_init();
+extern void softmac_fini();
+extern void softmac_fp_init();
+extern void softmac_fp_fini();
+extern boolean_t softmac_busy();
extern int softmac_fill_capab(ldi_handle_t, softmac_t *);
extern int softmac_capab_enable(softmac_lower_t *);
-extern void softmac_rput_process_notdata(queue_t *, mblk_t *);
+extern void softmac_rput_process_notdata(queue_t *, softmac_upper_t *,
+ mblk_t *);
extern void softmac_rput_process_data(softmac_lower_t *, mblk_t *);
+extern int softmac_output(softmac_lower_t *, mblk_t *, t_uscalar_t,
+ t_uscalar_t, mblk_t **);
+extern int softmac_mexchange_error_ack(mblk_t **, t_uscalar_t,
+ t_uscalar_t, t_uscalar_t);
extern int softmac_m_promisc(void *, boolean_t);
extern int softmac_m_multicst(void *, boolean_t, const uint8_t *);
@@ -231,6 +399,20 @@ extern int softmac_proto_tx(softmac_lower_t *, mblk_t *, mblk_t **);
extern void softmac_ioctl_tx(softmac_lower_t *, mblk_t *, mblk_t **);
extern void softmac_notify_thread(void *);
+extern int softmac_hold(dev_t, softmac_t **);
+extern void softmac_rele(softmac_t *);
+extern int softmac_lower_setup(softmac_t *, softmac_upper_t *,
+ softmac_lower_t **);
+extern boolean_t softmac_active_set(void *);
+extern void softmac_active_clear(void *);
+extern int softmac_fastpath_disable(void *);
+extern void softmac_fastpath_enable(void *);
+extern int softmac_datapath_switch(softmac_t *, boolean_t, boolean_t);
+
+extern void softmac_wput_data(softmac_upper_t *, mblk_t *);
+extern void softmac_wput_nondata(softmac_upper_t *, mblk_t *);
+extern void softmac_upperstream_close(softmac_upper_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64
index d89224677b..13c13b2057 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64
@@ -95,7 +95,6 @@ ip_ard_template
ip_area_template
ip_ared_template
ip_areq_template
-ip_aresq_template
ip_arma_multi_template
ip_aroff_template
ip_aron_template
diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64
index 0e58fdc219..e46a4353e7 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64
@@ -95,7 +95,6 @@ ip_ard_template
ip_area_template
ip_ared_template
ip_areq_template
-ip_aresq_template
ip_arma_multi_template
ip_aroff_template
ip_aron_template
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64
index 6606b472bf..a2269a3a2c 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64
@@ -95,7 +95,6 @@ ip_ard_template
ip_area_template
ip_ared_template
ip_areq_template
-ip_aresq_template
ip_arma_multi_template
ip_aroff_template
ip_aron_template
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64
index 89d40afbbb..4be214bbb5 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64
@@ -95,7 +95,6 @@ ip_ard_template
ip_area_template
ip_ared_template
ip_areq_template
-ip_aresq_template
ip_arma_multi_template
ip_aroff_template
ip_aron_template