| | | |
|---|---|---|
| author | Cathy Zhou <Cathy.Zhou@Sun.COM> | 2009-03-17 20:14:50 -0700 |
| committer | Cathy Zhou <Cathy.Zhou@Sun.COM> | 2009-03-17 20:14:50 -0700 |
| commit | 5d460eafffba936e81c4dd5ebe0f59b238f09121 (patch) | |
| tree | ec942dd0b37946b807039b9f42e69a8f54c30b7d /usr/src | |
| parent | f91909144addd198e09d1842e5354bfa62d96691 (diff) | |
| download | illumos-joyent-5d460eafffba936e81c4dd5ebe0f59b238f09121.tar.gz | |
PSARC/2008/242 Data Fast-Path for Softmac
6649224 fast-path needed to improve legacy network interface performance after UV
6649898 the smac_lock and smac_mutex fields in softmac_t should be given a more descriptive name
6799767 DLD capability is not correctly updated if it is renegotiated
Diffstat (limited to 'usr/src')
37 files changed, 2897 insertions, 629 deletions
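Most of the ARP changes in this patch revolve around one pattern: every outbound DLPI message is appended to a per-link deferred queue, and the queue is drained only while no primitive is awaiting an ack, with DL_NOTIFY_CONF treated as fire-and-forget. The following is a minimal user-land sketch of that enqueue/dispatch pattern, not code from the commit: the `msg_t`/`link_t` types, the `NO_ACK_PRIM` constant, and the `printf()` calls are hypothetical stand-ins for the kernel's `mblk_t`, `arl_t`, `DL_NOTIFY_CONF`, and `putnext()`.

```c
/*
 * Illustrative, user-land model of the "always enqueue, then dispatch
 * while idle" pattern used by ar_dlpi_send()/ar_dlpi_dispatch() in the
 * diff below.  All types and constants here are stand-ins, not the
 * kernel interfaces.
 */
#include <stdio.h>
#include <stdlib.h>

#define	PRIM_INVAL	(-1)
#define	NO_ACK_PRIM	100	/* stands in for DL_NOTIFY_CONF: no ack expected */

typedef struct msg {
	int		prim;	/* DLPI-style primitive carried by this message */
	struct msg	*next;	/* b_next-style deferred chain */
} msg_t;

typedef struct link {
	int		pending;	/* primitive awaiting an ack, or PRIM_INVAL */
	msg_t		*deferred;	/* head of the deferred message chain */
} link_t;

/* Drain deferred messages until one of them needs an ack. */
static void
link_dispatch(link_t *lp)
{
	msg_t *mp;

	while ((mp = lp->deferred) != NULL && lp->pending == PRIM_INVAL) {
		lp->deferred = mp->next;
		if (mp->prim != NO_ACK_PRIM)
			lp->pending = mp->prim;	/* wait for its ack */
		printf("dispatch: primitive %d\n", mp->prim);
		free(mp);
	}
}

/* Tail-insert, then try to dispatch; mirrors ar_dlpi_send(). */
static void
link_send(link_t *lp, int prim)
{
	msg_t **mpp, *mp = malloc(sizeof (*mp));

	if (mp == NULL)
		return;
	mp->prim = prim;
	mp->next = NULL;
	for (mpp = &lp->deferred; *mpp != NULL; mpp = &(*mpp)->next)
		;
	*mpp = mp;
	link_dispatch(lp);
}

/* The ack for the pending primitive arrived; mirrors ar_dlpi_done(). */
static void
link_done(link_t *lp, int prim)
{
	if (lp->pending != prim) {
		printf("unexpected ack for %d\n", prim);
		return;
	}
	lp->pending = PRIM_INVAL;
	link_dispatch(lp);
}

int
main(void)
{
	link_t lnk = { PRIM_INVAL, NULL };

	link_send(&lnk, 1);		/* dispatched immediately, now pending */
	link_send(&lnk, 2);		/* deferred behind primitive 1 */
	link_send(&lnk, NO_ACK_PRIM);	/* deferred; will not block the queue */
	link_done(&lnk, 1);		/* ack: 2 goes out and becomes pending */
	link_done(&lnk, 2);		/* ack: NO_ACK_PRIM goes out, queue idles */
	return (0);
}
```

In the actual ar_dlpi_dispatch() below, the idle case additionally chooses between ar_cmd_drain() and ar_cmd_done() depending on whether the last primitive sent was DL_NOTIFY_CONF; the sketch omits that bookkeeping.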
diff --git a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c index 9adc72162b..fec3ff52e0 100644 --- a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c +++ b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Mdb kernel support module. This module is loaded automatically when the * kvm target is initialized. Any global functions declared here are exported @@ -1578,6 +1576,7 @@ mdb_dlpi_prim(int prim) case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ"); case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK"); case DL_NOTIFY_IND: return ("DL_NOTIFY_IND"); + case DL_NOTIFY_CONF: return ("DL_NOTIFY_CONF"); case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ"); case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK"); case DL_CONTROL_REQ: return ("DL_CONTROL_REQ"); diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 318a39a906..f97b615a4d 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -596,7 +596,7 @@ AGGR_OBJS += aggr_dev.o aggr_ctl.o aggr_grp.o aggr_port.o \ aggr_send.o aggr_recv.o aggr_lacp.o SOFTMAC_OBJS += softmac_main.o softmac_ctl.o softmac_capab.o \ - softmac_dev.o softmac_stat.o softmac_pkt.o + softmac_dev.o softmac_stat.o softmac_pkt.o softmac_fp.o NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \ net80211_output.o net80211_node.o net80211_crypto.o \ diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c index e52655dd47..abdbc39a47 100644 --- a/usr/src/uts/common/inet/arp/arp.c +++ b/usr/src/uts/common/inet/arp/arp.c @@ -208,6 +208,7 @@ static void ar_ce_walk(arp_stack_t *as, void (*pfi)(ace_t *, void *), static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code); static int ar_close(queue_t *q); static int ar_cmd_dispatch(queue_t *q, mblk_t *mp, boolean_t from_wput); +static void ar_cmd_drain(arl_t *arl); static void ar_cmd_done(arl_t *arl); static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size); static void ar_dlpi_send(arl_t *, mblk_t *); @@ -1331,6 +1332,53 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size) return (mp); } +static void +ar_dlpi_dispatch(arl_t *arl) +{ + mblk_t *mp; + t_uscalar_t primitive = DL_PRIM_INVAL; + + while (((mp = arl->arl_dlpi_deferred) != NULL) && + (arl->arl_dlpi_pending == DL_PRIM_INVAL)) { + union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; + + DTRACE_PROBE2(dlpi_dispatch, arl_t *, arl, mblk_t *, mp); + + ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); + arl->arl_dlpi_deferred = mp->b_next; + mp->b_next = NULL; + + /* + * If this is a DL_NOTIFY_CONF, no ack is expected. + */ + if ((primitive = dlp->dl_primitive) != DL_NOTIFY_CONF) + arl->arl_dlpi_pending = dlp->dl_primitive; + putnext(arl->arl_wq, mp); + } + + if (arl->arl_dlpi_pending == DL_PRIM_INVAL) { + /* + * No pending DLPI operation. + */ + ASSERT(mp == NULL); + DTRACE_PROBE1(dlpi_idle, arl_t *, arl); + + /* + * If the last DLPI message dispatched is DL_NOTIFY_CONF, + * it is not assoicated with any pending cmd request, drain + * the rest of pending cmd requests, otherwise call + * ar_cmd_done() to finish up the current pending cmd + * operation. 
+ */ + if (primitive == DL_NOTIFY_CONF) + ar_cmd_drain(arl); + else + ar_cmd_done(arl); + } else if (mp != NULL) { + DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp); + } +} + /* * The following two functions serialize DLPI messages to the driver, much * along the lines of ill_dlpi_send and ill_dlpi_done in IP. Basically, @@ -1341,26 +1389,18 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size) static void ar_dlpi_send(arl_t *arl, mblk_t *mp) { + mblk_t **mpp; + ASSERT(arl != NULL); ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); - if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { - mblk_t **mpp; - - /* Must queue message. Tail insertion */ - mpp = &arl->arl_dlpi_deferred; - while (*mpp != NULL) - mpp = &((*mpp)->b_next); - *mpp = mp; + /* Always queue the message. Tail insertion */ + mpp = &arl->arl_dlpi_deferred; + while (*mpp != NULL) + mpp = &((*mpp)->b_next); + *mpp = mp; - DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp); - return; - } - - arl->arl_dlpi_pending = - ((union DL_primitives *)mp->b_rptr)->dl_primitive; - DTRACE_PROBE2(dlpi_send, arl_t *, arl, mblk_t *, mp); - putnext(arl->arl_wq, mp); + ar_dlpi_dispatch(arl); } /* @@ -1372,30 +1412,71 @@ ar_dlpi_send(arl_t *arl, mblk_t *mp) static void ar_dlpi_done(arl_t *arl, t_uscalar_t prim) { - mblk_t *mp; - if (arl->arl_dlpi_pending != prim) { DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl, t_uscalar_t, prim); return; } - if ((mp = arl->arl_dlpi_deferred) == NULL) { - DTRACE_PROBE2(dlpi_done_idle, arl_t *, arl, t_uscalar_t, prim); - arl->arl_dlpi_pending = DL_PRIM_INVAL; - ar_cmd_done(arl); - return; - } + DTRACE_PROBE2(dlpi_done, arl_t *, arl, t_uscalar_t, prim); + arl->arl_dlpi_pending = DL_PRIM_INVAL; + ar_dlpi_dispatch(arl); +} - arl->arl_dlpi_deferred = mp->b_next; - mp->b_next = NULL; +/* + * Send a DL_NOTE_REPLUMB_DONE message down to the driver to indicate + * the replumb process has already been done. Note that mp is either a + * DL_NOTIFY_IND message or an AR_INTERFACE_DOWN message (comes from IP). + */ +static void +arp_replumb_done(arl_t *arl, mblk_t *mp) +{ + ASSERT(arl->arl_state == ARL_S_DOWN && arl->arl_replumbing); - ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); + mp = mexchange(NULL, mp, sizeof (dl_notify_conf_t), M_PROTO, + DL_NOTIFY_CONF); + ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification = + DL_NOTE_REPLUMB_DONE; + arl->arl_replumbing = B_FALSE; + ar_dlpi_send(arl, mp); +} + +static void +ar_cmd_drain(arl_t *arl) +{ + mblk_t *mp; + queue_t *q; + + /* + * Run the commands that have been enqueued while we were waiting + * for the last command (AR_INTERFACE_UP or AR_INTERFACE_DOWN) + * to complete. + */ + while ((mp = arl->arl_queue) != NULL) { + if (((uintptr_t)mp->b_prev & CMD_IN_PROGRESS) != 0) { + /* + * The current command is an AR_INTERFACE_UP or + * AR_INTERFACE_DOWN and is waiting for a DLPI ack + * from the driver. Return. We can't make progress now. 
+ */ + break; + } + + mp = ar_cmd_dequeue(arl); + mp->b_prev = AR_DRAINING; + q = mp->b_queue; + mp->b_queue = NULL; - arl->arl_dlpi_pending = - ((union DL_primitives *)mp->b_rptr)->dl_primitive; - DTRACE_PROBE2(dlpi_done_next, arl_t *, arl, mblk_t *, mp); - putnext(arl->arl_wq, mp); + /* + * Don't call put(q, mp) since it can lead to reorder of + * messages by sending the current messages to the end of + * arp's syncq + */ + if (q->q_flag & QREADR) + ar_rput(q, mp); + else + ar_wput(q, mp); + } } static void @@ -1409,7 +1490,6 @@ ar_cmd_done(arl_t *arl) queue_t *dlpi_op_done_q; ar_t *ar_arl; ar_t *ar_ip; - queue_t *q; ASSERT(arl->arl_state == ARL_S_UP || arl->arl_state == ARL_S_DOWN); @@ -1458,44 +1538,24 @@ ar_cmd_done(arl_t *arl) ar_arl->ar_arl_ip_assoc = ar_ip; ar_ip->ar_arl_ip_assoc = ar_arl; } - } - inet_freemsg(mp); - } - /* - * Run the commands that have been enqueued while we were waiting - * for the last command (AR_INTERFACE_UP or AR_INTERFACE_DOWN) - * to complete. - */ - while ((mp = ar_cmd_dequeue(arl)) != NULL) { - mp->b_prev = AR_DRAINING; - q = mp->b_queue; - mp->b_queue = NULL; - - /* - * Don't call put(q, mp) since it can lead to reorder of - * messages by sending the current messages to the end of - * arp's syncq - */ - if (q->q_flag & QREADR) - ar_rput(q, mp); - else - ar_wput(q, mp); - - if ((mp = arl->arl_queue) == NULL) - goto done; /* no work to do */ - - if ((cmd = (uintptr_t)mp->b_prev) & CMD_IN_PROGRESS) { + inet_freemsg(mp); + } else if (cmd == AR_INTERFACE_DOWN && arl->arl_replumbing) { /* - * The current command is an AR_INTERFACE_UP or - * AR_INTERFACE_DOWN and is waiting for a DLPI ack - * from the driver. Return. We can't make progress now. + * The arl is successfully brought down and this is + * a result of the DL_NOTE_REPLUMB process. Reset + * mp->b_prev first (it keeps the 'cmd' information + * at this point). */ - goto done; + mp->b_prev = NULL; + arp_replumb_done(arl, mp); + } else { + inet_freemsg(mp); } } -done: + ar_cmd_drain(arl); + if (dlpi_op_done_mp != NULL) { DTRACE_PROBE3(cmd_done_next, arl_t *, arl, queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp); @@ -2136,8 +2196,18 @@ ar_interface_down(queue_t *q, mblk_t *mp) * The arl is already down, no work to do. */ if (arl->arl_state == ARL_S_DOWN) { - /* ar_rput frees the mp */ - return (0); + if (arl->arl_replumbing) { + /* + * The arl is already down and this is a result of + * the DL_NOTE_REPLUMB process. Return EINPROGRESS + * so this mp won't be freed by ar_rput(). 
+ */ + arp_replumb_done(arl, mp); + return (EINPROGRESS); + } else { + /* ar_rput frees the mp */ + return (0); + } } /* @@ -2672,7 +2742,7 @@ ar_ll_up(arl_t *arl) if (notify_mp == NULL) goto bad; ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = - DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; + DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_REPLUMB; arl->arl_state = ARL_S_PENDING; if (arl->arl_provider_style == DL_STYLE2) { @@ -3852,6 +3922,16 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) case DL_NOTIFY_IND: DTRACE_PROBE2(rput_dl_notify_ind, arl_t *, arl, dl_notify_ind_t *, &dlp->notify_ind); + + if (dlp->notify_ind.dl_notification == DL_NOTE_REPLUMB) { + arl->arl_replumbing = B_TRUE; + if (arl->arl_state == ARL_S_DOWN) { + arp_replumb_done(arl, mp); + return; + } + break; + } + if (ap != NULL) { switch (dlp->notify_ind.dl_notification) { case DL_NOTE_LINK_UP: diff --git a/usr/src/uts/common/inet/arp_impl.h b/usr/src/uts/common/inet/arp_impl.h index f16fdc97a0..38d0d1ab65 100644 --- a/usr/src/uts/common/inet/arp_impl.h +++ b/usr/src/uts/common/inet/arp_impl.h @@ -64,7 +64,8 @@ typedef struct arl_s { t_uscalar_t arl_dlpi_pending; /* pending DLPI request */ mblk_t *arl_dlpi_deferred; /* Deferred DLPI messages */ uint_t arl_state; /* lower interface state */ - uint_t arl_closing : 1; /* stream is closing */ + uint_t arl_closing : 1, /* stream is closing */ + arl_replumbing : 1; /* Wait for IP to bring down */ uint32_t arl_index; /* instance number */ struct arlphy_s *arl_phy; /* physical info, if any */ struct arl_s *arl_ipmp_arl; /* pointer to group arl_t */ diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 4be3138778..d6faecb3a5 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -1953,6 +1953,7 @@ typedef struct ill_s { mblk_t *ill_promiscoff_mp; /* for ill_leave_allmulti() */ mblk_t *ill_dlpi_deferred; /* b_next chain of control messages */ mblk_t *ill_ardeact_mp; /* deact mp from ipmp_ill_activate() */ + mblk_t *ill_replumb_mp; /* replumb mp from ill_replumb() */ mblk_t *ill_phys_addr_mp; /* mblk which holds ill_phys_addr */ #define ill_last_mp_to_free ill_phys_addr_mp @@ -1977,7 +1978,6 @@ typedef struct ill_s { ill_dl_up : 1, ill_up_ipifs : 1, ill_note_link : 1, /* supports link-up notification */ - ill_capab_reneg : 1, /* capability renegotiation to be done */ ill_dld_capab_inprog : 1, /* direct dld capab call in prog */ ill_need_recover_multicast : 1, diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index ad161476aa..b26c090aad 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -15825,8 +15825,6 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) switch (dlea->dl_error_primitive) { case DL_DISABMULTI_REQ: - if (!ill->ill_isv6) - ipsq_current_finish(ipsq); ill_dlpi_done(ill, dlea->dl_error_primitive); break; case DL_PROMISCON_REQ: @@ -15902,18 +15900,17 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) mp1 = ipsq_pending_mp_get(ipsq, &connp); if (mp1 != NULL) { /* - * This operation (SIOCSLIFFLAGS) must have - * happened from a conn. + * This might be a result of a DL_NOTE_REPLUMB + * notification. In that case, connp is NULL. 
*/ - ASSERT(connp != NULL); - q = CONNP_TO_WQ(connp); + if (connp != NULL) + q = CONNP_TO_WQ(connp); + (void) ipif_down(ipif, NULL, NULL); /* error is set below the switch */ } break; case DL_ENABMULTI_REQ: - if (!ill->ill_isv6) - ipsq_current_finish(ipsq); ill_dlpi_done(ill, DL_ENABMULTI_REQ); if (ill->ill_dlpi_multicast_state == IDS_INPROGRESS) @@ -16030,11 +16027,11 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) if (mp1 == NULL) break; /* - * Because mp1 was added by ill_dl_up(), and it always - * passes a valid connp, connp must be valid here. + * mp1 was added by ill_dl_up(). if that is a result of + * a DL_NOTE_REPLUMB notification, connp could be NULL. */ - ASSERT(connp != NULL); - q = CONNP_TO_WQ(connp); + if (connp != NULL) + q = CONNP_TO_WQ(connp); /* * We are exclusive. So nothing can change even after @@ -16056,12 +16053,14 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) */ if (ill->ill_isv6) { if (ill->ill_flags & ILLF_XRESOLV) { - mutex_enter(&connp->conn_lock); + if (connp != NULL) + mutex_enter(&connp->conn_lock); mutex_enter(&ill->ill_lock); success = ipsq_pending_mp_add(connp, ipif, q, mp1, 0); mutex_exit(&ill->ill_lock); - mutex_exit(&connp->conn_lock); + if (connp != NULL) + mutex_exit(&connp->conn_lock); if (success) { err = ipif_resolver_up(ipif, Res_act_initial); @@ -16087,11 +16086,13 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) * Leave the pending mblk intact so that * the ioctl completes in ip_rput(). */ - mutex_enter(&connp->conn_lock); + if (connp != NULL) + mutex_enter(&connp->conn_lock); mutex_enter(&ill->ill_lock); success = ipsq_pending_mp_add(connp, ipif, q, mp1, 0); mutex_exit(&ill->ill_lock); - mutex_exit(&connp->conn_lock); + if (connp != NULL) + mutex_exit(&connp->conn_lock); if (success) { err = ipif_resolver_up(ipif, Res_act_initial); if (err == EINPROGRESS) { @@ -16153,6 +16154,15 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) err = ill_set_phys_addr(ill, mp); break; + case DL_NOTE_REPLUMB: + /* + * Directly return after calling ill_replumb(). + * Note that we should not free mp as it is reused + * in the ill_replumb() function. + */ + err = ill_replumb(ill, mp); + return; + case DL_NOTE_FASTPATH_FLUSH: ill_fastpath_flush(ill); break; @@ -16462,8 +16472,6 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) switch (dloa->dl_correct_primitive) { case DL_ENABMULTI_REQ: case DL_DISABMULTI_REQ: - if (!ill->ill_isv6) - ipsq_current_finish(ipsq); ill_dlpi_done(ill, dloa->dl_correct_primitive); break; case DL_PROMISCON_REQ: @@ -27048,20 +27056,6 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg) ipsq_current_start(ipsq, ci.ci_ipif, ipip->ipi_cmd); /* - * For most set ioctls that come here, this serves as a single point - * where we set the IPIF_CHANGING flag. This ensures that there won't - * be any new references to the ipif. This helps functions that go - * through this path and end up trying to wait for the refcnts - * associated with the ipif to go down to zero. The exception is - * SIOCSLIFREMOVEIF, which sets IPIF_CONDEMNED internally after - * identifying the right ipif to operate on. 
- */ - mutex_enter(&(ci.ci_ipif)->ipif_ill->ill_lock); - if (ipip->ipi_cmd != SIOCLIFREMOVEIF) - (ci.ci_ipif)->ipif_state_flags |= IPIF_CHANGING; - mutex_exit(&(ci.ci_ipif)->ipif_ill->ill_lock); - - /* * A return value of EINPROGRESS means the ioctl is * either queued and waiting for some reason or has * already completed. @@ -27321,7 +27315,7 @@ nak: break; switch (((arc_t *)mp->b_rptr)->arc_cmd) { case AR_ENTRY_SQUERY: - ip_wput_ctl(q, mp); + putnext(q, mp); return; case AR_CLIENT_NOTIFY: ip_arp_news(q, mp); diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c index c729118fec..3dbc4559d8 100644 --- a/usr/src/uts/common/inet/ip/ip6_if.c +++ b/usr/src/uts/common/inet/ip/ip6_if.c @@ -2825,7 +2825,8 @@ ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) goto bad; ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | - DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG); + DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | + DL_NOTE_REPLUMB); phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 3628dd4f56..ed7ae7b2b1 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -193,6 +193,8 @@ static void ill_glist_delete(ill_t *); static void ill_phyint_reinit(ill_t *ill); static void ill_set_nce_router_flags(ill_t *, boolean_t); static void ill_set_phys_addr_tail(ipsq_t *, queue_t *, mblk_t *, void *); +static void ill_replumb_tail(ipsq_t *, queue_t *, mblk_t *, void *); + static ip_v6intfid_func_t ip_ether_v6intfid, ip_ib_v6intfid; static ip_v6intfid_func_t ip_ipmp_v6intfid, ip_nodef_v6intfid; static ip_v6mapinfo_func_t ip_ether_v6mapinfo, ip_ib_v6mapinfo; @@ -1587,18 +1589,24 @@ conn_cleanup_ill(conn_t *connp, caddr_t arg) mutex_exit(&connp->conn_lock); } -/* ARGSUSED */ -void -ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) +static void +ill_down_ipifs_tail(ill_t *ill) { - ill_t *ill = q->q_ptr; ipif_t *ipif; - ASSERT(IAM_WRITER_IPSQ(ipsq)); + ASSERT(IAM_WRITER_ILL(ill)); for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { ipif_non_duplicate(ipif); ipif_down_tail(ipif); } +} + +/* ARGSUSED */ +void +ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) +{ + ASSERT(IAM_WRITER_IPSQ(ipsq)); + ill_down_ipifs_tail(q->q_ptr); freemsg(mp); ipsq_current_finish(ipsq); } @@ -3007,10 +3015,10 @@ ill_capability_dld_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *isub) ill->ill_name); return; } - idc->idc_capab_df = (ip_capab_func_t)dld.dld_capab; - idc->idc_capab_dh = (void *)dld.dld_capab_handle; ill->ill_dld_capab = idc; } + idc->idc_capab_df = (ip_capab_func_t)dld.dld_capab; + idc->idc_capab_dh = (void *)dld.dld_capab_handle; ip1dbg(("ill_capability_dld_ack: interface %s " "supports DLD version %d\n", ill->ill_name, DLD_CURRENT_VERSION)); @@ -6317,6 +6325,10 @@ ipif_ill_refrele_tail(ill_t *ill) qwriter_ip(ill, ill->ill_rq, mp, ill_set_phys_addr_tail, CUR_OP, B_TRUE); return; + case DL_NOTE_REPLUMB: + qwriter_ip(ill, ill->ill_rq, mp, + ill_replumb_tail, CUR_OP, B_TRUE); + return; default: ASSERT(0); ill_refrele(ill); @@ -8021,6 +8033,7 @@ ipsq_exit(ipsq_t *ipsq) void ipsq_current_start(ipsq_t *ipsq, ipif_t *ipif, int ioccmd) { + ill_t *ill = ipif->ipif_ill; ipxop_t *ipx = ipsq->ipsq_xop; ASSERT(IAM_WRITER_IPSQ(ipsq)); @@ -8032,6 +8045,39 @@ 
ipsq_current_start(ipsq_t *ipsq, ipif_t *ipif, int ioccmd) mutex_enter(&ipx->ipx_lock); ipx->ipx_current_ipif = ipif; mutex_exit(&ipx->ipx_lock); + + /* + * Set IPIF_CHANGING on one or more ipifs associated with the + * current exclusive operation. IPIF_CHANGING prevents any new + * references to the ipif (so that the references will eventually + * drop to zero) and also prevents any "get" operations (e.g., + * SIOCGLIFFLAGS) from being able to access the ipif until the + * operation has completed and the ipif is again in a stable state. + * + * For ioctls, IPIF_CHANGING is set on the ipif associated with the + * ioctl. For internal operations (where ioccmd is zero), all ipifs + * on the ill are marked with IPIF_CHANGING since it's unclear which + * ipifs will be affected. + * + * Note that SIOCLIFREMOVEIF is a special case as it sets + * IPIF_CONDEMNED internally after identifying the right ipif to + * operate on. + */ + switch (ioccmd) { + case SIOCLIFREMOVEIF: + break; + case 0: + mutex_enter(&ill->ill_lock); + ipif = ipif->ipif_ill->ill_ipif; + for (; ipif != NULL; ipif = ipif->ipif_next) + ipif->ipif_state_flags |= IPIF_CHANGING; + mutex_exit(&ill->ill_lock); + break; + default: + mutex_enter(&ill->ill_lock); + ipif->ipif_state_flags |= IPIF_CHANGING; + mutex_exit(&ill->ill_lock); + } } /* @@ -8061,7 +8107,13 @@ ipsq_current_finish(ipsq_t *ipsq) mutex_enter(&ill->ill_lock); dlpi_pending = ill->ill_dlpi_pending; - ipif->ipif_state_flags &= ~IPIF_CHANGING; + if (ipx->ipx_current_ioctl == 0) { + ipif = ill->ill_ipif; + for (; ipif != NULL; ipif = ipif->ipif_next) + ipif->ipif_state_flags &= ~IPIF_CHANGING; + } else { + ipif->ipif_state_flags &= ~IPIF_CHANGING; + } mutex_exit(&ill->ill_lock); } @@ -14010,20 +14062,9 @@ ill_up_ipifs_on_ill(ill_t *ill, queue_t *q, mblk_t *mp) if (ill == NULL) return (0); - /* - * Except for ipif_state_flags and ill_state_flags the other - * fields of the ipif/ill that are modified below are protected - * implicitly since we are a writer. We would have tried to down - * even an ipif that was already down, in ill_down_ipifs. So we - * just blindly clear the IPIF_CHANGING flag here on all ipifs. - */ ASSERT(IAM_WRITER_ILL(ill)); - ill->ill_up_ipifs = B_TRUE; for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { - mutex_enter(&ill->ill_lock); - ipif->ipif_state_flags &= ~IPIF_CHANGING; - mutex_exit(&ill->ill_lock); if (ipif->ipif_was_up) { if (!(ipif->ipif_flags & IPIF_UP)) err = ipif_up(ipif, q, mp); @@ -14060,19 +14101,16 @@ ill_up_ipifs(ill_t *ill, queue_t *q, mblk_t *mp) } /* - * Bring down any IPIF_UP ipifs on ill. + * Bring down any IPIF_UP ipifs on ill. If "logical" is B_TRUE, we bring + * down the ipifs without sending DL_UNBIND_REQ to the driver. */ static void -ill_down_ipifs(ill_t *ill) +ill_down_ipifs(ill_t *ill, boolean_t logical) { ipif_t *ipif; ASSERT(IAM_WRITER_ILL(ill)); - /* - * Except for ipif_state_flags the other fields of the ipif/ill that - * are modified below are protected implicitly since we are a writer - */ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { /* * We go through the ipif_down logic even if the ipif @@ -14083,19 +14121,19 @@ ill_down_ipifs(ill_t *ill) if (ipif->ipif_flags & IPIF_UP) ipif->ipif_was_up = B_TRUE; - mutex_enter(&ill->ill_lock); - ipif->ipif_state_flags |= IPIF_CHANGING; - mutex_exit(&ill->ill_lock); - /* * Need to re-create net/subnet bcast ires if * they are dependent on ipif. 
*/ if (!ipif->ipif_isv6) ipif_check_bcast_ires(ipif); - (void) ipif_logical_down(ipif, NULL, NULL); - ipif_non_duplicate(ipif); - ipif_down_tail(ipif); + if (logical) { + (void) ipif_logical_down(ipif, NULL, NULL); + ipif_non_duplicate(ipif); + ipif_down_tail(ipif); + } else { + (void) ipif_down(ipif, NULL, NULL); + } } } @@ -14408,6 +14446,7 @@ ill_dlpi_dispatch(ill_t *ill, mblk_t *mp) { union DL_primitives *dlp; t_uscalar_t prim; + boolean_t waitack = B_FALSE; ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); @@ -14437,11 +14476,20 @@ ill_dlpi_dispatch(ill_t *ill, mblk_t *mp) * we only wait for the ACK of the DL_UNBIND_REQ. */ mutex_enter(&ill->ill_lock); - if (!(ill->ill_state_flags & ILL_CONDEMNED) || (prim == DL_UNBIND_REQ)) + if (!(ill->ill_state_flags & ILL_CONDEMNED) || + (prim == DL_UNBIND_REQ)) { ill->ill_dlpi_pending = prim; + waitack = B_TRUE; + } mutex_exit(&ill->ill_lock); putnext(ill->ill_wq, mp); + + /* + * There is no ack for DL_NOTIFY_CONF messages + */ + if (waitack && prim == DL_NOTIFY_CONF) + ill_dlpi_done(ill, prim); } /* @@ -16165,14 +16213,13 @@ ill_dl_up(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) * Record state needed to complete this operation when the * DL_BIND_ACK shows up. Also remember the pre-allocated mblks. */ - ASSERT(WR(q)->q_next == NULL); - connp = Q_TO_CONN(q); - - mutex_enter(&connp->conn_lock); + connp = CONN_Q(q) ? Q_TO_CONN(q) : NULL; + ASSERT(connp != NULL || !CONN_Q(q)); + GRAB_CONN_LOCK(q); mutex_enter(&ipif->ipif_ill->ill_lock); success = ipsq_pending_mp_add(connp, ipif, q, mp, 0); mutex_exit(&ipif->ipif_ill->ill_lock); - mutex_exit(&connp->conn_lock); + RELEASE_CONN_LOCK(q); if (!success) goto bad; @@ -19981,7 +20028,7 @@ ill_set_phys_addr(ill_t *ill, mblk_t *mp) * If we can quiesce the ill, then set the address. If not, then * ill_set_phys_addr_tail() will be called from ipif_ill_refrele_tail(). */ - ill_down_ipifs(ill); + ill_down_ipifs(ill, B_TRUE); mutex_enter(&ill->ill_lock); if (!ill_is_quiescent(ill)) { /* call cannot fail since `conn_t *' argument is NULL */ @@ -20062,6 +20109,75 @@ ill_set_ndmp(ill_t *ill, mblk_t *ndmp, uint_t addroff, uint_t addrlen) ill->ill_nd_lla_len = addrlen; } +/* + * Replumb the ill. + */ +int +ill_replumb(ill_t *ill, mblk_t *mp) +{ + ipsq_t *ipsq = ill->ill_phyint->phyint_ipsq; + + ASSERT(IAM_WRITER_IPSQ(ipsq)); + + ipsq_current_start(ipsq, ill->ill_ipif, 0); + + /* + * If we can quiesce the ill, then continue. If not, then + * ill_replumb_tail() will be called from ipif_ill_refrele_tail(). + */ + ill_down_ipifs(ill, B_FALSE); + + mutex_enter(&ill->ill_lock); + if (!ill_is_quiescent(ill)) { + /* call cannot fail since `conn_t *' argument is NULL */ + (void) ipsq_pending_mp_add(NULL, ill->ill_ipif, ill->ill_rq, + mp, ILL_DOWN); + mutex_exit(&ill->ill_lock); + return (EINPROGRESS); + } + mutex_exit(&ill->ill_lock); + + ill_replumb_tail(ipsq, ill->ill_rq, mp, NULL); + return (0); +} + +/* ARGSUSED */ +static void +ill_replumb_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy) +{ + ill_t *ill = q->q_ptr; + + ASSERT(IAM_WRITER_IPSQ(ipsq)); + + ill_down_ipifs_tail(ill); + + freemsg(ill->ill_replumb_mp); + ill->ill_replumb_mp = copyb(mp); + + /* + * Successfully quiesced and brought down the interface, now we send + * the DL_NOTE_REPLUMB_DONE message down to the driver. Reuse the + * DL_NOTE_REPLUMB message. 
+ */ + mp = mexchange(NULL, mp, sizeof (dl_notify_conf_t), M_PROTO, + DL_NOTIFY_CONF); + ASSERT(mp != NULL); + ((dl_notify_conf_t *)mp->b_rptr)->dl_notification = + DL_NOTE_REPLUMB_DONE; + ill_dlpi_send(ill, mp); + + /* + * If there are ipifs to bring up, ill_up_ipifs() will return + * EINPROGRESS, and ipsq_current_finish() will be called by + * ip_rput_dlpi_writer() or ip_arp_done() when the last ipif is + * brought up. + */ + if (ill->ill_replumb_mp == NULL || + ill_up_ipifs(ill, q, ill->ill_replumb_mp) != EINPROGRESS) { + ipsq_current_finish(ipsq); + } +} + major_t IP_MAJ; #define IP "ip" diff --git a/usr/src/uts/common/inet/ip/ip_multi.c b/usr/src/uts/common/inet/ip/ip_multi.c index 656080b769..d7be67cd26 100644 --- a/usr/src/uts/common/inet/ip/ip_multi.c +++ b/usr/src/uts/common/inet/ip/ip_multi.c @@ -81,8 +81,6 @@ static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); -static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, - uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); static void conn_ilg_reap(conn_t *connp); static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); @@ -676,6 +674,42 @@ ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, } /* + * Mapping the given IP multicast address to the L2 multicast mac address. + */ +static void +ill_multicast_mapping(ill_t *ill, ipaddr_t ip_addr, uint8_t *hw_addr, + uint32_t hw_addrlen) +{ + dl_unitdata_req_t *dlur; + ipaddr_t proto_extract_mask; + uint8_t *from, *bcast_addr; + uint32_t hw_extract_start; + int len; + + ASSERT(IN_CLASSD(ntohl(ip_addr))); + ASSERT(hw_addrlen == ill->ill_phys_addr_length); + ASSERT((ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) == 0); + ASSERT((ill->ill_flags & ILLF_MULTICAST) != 0); + + /* + * Find the physical broadcast address. + */ + dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr; + bcast_addr = (uint8_t *)dlur + dlur->dl_dest_addr_offset; + if (ill->ill_sap_length > 0) + bcast_addr += ill->ill_sap_length; + + VERIFY(MEDIA_V4MINFO(ill->ill_media, hw_addrlen, bcast_addr, + hw_addr, &hw_extract_start, &proto_extract_mask)); + + len = MIN((int)hw_addrlen - hw_extract_start, IP_ADDR_LEN); + ip_addr &= proto_extract_mask; + from = (uint8_t *)&ip_addr; + while (len-- > 0) + hw_addr[hw_extract_start + len] |= from[len]; +} + +/* * Send a multicast request to the driver for enabling multicast reception * for v6groupp address. The caller has already checked whether it is * appropriate to send one or not. @@ -698,48 +732,30 @@ ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) return (0); /* - * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked - * on. + * Create a DL_ENABMULTI_REQ. */ mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), &addrlen, &addroff); if (!mp) return (ENOMEM); + if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { ipaddr_t v4group; IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); - /* - * NOTE!!! - * The "addroff" passed in here was calculated by - * ill_create_dl(), and will be used by ill_create_squery() - * to perform some twisted coding magic. It is the offset - * into the dl_xxx_req of the hw addr. Here, it will be - * added to b_wptr - b_rptr to create a magic number that - * is not an offset into this squery mblk. 
- * The actual hardware address will be accessed only in the - * dl_xxx_req, not in the squery. More importantly, - * that hardware address can *only* be accessed in this - * mblk chain by calling mi_offset_param_c(), which uses - * the magic number in the squery hw offset field to go - * to the *next* mblk (the dl_xxx_req), subtract the - * (b_wptr - b_rptr), and find the actual offset into - * the dl_xxx_req. - * Any method that depends on using the - * offset field in the dl_disabmulti_req or squery - * to find either hardware address will similarly fail. - * - * Look in ar_entry_squery() in arp.c to see how this offset - * is used. - */ - mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); - if (!mp) - return (ENOMEM); - ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", + + ill_multicast_mapping(ill, v4group, + mp->b_rptr + addroff, addrlen); + + ip1dbg(("ip_ll_send_enabmulti_req: IPv4 %s on %s\n", inet_ntop(AF_INET6, v6groupp, group_buf, sizeof (group_buf)), ill->ill_name)); - putnext(ill->ill_rq, mp); + + /* Track the state if this is the first enabmulti */ + if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) + ill->ill_dlpi_multicast_state = IDS_INPROGRESS; + ill_dlpi_send(ill, mp); } else { ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" " %s\n", @@ -934,7 +950,7 @@ ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) { mblk_t *mp; char group_buf[INET6_ADDRSTRLEN]; - uint32_t addrlen, addroff; + uint32_t addrlen, addroff; ASSERT(IAM_WRITER_ILL(ill)); @@ -945,12 +961,10 @@ ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) return (0); /* - * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked - * on. + * Create a DL_DISABMULTI_REQ. */ mp = ill_create_dl(ill, DL_DISABMULTI_REQ, sizeof (dl_disabmulti_req_t), &addrlen, &addroff); - if (!mp) return (ENOMEM); @@ -958,29 +972,15 @@ ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) ipaddr_t v4group; IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); - /* - * NOTE!!! - * The "addroff" passed in here was calculated by - * ill_create_dl(), and will be used by ill_create_squery() - * to perform some twisted coding magic. It is the offset - * into the dl_xxx_req of the hw addr. Here, it will be - * added to b_wptr - b_rptr to create a magic number that - * is not an offset into this mblk. - * - * Please see the comment in ip_ll_send)enabmulti_req() - * for a complete explanation. - * - * Look in ar_entry_squery() in arp.c to see how this offset - * is used. 
- */ - mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); - if (!mp) - return (ENOMEM); - ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", + + ill_multicast_mapping(ill, v4group, + mp->b_rptr + addroff, addrlen); + + ip1dbg(("ip_ll_send_disabmulti_req: IPv4 %s on %s\n", inet_ntop(AF_INET6, v6groupp, group_buf, sizeof (group_buf)), ill->ill_name)); - putnext(ill->ill_rq, mp); + ill_dlpi_send(ill, mp); } else { ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on" " %s\n", @@ -1296,58 +1296,6 @@ ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, fanout_flags, zoneid); } -static area_t ip_aresq_template = { - AR_ENTRY_SQUERY, /* cmd */ - sizeof (area_t)+IP_ADDR_LEN, /* name offset */ - sizeof (area_t), /* name len (filled by ill_arp_alloc) */ - IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ - sizeof (area_t), /* proto addr offset */ - IP_ADDR_LEN, /* proto addr_length */ - 0, /* proto mask offset */ - /* Rest is initialized when used */ - 0, /* flags */ - 0, /* hw addr offset */ - 0, /* hw addr length */ -}; - -static mblk_t * -ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, - uint32_t addroff, mblk_t *mp_tail) -{ - mblk_t *mp; - area_t *area; - - mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, - (caddr_t)&ipaddr); - if (!mp) { - freemsg(mp_tail); - return (NULL); - } - area = (area_t *)mp->b_rptr; - area->area_hw_addr_length = addrlen; - area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; - /* - * NOTE! - * - * The area_hw_addr_offset, as can be seen, does not hold the - * actual hardware address offset. Rather, it holds the offset - * to the hw addr in the dl_xxx_req in mp_tail, modified by - * adding (mp->b_wptr - mp->b_rptr). This allows the function - * mi_offset_paramc() to find the hardware address in the - * *second* mblk (dl_xxx_req), not this mblk. - * - * Using mi_offset_paramc() is thus the *only* way to access - * the dl_xxx_hw address. - * - * The squery hw address should *not* be accessed. - * - * See ar_entry_squery() in arp.c for an example of how all this works. - */ - - mp->b_cont = mp_tail; - return (mp); -} - /* * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for * the hardware address. @@ -1425,63 +1373,6 @@ ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, } /* - * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ - * messages that had been delayed until we'd heard back from ARP. One catch: - * we need to ensure that no one else becomes writer on the IPSQ before we've - * received the replies, or they'll incorrectly process our replies as part of - * their unrelated IPSQ operation. To do this, we start a new IPSQ operation, - * which will complete when we process the reply in ip_rput_dlpi_writer(). 
- */ -/* ARGSUSED */ -static void -ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg) -{ - ill_t *ill = q->q_ptr; - t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; - - ASSERT(IAM_WRITER_ILL(ill)); - ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ); - ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim))); - - if (prim == DL_ENABMULTI_REQ) { - /* Track the state if this is the first enabmulti */ - if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) - ill->ill_dlpi_multicast_state = IDS_INPROGRESS; - } - - ipsq_current_start(ipsq, ill->ill_ipif, 0); - ill_dlpi_send(ill, mp); -} - -void -ip_wput_ctl(queue_t *q, mblk_t *mp) -{ - ill_t *ill = q->q_ptr; - mblk_t *dlmp = mp->b_cont; - area_t *area = (area_t *)mp->b_rptr; - t_uscalar_t prim; - - /* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */ - if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY || - dlmp == NULL) { - putnext(q, mp); - return; - } - - /* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */ - prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive; - if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) { - putnext(q, mp); - return; - } - freeb(mp); - - /* See comments above ip_wput_ctl_writer() for details */ - ill_refhold(ill); - qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE); -} - -/* * Rejoin any groups which have been explicitly joined by the application (we * left all explicitly joined groups as part of ill_leave_multicast() prior to * bringing the interface down). Note that because groups can be joined and diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index 80dc0a691b..b604c13252 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -188,6 +188,7 @@ extern void ill_refresh_bcast(ill_t *); extern void ill_restart_dad(ill_t *, boolean_t); extern boolean_t ill_setdefaulttoken(ill_t *); extern int ill_set_phys_addr(ill_t *, mblk_t *); +extern int ill_replumb(ill_t *, mblk_t *); extern void ill_set_ndmp(ill_t *, mblk_t *, uint_t, uint_t); extern mblk_t *ill_pending_mp_get(ill_t *, conn_t **, uint_t); diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index f833adce01..57721bb2ed 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -824,16 +824,19 @@ drv_ioc_usagelog(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) { dld_ioc_usagelog_t *log_info = (dld_ioc_usagelog_t *)karg; + int err = 0; if (log_info->ul_type < MAC_LOGTYPE_LINK || log_info->ul_type > MAC_LOGTYPE_FLOW) return (EINVAL); - if (log_info->ul_onoff) - mac_start_logusage(log_info->ul_type, log_info->ul_interval); - else + if (log_info->ul_onoff) { + err = mac_start_logusage(log_info->ul_type, + log_info->ul_interval); + } else { mac_stop_logusage(log_info->ul_type); - return (0); + } + return (err); } /* diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index b6faf7ada3..1f683c8591 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -430,8 +430,7 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp) mac_perim_enter_by_mh(dsp->ds_mh, &mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED && - ((err = dls_active_set(dsp)) != 0)) { + if ((err = dls_active_set(dsp)) != 0) { dl_err = DL_SYSERR; goto failed2; } @@ -460,8 +459,7 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp) } dsp->ds_dlstate = DL_UNBOUND; - if 
(dsp->ds_passivestate == DLD_UNINITIALIZED) - dls_active_clear(dsp); + dls_active_clear(dsp, B_FALSE); goto failed2; } @@ -489,9 +487,6 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp) dlsap_addr_length += sizeof (uint16_t); dsp->ds_dlstate = DL_IDLE; - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dsp->ds_passivestate = DLD_ACTIVE; - dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0); return; @@ -557,6 +552,7 @@ proto_unbind_req(dld_str_t *dsp, mblk_t *mp) dsp->ds_mode = DLD_UNITDATA; dsp->ds_dlstate = DL_UNBOUND; + dls_active_clear(dsp, B_FALSE); mac_perim_exit(mph); dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ); return; @@ -609,8 +605,7 @@ proto_promiscon_req(dld_str_t *dsp, mblk_t *mp) mac_perim_enter_by_mh(dsp->ds_mh, &mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED && - ((err = dls_active_set(dsp)) != 0)) { + if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) { dsp->ds_promisc = promisc_saved; dl_err = DL_SYSERR; goto failed2; @@ -624,15 +619,13 @@ proto_promiscon_req(dld_str_t *dsp, mblk_t *mp) if (err != 0) { dl_err = DL_SYSERR; dsp->ds_promisc = promisc_saved; - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dls_active_clear(dsp); + if (promisc_saved == 0) + dls_active_clear(dsp, B_FALSE); goto failed2; } mac_perim_exit(mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dsp->ds_passivestate = DLD_ACTIVE; dlokack(q, mp, DL_PROMISCON_REQ); return; @@ -702,12 +695,18 @@ proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp) * Adjust channel promiscuity. */ err = dls_promisc(dsp, promisc_saved); - mac_perim_exit(mph); if (err != 0) { + mac_perim_exit(mph); dl_err = DL_SYSERR; goto failed; } + + if (dsp->ds_promisc == 0) + dls_active_clear(dsp, B_FALSE); + + mac_perim_exit(mph); + dlokack(q, mp, DL_PROMISCOFF_REQ); return; failed: @@ -741,14 +740,12 @@ proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp) mac_perim_enter_by_mh(dsp->ds_mh, &mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED && - ((err = dls_active_set(dsp)) != 0)) { + if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) { dl_err = DL_SYSERR; goto failed2; } err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset); - if (err != 0) { switch (err) { case EINVAL: @@ -763,16 +760,13 @@ proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp) dl_err = DL_SYSERR; break; } - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dls_active_clear(dsp); - + if (dsp->ds_dmap == NULL) + dls_active_clear(dsp, B_FALSE); goto failed2; } mac_perim_exit(mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dsp->ds_passivestate = DLD_ACTIVE; dlokack(q, mp, DL_ENABMULTI_REQ); return; @@ -809,6 +803,8 @@ proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp) mac_perim_enter_by_mh(dsp->ds_mh, &mph); err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset); + if ((err == 0) && (dsp->ds_dmap == NULL)) + dls_active_clear(dsp, B_FALSE); mac_perim_exit(mph); if (err != 0) { @@ -909,8 +905,7 @@ proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp) mac_perim_enter_by_mh(dsp->ds_mh, &mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED && - ((err = dls_active_set(dsp)) != 0)) { + if ((err = dls_active_set(dsp)) != 0) { dl_err = DL_SYSERR; goto failed2; } @@ -928,17 +923,13 @@ proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp) dl_err = DL_SYSERR; break; } - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dls_active_clear(dsp); - + dls_active_clear(dsp, B_FALSE); goto failed2; } mac_perim_exit(mph); - if (dsp->ds_passivestate == DLD_UNINITIALIZED) - dsp->ds_passivestate = DLD_ACTIVE; dlokack(q, mp, DL_SET_PHYS_ADDR_REQ); return; 
diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c index f7f4266062..170e087a69 100644 --- a/usr/src/uts/common/io/dld/dld_str.c +++ b/usr/src/uts/common/io/dld/dld_str.c @@ -213,28 +213,20 @@ dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) return (rc); } -/* - * qi_qopen: open(9e) - */ -/*ARGSUSED*/ +void * +dld_str_private(queue_t *q) +{ + return (((dld_str_t *)(q->q_ptr))->ds_private); +} + int -dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) +dld_str_open(queue_t *rq, dev_t *devp, void *private) { dld_str_t *dsp; major_t major; minor_t minor; int err; - if (sflag == MODOPEN) - return (ENOTSUP); - - /* - * This is a cloning driver and therefore each queue should only - * ever get opened once. - */ - if (rq->q_ptr != NULL) - return (EBUSY); - major = getmajor(*devp); minor = getminor(*devp); @@ -249,12 +241,14 @@ dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) return (ENOSR); ASSERT(dsp->ds_dlstate == DL_UNATTACHED); + dsp->ds_private = private; if (minor != 0) { /* * Style 1 open */ if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) goto failed; + ASSERT(dsp->ds_dlstate == DL_UNBOUND); } else { (void) qassociate(rq, -1); @@ -276,11 +270,8 @@ failed: return (err); } -/* - * qi_qclose: close(9e) - */ int -dld_close(queue_t *rq) +dld_str_close(queue_t *rq) { dld_str_t *dsp = rq->q_ptr; @@ -298,11 +289,6 @@ dld_close(queue_t *rq) cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock); mutex_exit(&dsp->ds_lock); - /* - * Disable the queue srv(9e) routine. - */ - qprocsoff(rq); - /* * This stream was open to a provider node. Check to see @@ -322,6 +308,40 @@ dld_close(queue_t *rq) } /* + * qi_qopen: open(9e) + */ +/*ARGSUSED*/ +int +dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + if (sflag == MODOPEN) + return (ENOTSUP); + + /* + * This is a cloning driver and therefore each queue should only + * ever get opened once. + */ + if (rq->q_ptr != NULL) + return (EBUSY); + + return (dld_str_open(rq, devp, NULL)); +} + +/* + * qi_qclose: close(9e) + */ +int +dld_close(queue_t *rq) +{ + /* + * Disable the queue srv(9e) routine. + */ + qprocsoff(rq); + + return (dld_str_close(rq)); +} + +/* * qi_qputp: put(9e) */ void @@ -603,6 +623,7 @@ dld_str_destroy(dld_str_t *dsp) ASSERT(dsp->ds_direct == B_FALSE); ASSERT(dsp->ds_lso == B_FALSE); ASSERT(dsp->ds_lso_max == 0); + ASSERT(dsp->ds_passivestate != DLD_ACTIVE); /* * Reinitialize all the flags. @@ -930,11 +951,10 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) dev_t dev; int err; const char *drvname; - mac_perim_handle_t mph; + mac_perim_handle_t mph = NULL; boolean_t qassociated = B_FALSE; dls_link_t *dlp = NULL; dls_dl_handle_t ddp = NULL; - boolean_t entered_perim = B_FALSE; if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) return (EINVAL); @@ -959,7 +979,6 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0) goto failed; - entered_perim = B_TRUE; /* * Open a channel. 
@@ -986,7 +1005,7 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) failed: if (dlp != NULL) dls_link_rele(dlp); - if (entered_perim) + if (mph != NULL) mac_perim_exit(mph); if (ddp != NULL) dls_devnet_rele(ddp); diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c index 72f1aecd6a..4cf2efcccf 100644 --- a/usr/src/uts/common/io/dls/dls.c +++ b/usr/src/uts/common/io/dls/dls.c @@ -46,8 +46,17 @@ dls_open(dls_link_t *dlp, dls_dl_handle_t ddh, dld_str_t *dsp) if (zid != GLOBAL_ZONEID && dlp->dl_zid != zid) return (ENOENT); - if ((err = mac_start(dlp->dl_mh)) != 0) + /* + * mac_start() is required for non-legacy MACs to show accurate + * kstats even before the interface is brought up. For legacy + * drivers, this is not needed. Further, calling mac_start() for + * legacy drivers would make the shared-lower-stream to stay in + * the DL_IDLE state, which in turn causes performance regression. + */ + if (!mac_capab_get(dlp->dl_mh, MAC_CAPAB_LEGACY, NULL) && + ((err = mac_start(dlp->dl_mh)) != 0)) { return (err); + } local = (zid == dlp->dl_zid); dlp->dl_zone_ref += (local ? 1 : 0); @@ -96,7 +105,7 @@ dls_close(dld_str_t *dsp) } dsp->ds_dmap = NULL; - dls_active_clear(dsp); + dls_active_clear(dsp, B_TRUE); /* * If the dld_str_t is bound then unbind it. @@ -126,7 +135,8 @@ dls_close(dld_str_t *dsp) dsp->ds_dlp = NULL; - mac_stop(dsp->ds_mh); + if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_LEGACY, NULL)) + mac_stop(dsp->ds_mh); /* * Release our reference to the dls_link_t allowing that to be @@ -628,29 +638,50 @@ dls_active_set(dld_str_t *dsp) ASSERT(MAC_PERIM_HELD(dsp->ds_mh)); - /* If we're already active, then there's nothing more to do. */ - if (dsp->ds_active) + if (dsp->ds_passivestate == DLD_PASSIVE) return (0); - if ((err = dls_mac_active_set(dsp->ds_dlp)) != 0) { + /* If we're already active, then there's nothing more to do. */ + if ((dsp->ds_nactive == 0) && + ((err = dls_mac_active_set(dsp->ds_dlp)) != 0)) { /* except for ENXIO all other errors are mapped to EBUSY */ if (err != ENXIO) return (EBUSY); return (err); } - dsp->ds_active = B_TRUE; + dsp->ds_passivestate = DLD_ACTIVE; + dsp->ds_nactive++; return (0); } +/* + * Note that dls_active_set() is called whenever an active operation + * (DL_BIND_REQ, DL_ENABMULTI_REQ ...) is processed and + * dls_active_clear(dsp, B_FALSE) is called whenever the active operation + * is being undone (DL_UNBIND_REQ, DL_DISABMULTI_REQ ...). In some cases, + * a stream is closed without every active operation being undone and we + * need to clear all the "active" states by calling + * dls_active_clear(dsp, B_TRUE). + */ void -dls_active_clear(dld_str_t *dsp) +dls_active_clear(dld_str_t *dsp, boolean_t all) { ASSERT(MAC_PERIM_HELD(dsp->ds_mh)); - if (!dsp->ds_active) + if (dsp->ds_passivestate == DLD_PASSIVE) + return; + + if (all && dsp->ds_nactive == 0) + return; + + ASSERT(dsp->ds_nactive > 0); + + dsp->ds_nactive -= (all ? dsp->ds_nactive : 1); + if (dsp->ds_nactive != 0) return; + ASSERT(dsp->ds_passivestate == DLD_ACTIVE); dls_mac_active_clear(dsp->ds_dlp); - dsp->ds_active = B_FALSE; + dsp->ds_passivestate = DLD_UNINITIALIZED; } diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 6e6f451ca9..c700e500fe 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -2158,7 +2158,9 @@ uint32_t mac_no_notification(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; - return (mip->mi_unsup_note); + + return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? 
+ mip->mi_capab_legacy.ml_unsup_note : 0); } /* @@ -2842,6 +2844,28 @@ mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, return (err); } +int +mac_fastpath_disable(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + if ((mip->mi_state_flags & MIS_LEGACY) == 0) + return (0); + + return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver)); +} + +void +mac_fastpath_enable(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + if ((mip->mi_state_flags & MIS_LEGACY) == 0) + return; + + mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver); +} + void mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) { @@ -4391,34 +4415,79 @@ mac_log_linkinfo(void *arg) rw_exit(&i_mac_impl_lock); } +typedef struct i_mac_fastpath_state_s { + boolean_t mf_disable; + int mf_err; +} i_mac_fastpath_state_t; + +/*ARGSUSED*/ +static uint_t +i_mac_fastpath_disable_walker(mod_hash_key_t key, mod_hash_val_t *val, + void *arg) +{ + i_mac_fastpath_state_t *state = arg; + mac_handle_t mh = (mac_handle_t)val; + + if (state->mf_disable) + state->mf_err = mac_fastpath_disable(mh); + else + mac_fastpath_enable(mh); + + return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); +} + /* * Start the logging timer. */ -void +int mac_start_logusage(mac_logtype_t type, uint_t interval) { + i_mac_fastpath_state_t state = {B_TRUE, 0}; + int err; + rw_enter(&i_mac_impl_lock, RW_WRITER); switch (type) { case MAC_LOGTYPE_FLOW: if (mac_flow_log_enable) { rw_exit(&i_mac_impl_lock); - return; + return (0); } - mac_flow_log_enable = B_TRUE; /* FALLTHRU */ case MAC_LOGTYPE_LINK: if (mac_link_log_enable) { rw_exit(&i_mac_impl_lock); - return; + return (0); } - mac_link_log_enable = B_TRUE; break; default: ASSERT(0); } + + /* Disable fastpath */ + mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state); + if ((err = state.mf_err) != 0) { + /* Reenable fastpath */ + state.mf_disable = B_FALSE; + state.mf_err = 0; + mod_hash_walk(i_mac_impl_hash, + i_mac_fastpath_disable_walker, &state); + rw_exit(&i_mac_impl_lock); + return (err); + } + + switch (type) { + case MAC_LOGTYPE_FLOW: + mac_flow_log_enable = B_TRUE; + /* FALLTHRU */ + case MAC_LOGTYPE_LINK: + mac_link_log_enable = B_TRUE; + break; + } + mac_logging_interval = interval; rw_exit(&i_mac_impl_lock); mac_log_linkinfo(NULL); + return (0); } /* @@ -4428,6 +4497,7 @@ void mac_stop_logusage(mac_logtype_t type) { i_mac_log_state_t lstate; + i_mac_fastpath_state_t state = {B_FALSE, 0}; rw_enter(&i_mac_impl_lock, RW_WRITER); lstate.mi_fenable = mac_flow_log_enable; @@ -4455,6 +4525,10 @@ mac_stop_logusage(mac_logtype_t type) default: ASSERT(0); } + + /* Reenable fastpath */ + mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state); + rw_exit(&i_mac_impl_lock); (void) untimeout(mac_logging_timer); mac_logging_timer = 0; diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 85614bdd9e..8b4006a805 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -410,6 +410,12 @@ mac_devinfo_get(mac_handle_t mh) return (((mac_impl_t *)mh)->mi_dip); } +void * +mac_driver(mac_handle_t mh) +{ + return (((mac_impl_t *)mh)->mi_driver); +} + const char * mac_name(mac_handle_t mh) { @@ -1637,10 +1643,9 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, boolean_t bcast_added = B_FALSE; boolean_t nactiveclients_added = B_FALSE; boolean_t mac_started = B_FALSE; + boolean_t fastpath_disabled 
= B_FALSE; mac_resource_props_t mrp; - ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - /* when VID is non-zero, the underlying MAC can not be VNIC */ ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0))); @@ -1708,19 +1713,39 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, } /* - * Return EBUSY if: - * - this is an exclusive active mac client and there already exist - * active mac clients, or - * - there already exist an exclusively active mac client. + * If this is a VNIC/VLAN, disable softmac fast-path. */ - if ((mcip->mci_state_flags & MCIS_EXCLUSIVE) && - (mip->mi_nactiveclients != 0) || (mip->mi_state_flags & - MIS_EXCLUSIVE)) { + if (mcip->mci_state_flags & MCIS_IS_VNIC) { + err = mac_fastpath_disable((mac_handle_t)mip); + if (err != 0) + return (err); + fastpath_disabled = B_TRUE; + } + + /* + * Return EBUSY if: + * - there is an exclusively active mac client exists. + * - this is an exclusive active mac client but + * a. there is already active mac clients exist, or + * b. fastpath streams are already plumbed on this legacy device + */ + if (mip->mi_state_flags & MIS_EXCLUSIVE) { + if (fastpath_disabled) + mac_fastpath_enable((mac_handle_t)mip); return (EBUSY); } - if (mcip->mci_state_flags & MCIS_EXCLUSIVE) + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) { + ASSERT(!fastpath_disabled); + if (mip->mi_nactiveclients != 0) + return (EBUSY); + + if ((mip->mi_state_flags & MIS_LEGACY) && + !(mip->mi_capab_legacy.ml_active_set(mip->mi_driver))) { + return (EBUSY); + } mip->mi_state_flags |= MIS_EXCLUSIVE; + } bzero(&mrp, sizeof (mac_resource_props_t)); if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC | @@ -1970,8 +1995,15 @@ bail: if (nactiveclients_added) mip->mi_nactiveclients--; - if (mcip->mci_state_flags & MCIS_EXCLUSIVE) + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) { mip->mi_state_flags &= ~MIS_EXCLUSIVE; + if (mip->mi_state_flags & MIS_LEGACY) + mip->mi_capab_legacy.ml_active_clear(mip->mi_driver); + } + + if (fastpath_disabled) + mac_fastpath_enable((mac_handle_t)mip); + kmem_free(muip, sizeof (mac_unicast_impl_t)); return (err); } @@ -2087,10 +2119,10 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, muip->mui_vid); } - mac_stop((mac_handle_t)mip); + FLOW_FINAL_REFRELE(flent); - i_mac_perim_exit(mip); - return (0); + ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE)); + goto done; } /* @@ -2170,8 +2202,14 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mac_capab_update((mac_handle_t)mip); mac_virtual_link_update(mip); } - if (mcip->mci_state_flags & MCIS_EXCLUSIVE) + + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) { mip->mi_state_flags &= ~MIS_EXCLUSIVE; + + if (mip->mi_state_flags & MIS_LEGACY) + mip->mi_capab_legacy.ml_active_clear(mip->mi_driver); + } + mcip->mci_state_flags &= ~MCIS_UNICAST_HW; if (mcip->mci_state_flags & MCIS_TAG_DISABLE) @@ -2183,10 +2221,16 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK; - mac_stop((mac_handle_t)mip); + kmem_free(muip, sizeof (mac_unicast_impl_t)); +done: + /* + * Disable fastpath if this is a VNIC or a VLAN. 
+ */ + if (mcip->mci_state_flags & MCIS_IS_VNIC) + mac_fastpath_enable((mac_handle_t)mip); + mac_stop((mac_handle_t)mip); i_mac_perim_exit(mip); - kmem_free(muip, sizeof (mac_unicast_impl_t)); return (0); } @@ -3149,16 +3193,17 @@ mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data) mac_impl_t *mip = (mac_impl_t *)mh; /* - * if mi_nactiveclients > 1, only MAC_CAPAB_HCKSUM, - * MAC_CAPAB_NO_NATIVEVLAN, MAC_CAPAB_NO_ZCOPY can be advertised. + * if mi_nactiveclients > 1, only MAC_CAPAB_LEGACY, MAC_CAPAB_HCKSUM, + * MAC_CAPAB_NO_NATIVEVLAN and MAC_CAPAB_NO_ZCOPY can be advertised. */ if (mip->mi_nactiveclients > 1) { switch (cap) { - case MAC_CAPAB_HCKSUM: - return (i_mac_capab_get(mh, cap, cap_data)); case MAC_CAPAB_NO_NATIVEVLAN: case MAC_CAPAB_NO_ZCOPY: return (B_TRUE); + case MAC_CAPAB_LEGACY: + case MAC_CAPAB_HCKSUM: + break; default: return (B_FALSE); } @@ -3303,7 +3348,8 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_impl_t *mip = (mac_impl_t *)mh; mac_client_impl_t *mcip; int err = 0; - mac_resource_props_t tmrp; + uint32_t resmask, newresmask; + mac_resource_props_t tmrp, umrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -3311,6 +3357,20 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) if (err != 0) return (err); + bcopy(&mip->mi_resource_props, &umrp, sizeof (mac_resource_props_t)); + resmask = umrp.mrp_mask; + mac_update_resources(mrp, &umrp, B_FALSE); + newresmask = umrp.mrp_mask; + + if (resmask == 0 && newresmask != 0) { + /* + * Bandwidth, priority or cpu link properties configured, + * must disable fastpath. + */ + if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) + return (err); + } + /* * Since bind_cpu may be modified by mac_client_set_resources() * we use a copy of bind_cpu and finally cache bind_cpu in mip. @@ -3322,9 +3382,20 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) err = mac_client_set_resources((mac_client_handle_t)mcip, &tmrp); } - /* if mac_client_set_resources failed, do not update the values */ - if (err == 0) - mac_update_resources(mrp, &mip->mi_resource_props, B_FALSE); + + /* Only update the values if mac_client_set_resources succeeded */ + if (err == 0) { + bcopy(&umrp, &mip->mi_resource_props, + sizeof (mac_resource_props_t)); + /* + * If bankwidth, priority or cpu link properties cleared, + * renable fastpath. 
+ */ + if (resmask != 0 && newresmask == 0) + mac_fastpath_enable((mac_handle_t)mip); + } else if (resmask == 0 && newresmask != 0) { + mac_fastpath_enable((mac_handle_t)mip); + } return (err); } diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c index cb6560b1f7..fd4d13cf1b 100644 --- a/usr/src/uts/common/io/mac/mac_flow.c +++ b/usr/src/uts/common/io/mac/mac_flow.c @@ -335,8 +335,9 @@ mac_flow_rem_subflow(flow_entry_t *flent) { flow_tab_t *ft = flent->fe_flow_tab; mac_client_impl_t *mcip = ft->ft_mcip; + mac_handle_t mh = (mac_handle_t)ft->ft_mip; - ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); + ASSERT(MAC_PERIM_HELD(mh)); mac_flow_remove(ft, flent, B_FALSE); if (flent->fe_mcip == NULL) { @@ -348,10 +349,11 @@ mac_flow_rem_subflow(flow_entry_t *flent) mac_flow_tab_destroy(ft); mcip->mci_subflow_tab = NULL; } - return; + } else { + mac_flow_wait(flent, FLOW_DRIVER_UPCALL); + mac_link_flow_clean((mac_client_handle_t)mcip, flent); } - mac_flow_wait(flent, FLOW_DRIVER_UPCALL); - mac_link_flow_clean((mac_client_handle_t)mcip, flent); + mac_fastpath_enable(mh); } /* @@ -363,13 +365,17 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, boolean_t instantiate_flow) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_handle_t mh = (mac_handle_t)mcip->mci_mip; flow_tab_info_t *ftinfo; flow_mask_t mask; flow_tab_t *ft; int err; boolean_t ft_created = B_FALSE; - ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); + ASSERT(MAC_PERIM_HELD(mh)); + + if ((err = mac_fastpath_disable(mh)) != 0) + return (err); /* * If the subflow table exists already just add the new subflow @@ -382,8 +388,10 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, * Try to create a new table and then add the subflow to the * newly created subflow table */ - if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) + if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) { + mac_fastpath_enable(mh); return (EOPNOTSUPP); + } mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size, mcip->mci_mip, &ft); @@ -394,6 +402,7 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, if (err != 0) { if (ft_created) mac_flow_tab_destroy(ft); + mac_fastpath_enable(mh); return (err); } @@ -405,6 +414,7 @@ mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, mac_flow_remove(ft, flent, B_FALSE); if (ft_created) mac_flow_tab_destroy(ft); + mac_fastpath_enable(mh); return (err); } } else { diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 4d9d590457..0c9d6fddf2 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -131,7 +131,6 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) uint_t instance; boolean_t style1_created = B_FALSE; boolean_t style2_created = B_FALSE; - mac_capab_legacy_t legacy; char *driver; minor_t minor = 0; @@ -298,14 +297,11 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) } mip->mi_callbacks = mregp->m_callbacks; - if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY, &legacy)) + if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY, + &mip->mi_capab_legacy)) { mip->mi_state_flags |= MIS_LEGACY; - - if (mip->mi_state_flags & MIS_LEGACY) { - mip->mi_unsup_note = legacy.ml_unsup_note; - mip->mi_phy_dev = legacy.ml_dev; + mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev; } else { - mip->mi_unsup_note = 0; mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip), ddi_get_instance(mip->mi_dip) + 1); } 
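[Editor's sketch] The mac_flow.c hunks above, like the i_mac_set_resources() change before them, follow one pairing rule: mac_fastpath_disable() is called before a feature that requires the slow path takes effect, and mac_fastpath_enable() is called on every failure path and at teardown. A minimal sketch of that pairing is below; setup_feature() and teardown_feature() are hypothetical placeholders for the real subflow/resource-property work, not functions from this patch.

	/*
	 * Sketch only -- not part of the patch.  The point is that every
	 * return path balances the earlier mac_fastpath_disable(), so a
	 * legacy device is never left stuck in slow-path mode.
	 */
	static int
	feature_add_sketch(mac_handle_t mh)
	{
		int err;

		if ((err = mac_fastpath_disable(mh)) != 0)
			return (err);

		if ((err = setup_feature(mh)) != 0) {
			mac_fastpath_enable(mh);	/* undo on failure */
			return (err);
		}
		return (0);
	}

	static void
	feature_remove_sketch(mac_handle_t mh)
	{
		teardown_feature(mh);
		mac_fastpath_enable(mh);		/* balance the add path */
	}
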
@@ -505,6 +501,12 @@ mac_unregister(mac_handle_t mh) i_mac_perim_enter(mip); + /* + * There is still resource properties configured over this mac. + */ + if (mip->mi_resource_props.mrp_mask != 0) + mac_fastpath_enable((mac_handle_t)mip); + if (mip->mi_minor < MAC_MAX_MINOR + 1) { ddi_remove_minor_node(mip->mi_dip, mip->mi_name); ddi_remove_minor_node(mip->mi_dip, diff --git a/usr/src/uts/common/io/softmac/softmac_ctl.c b/usr/src/uts/common/io/softmac/softmac_ctl.c index 99c665aae6..d4c8afa8ce 100644 --- a/usr/src/uts/common/io/softmac/softmac_ctl.c +++ b/usr/src/uts/common/io/softmac/softmac_ctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,22 @@ softmac_send_bind_req(softmac_lower_t *slp, uint_t sap) } int +softmac_send_unbind_req(softmac_lower_t *slp) +{ + mblk_t *reqmp; + + /* + * create unbind req message and send it down + */ + reqmp = mexchange(NULL, NULL, DL_UNBIND_REQ_SIZE, M_PROTO, + DL_UNBIND_REQ); + if (reqmp == NULL) + return (ENOMEM); + + return (softmac_proto_tx(slp, reqmp, NULL)); +} + +int softmac_send_promisc_req(softmac_lower_t *slp, t_uscalar_t level, boolean_t on) { mblk_t *reqmp; @@ -105,6 +121,7 @@ softmac_m_promisc(void *arg, boolean_t on) softmac_t *softmac = arg; softmac_lower_t *slp = softmac->smac_lower; + ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); ASSERT(slp != NULL); return (softmac_send_promisc_req(slp, DL_PROMISC_PHYS, on)); } @@ -120,6 +137,7 @@ softmac_m_multicst(void *arg, boolean_t add, const uint8_t *mca) t_uscalar_t dl_prim; uint32_t size, addr_length; + ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); /* * create multicst message and send it down */ @@ -162,6 +180,7 @@ softmac_m_unicst(void *arg, const uint8_t *macaddr) mblk_t *reqmp; size_t size; + ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); /* * create set_phys_addr message and send it down */ @@ -425,16 +444,72 @@ runt: } void -softmac_rput_process_notdata(queue_t *rq, mblk_t *mp) +softmac_rput_process_notdata(queue_t *rq, softmac_upper_t *sup, mblk_t *mp) { - softmac_lower_t *slp = rq->q_ptr; + softmac_lower_t *slp = rq->q_ptr; + union DL_primitives *dlp; + ssize_t len = MBLKL(mp); switch (DB_TYPE(mp)) { case M_PROTO: case M_PCPROTO: - softmac_rput_process_proto(rq, mp); - break; + /* + * If this is a shared-lower-stream, pass it to softmac to + * process. + */ + if (sup == NULL) { + softmac_rput_process_proto(rq, mp); + break; + } + /* + * Dedicated-lower-stream. + */ + dlp = (union DL_primitives *)mp->b_rptr; + ASSERT(len >= sizeof (dlp->dl_primitive)); + switch (dlp->dl_primitive) { + case DL_OK_ACK: + if (len < DL_OK_ACK_SIZE) + goto runt; + + /* + * If this is a DL_OK_ACK for a DL_UNBIND_REQ, pass it + * to softmac to process, otherwise directly pass it to + * the upper stream. + */ + if (dlp->ok_ack.dl_correct_primitive == DL_UNBIND_REQ) { + softmac_rput_process_proto(rq, mp); + break; + } + + putnext(sup->su_rq, mp); + break; + case DL_ERROR_ACK: + if (len < DL_ERROR_ACK_SIZE) + goto runt; + + /* + * If this is a DL_ERROR_ACK for a DL_UNBIND_REQ, pass + * it to softmac to process, otherwise directly pass it + * to the upper stream. 
+ */ + if (dlp->error_ack.dl_error_primitive == + DL_UNBIND_REQ) { + softmac_rput_process_proto(rq, mp); + break; + } + + putnext(sup->su_rq, mp); + break; + case DL_BIND_ACK: + case DL_CAPABILITY_ACK: + softmac_rput_process_proto(rq, mp); + break; + default: + putnext(sup->su_rq, mp); + break; + } + break; case M_FLUSH: if (*mp->b_rptr & FLUSHR) flushq(rq, FLUSHDATA); @@ -447,6 +522,11 @@ softmac_rput_process_notdata(queue_t *rq, mblk_t *mp) case M_IOCNAK: case M_COPYIN: case M_COPYOUT: + if (sup != NULL) { + putnext(sup->su_rq, mp); + break; + } + mutex_enter(&slp->sl_mutex); if (!slp->sl_pending_ioctl) { mutex_exit(&slp->sl_mutex); @@ -460,7 +540,7 @@ softmac_rput_process_notdata(queue_t *rq, mblk_t *mp) slp->sl_ack_mp = mp; cv_broadcast(&slp->sl_cv); mutex_exit(&slp->sl_mutex); - return; + break; default: cmn_err(CE_NOTE, "softmac: got unsupported mblk type 0x%x", @@ -468,4 +548,8 @@ softmac_rput_process_notdata(queue_t *rq, mblk_t *mp) freemsg(mp); break; } + return; +runt: + cmn_err(CE_WARN, "softmac: got runt %s", dl_primstr(dlp->dl_primitive)); + freemsg(mp); } diff --git a/usr/src/uts/common/io/softmac/softmac_dev.c b/usr/src/uts/common/io/softmac/softmac_dev.c index f548df055d..37c5740846 100644 --- a/usr/src/uts/common/io/softmac/softmac_dev.c +++ b/usr/src/uts/common/io/softmac/softmac_dev.c @@ -19,28 +19,45 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include <sys/types.h> -#include <sys/dld.h> #include <inet/common.h> #include <sys/stropts.h> #include <sys/modctl.h> -#include <sys/avl.h> +#include <sys/dld.h> #include <sys/softmac_impl.h> -#include <sys/softmac.h> dev_info_t *softmac_dip = NULL; +static kmem_cache_t *softmac_upper_cachep; + +/* + * This function is a generic open(9E) entry point into the softmac for + * both the softmac module and the softmac driver. + */ +static int softmac_cmn_open(queue_t *, dev_t *, int, int, cred_t *); + +/* + * The following softmac_mod_xxx() functions are (9E) entry point functions for + * the softmac module. + */ +static int softmac_mod_close(queue_t *); +static void softmac_mod_rput(queue_t *, mblk_t *); +static void softmac_mod_wput(queue_t *, mblk_t *); +static void softmac_mod_wsrv(queue_t *); + +/* + * The following softmac_drv_xxx() functions are (9E) entry point functions for + * the softmac driver. 
+ */ +static int softmac_drv_open(queue_t *, dev_t *, int, int, cred_t *); +static int softmac_drv_close(queue_t *); +static void softmac_drv_wput(queue_t *, mblk_t *); +static void softmac_drv_wsrv(queue_t *); -static int softmac_open(queue_t *, dev_t *, int, int, cred_t *); -static int softmac_close(queue_t *); -static void softmac_rput(queue_t *, mblk_t *); -static void softmac_rsrv(queue_t *); -static void softmac_wput(queue_t *, mblk_t *); -static void softmac_wsrv(queue_t *); static int softmac_attach(dev_info_t *, ddi_attach_cmd_t); static int softmac_detach(dev_info_t *, ddi_detach_cmd_t); static int softmac_info(dev_info_t *, ddi_info_cmd_t, void *, void **); @@ -68,21 +85,21 @@ static struct module_info softmac_dld_modinfo = { }; static struct qinit softmac_urinit = { - (pfi_t)softmac_rput, /* qi_putp */ - (pfi_t)softmac_rsrv, /* qi_srvp */ - softmac_open, /* qi_qopen */ - softmac_close, /* qi_qclose */ - NULL, /* qi_qadmin */ - &softmac_modinfo /* qi_minfo */ + (pfi_t)softmac_mod_rput, /* qi_putp */ + (pfi_t)NULL, /* qi_srvp */ + softmac_cmn_open, /* qi_qopen */ + softmac_mod_close, /* qi_qclose */ + NULL, /* qi_qadmin */ + &softmac_modinfo /* qi_minfo */ }; static struct qinit softmac_uwinit = { - (pfi_t)softmac_wput, /* qi_putp */ - (pfi_t)softmac_wsrv, /* qi_srvp */ - NULL, /* qi_qopen */ - NULL, /* qi_qclose */ - NULL, /* qi_qadmin */ - &softmac_modinfo /* qi_minfo */ + (pfi_t)softmac_mod_wput, /* qi_putp */ + (pfi_t)softmac_mod_wsrv, /* qi_srvp */ + NULL, /* qi_qopen */ + NULL, /* qi_qclose */ + NULL, /* qi_qadmin */ + &softmac_modinfo /* qi_minfo */ }; static struct streamtab softmac_tab = { @@ -95,11 +112,12 @@ DDI_DEFINE_STREAM_OPS(softmac_ops, nulldev, nulldev, softmac_attach, ddi_quiesce_not_supported); static struct qinit softmac_dld_r_qinit = { - NULL, NULL, dld_open, dld_close, NULL, &softmac_dld_modinfo + NULL, NULL, softmac_drv_open, softmac_drv_close, NULL, + &softmac_dld_modinfo }; static struct qinit softmac_dld_w_qinit = { - (pfi_t)dld_wput, (pfi_t)dld_wsrv, NULL, NULL, NULL, + (pfi_t)softmac_drv_wput, (pfi_t)softmac_drv_wsrv, NULL, NULL, NULL, &softmac_dld_modinfo }; @@ -128,6 +146,49 @@ static struct modlinkage softmac_modlinkage = { NULL }; +/*ARGSUSED*/ +static int +softmac_upper_constructor(void *buf, void *arg, int kmflag) +{ + softmac_upper_t *sup = buf; + + bzero(buf, sizeof (softmac_upper_t)); + + mutex_init(&sup->su_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&sup->su_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&sup->su_disp_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&sup->su_disp_cv, NULL, CV_DEFAULT, NULL); + list_create(&sup->su_req_list, sizeof (softmac_switch_req_t), + offsetof(softmac_switch_req_t, ssq_req_list_node)); + return (0); +} + +/*ARGSUSED*/ +static void +softmac_upper_destructor(void *buf, void *arg) +{ + softmac_upper_t *sup = buf; + + ASSERT(sup->su_slp == NULL); + ASSERT(sup->su_pending_head == NULL && sup->su_pending_tail == NULL); + ASSERT(!sup->su_dlpi_pending); + ASSERT(!sup->su_active); + ASSERT(!sup->su_closing); + ASSERT(sup->su_tx_flow_mp == NULL); + ASSERT(sup->su_tx_inprocess == 0); + ASSERT(sup->su_mode == SOFTMAC_UNKNOWN); + ASSERT(!sup->su_tx_busy); + ASSERT(!sup->su_bound); + ASSERT(!sup->su_taskq_scheduled); + ASSERT(list_is_empty(&sup->su_req_list)); + + list_destroy(&sup->su_req_list); + mutex_destroy(&sup->su_mutex); + cv_destroy(&sup->su_cv); + mutex_destroy(&sup->su_disp_mutex); + cv_destroy(&sup->su_disp_cv); +} + int _init(void) { @@ -135,6 +196,11 @@ _init(void) softmac_init(); + softmac_upper_cachep = 
kmem_cache_create("softmac_upper_cache", + sizeof (softmac_upper_t), 0, softmac_upper_constructor, + softmac_upper_destructor, NULL, NULL, NULL, 0); + ASSERT(softmac_upper_cachep != NULL); + if ((err = mod_install(&softmac_modlinkage)) != 0) { softmac_fini(); return (err); @@ -154,6 +220,7 @@ _fini(void) if ((err = mod_remove(&softmac_modlinkage)) != 0) return (err); + kmem_cache_destroy(softmac_upper_cachep); softmac_fini(); return (0); @@ -166,7 +233,7 @@ _info(struct modinfo *modinfop) } static int -softmac_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) +softmac_cmn_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) { softmac_lower_t *slp; /* @@ -198,16 +265,15 @@ softmac_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) /* * Regular device open of a softmac DLPI node. We modify * the queues' q_qinfo pointer such that all future STREAMS - * operations will go through dld's entry points (including - * dld_close()). + * operations will go through another set of entry points */ rq->q_qinfo = &softmac_dld_r_qinit; WR(rq)->q_qinfo = &softmac_dld_w_qinit; - return (dld_open(rq, devp, flag, sflag, credp)); + return (softmac_drv_open(rq, devp, flag, sflag, credp)); } static int -softmac_close(queue_t *rq) +softmac_mod_close(queue_t *rq) { softmac_lower_t *slp = rq->q_ptr; @@ -237,10 +303,11 @@ softmac_close(queue_t *rq) } static void -softmac_rput(queue_t *rq, mblk_t *mp) +softmac_mod_rput(queue_t *rq, mblk_t *mp) { - softmac_lower_t *slp = rq->q_ptr; - union DL_primitives *dlp; + softmac_lower_t *slp = rq->q_ptr; + softmac_lower_rxinfo_t *rxinfo; + union DL_primitives *dlp; /* * This is the softmac module. @@ -249,11 +316,21 @@ softmac_rput(queue_t *rq, mblk_t *mp) ASSERT((mp->b_next == NULL) && (mp->b_prev == NULL)); switch (DB_TYPE(mp)) { - case M_DATA: + case M_DATA: { + + /* + * If sl_rxinfo is non-NULL. This is dedicated-lower-stream + * created for fastpath. Directly call the rx callback. + */ + if ((rxinfo = slp->sl_rxinfo) != NULL) { + rxinfo->slr_rx(rxinfo->slr_arg, NULL, mp, NULL); + break; + } + /* - * Some drivers start to send up packets even if not in the - * DL_IDLE state, where sl_softmac is not set yet. Drop the - * packet in this case. + * A shared-lower-stream. Some driver starts to send up + * packets even it not in the DL_IDLE state, where + * sl_softmac is not set yet. Drop the packet in this case. */ if (slp->sl_softmac == NULL) { freemsg(mp); @@ -275,18 +352,13 @@ softmac_rput(queue_t *rq, mblk_t *mp) */ if (DB_REF(mp) == 1) { ASSERT(slp->sl_softmac != NULL); - /* - * We don't need any locks to protect sl_handle - * because ip_input() can tolerate if sl_handle - * is reset to NULL when DL_CAPAB_POLL is - * disabled. 
- */ mac_rx(slp->sl_softmac->smac_mh, NULL, mp); return; } else { softmac_rput_process_data(slp, mp); } break; + } case M_PROTO: case M_PCPROTO: if (MBLKL(mp) < sizeof (dlp->dl_primitive)) { @@ -295,6 +367,12 @@ softmac_rput(queue_t *rq, mblk_t *mp) } dlp = (union DL_primitives *)mp->b_rptr; if (dlp->dl_primitive == DL_UNITDATA_IND) { + + if ((rxinfo = slp->sl_rxinfo) != NULL) { + rxinfo->slr_rx(rxinfo->slr_arg, NULL, mp, NULL); + break; + } + cmn_err(CE_WARN, "got unexpected %s message", dl_primstr(DL_UNITDATA_IND)); freemsg(mp); @@ -302,19 +380,13 @@ softmac_rput(queue_t *rq, mblk_t *mp) } /*FALLTHROUGH*/ default: - softmac_rput_process_notdata(rq, mp); + softmac_rput_process_notdata(rq, slp->sl_sup, mp); break; } } -/* ARGSUSED */ -static void -softmac_rsrv(queue_t *rq) -{ -} - static void -softmac_wput(queue_t *wq, mblk_t *mp) +softmac_mod_wput(queue_t *wq, mblk_t *mp) { /* * This is the softmac module @@ -342,7 +414,6 @@ softmac_wput(queue_t *wq, mblk_t *mp) */ arg = (smac_ioc_start_t *)mp->b_cont->b_rptr; arg->si_slp = slp; - miocack(wq, mp, sizeof (*arg), 0); break; } @@ -359,7 +430,7 @@ softmac_wput(queue_t *wq, mblk_t *mp) } static void -softmac_wsrv(queue_t *wq) +softmac_mod_wsrv(queue_t *wq) { softmac_lower_t *slp = wq->q_ptr; @@ -372,7 +443,9 @@ softmac_wsrv(queue_t *wq) * Inform that the tx resource is available; mac_tx_update() will * inform all the upper streams sharing this lower stream. */ - if (slp->sl_softmac != NULL) + if (slp->sl_sup != NULL) + qenable(slp->sl_sup->su_wq); + else if (slp->sl_softmac != NULL) mac_tx_update(slp->sl_softmac->smac_mh); } @@ -420,3 +493,179 @@ softmac_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) return (DDI_FAILURE); } + +/*ARGSUSED*/ +static void +softmac_dedicated_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + mac_header_info_t *mhip) +{ + queue_t *rq = ((softmac_upper_t *)arg)->su_rq; + + if (canputnext(rq)) + putnext(rq, mp); + else + freemsg(mp); +} + +/*ARGSUSED*/ +static int +softmac_drv_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) +{ + softmac_upper_t *sup = NULL; + softmac_t *softmac; + int err = 0; + + /* + * This is a softmac device created for a legacy device, find the + * associated softmac and initialize the softmac_upper_t structure. + */ + if ((err = softmac_hold(*devp, &softmac)) != 0) + return (err); + + sup = kmem_cache_alloc(softmac_upper_cachep, KM_NOSLEEP); + if (sup == NULL) { + err = ENOMEM; + goto fail; + } + + ASSERT(list_is_empty(&sup->su_req_list)); + + if ((sup->su_tx_flow_mp = allocb(1, BPRI_HI)) == NULL) { + err = ENOMEM; + goto fail; + } + + sup->su_rq = rq; + sup->su_wq = WR(rq); + sup->su_softmac = softmac; + sup->su_mode = SOFTMAC_UNKNOWN; + + sup->su_rxinfo.slr_arg = sup; + sup->su_rxinfo.slr_rx = softmac_dedicated_rx; + sup->su_direct_rxinfo.slr_arg = sup; + sup->su_direct_rxinfo.slr_rx = softmac_dedicated_rx; + + if ((err = dld_str_open(rq, devp, sup)) != 0) { + freeb(sup->su_tx_flow_mp); + sup->su_tx_flow_mp = NULL; + goto fail; + } + + return (0); + +fail: + if (sup != NULL) + kmem_cache_free(softmac_upper_cachep, sup); + softmac_rele(softmac); + return (err); +} + +static int +softmac_drv_close(queue_t *rq) +{ + softmac_upper_t *sup = dld_str_private(rq); + softmac_t *softmac = sup->su_softmac; + + ASSERT(WR(rq)->q_next == NULL); + + qprocsoff(rq); + + ASSERT(sup->su_tx_inprocess == 0); + + /* + * Wait until the pending request are processed by the worker thread. 
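[Editor's sketch] The M_DATA case of softmac_mod_rput() above is where a packet either bypasses GLDv3 or not, decided per lower stream. The following condensed sketch of that receive dispatch only uses the slp fields introduced by this patch; it is an illustration, not replacement code.

	/*
	 * Sketch of the receive-side dispatch in softmac_mod_rput():
	 * a dedicated (fast-path) lower stream has sl_rxinfo set and its
	 * packets go straight to the registered callback (ultimately IP);
	 * a shared lower stream hands packets to the GLDv3 MAC layer.
	 */
	static void
	softmac_rx_dispatch_sketch(softmac_lower_t *slp, mblk_t *mp)
	{
		softmac_lower_rxinfo_t *rxinfo;

		if ((rxinfo = slp->sl_rxinfo) != NULL) {
			rxinfo->slr_rx(rxinfo->slr_arg, NULL, mp, NULL);
			return;
		}

		if (slp->sl_softmac == NULL) {
			/* driver sent data before DL_IDLE; drop it */
			freemsg(mp);
			return;
		}

		mac_rx(slp->sl_softmac->smac_mh, NULL, mp);
	}
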
+ */ + mutex_enter(&sup->su_disp_mutex); + sup->su_closing = B_TRUE; + while (sup->su_dlpi_pending) + cv_wait(&sup->su_disp_cv, &sup->su_disp_mutex); + mutex_exit(&sup->su_disp_mutex); + + softmac_upperstream_close(sup); + + if (sup->su_tx_flow_mp != NULL) { + freeb(sup->su_tx_flow_mp); + sup->su_tx_flow_mp = NULL; + } + + if (sup->su_active) { + mutex_enter(&softmac->smac_active_mutex); + softmac->smac_nactive--; + mutex_exit(&softmac->smac_active_mutex); + sup->su_active = B_FALSE; + } + + sup->su_bound = B_FALSE; + sup->su_softmac = NULL; + sup->su_closing = B_FALSE; + + kmem_cache_free(softmac_upper_cachep, sup); + + softmac_rele(softmac); + return (dld_str_close(rq)); +} + +static void +softmac_drv_wput(queue_t *wq, mblk_t *mp) +{ + softmac_upper_t *sup = dld_str_private(wq); + t_uscalar_t prim; + + ASSERT(wq->q_next == NULL); + + switch (DB_TYPE(mp)) { + case M_DATA: + case M_MULTIDATA: + softmac_wput_data(sup, mp); + break; + case M_PROTO: + case M_PCPROTO: + + if (MBLKL(mp) < sizeof (t_uscalar_t)) { + freemsg(mp); + return; + } + + prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; + if (prim == DL_UNITDATA_REQ) { + softmac_wput_data(sup, mp); + return; + } + + softmac_wput_nondata(sup, mp); + break; + default: + softmac_wput_nondata(sup, mp); + break; + } +} + +static void +softmac_drv_wsrv(queue_t *wq) +{ + softmac_upper_t *sup = dld_str_private(wq); + + ASSERT(wq->q_next == NULL); + + mutex_enter(&sup->su_mutex); + if (sup->su_mode != SOFTMAC_FASTPATH) { + /* + * Bump su_tx_inprocess so that su_mode won't change. + */ + sup->su_tx_inprocess++; + mutex_exit(&sup->su_mutex); + dld_wsrv(wq); + mutex_enter(&sup->su_mutex); + if (--sup->su_tx_inprocess == 0) + cv_signal(&sup->su_cv); + } else if (sup->su_tx_busy && SOFTMAC_CANPUTNEXT(sup->su_slp->sl_wq)) { + /* + * The flow-conctol of the dedicated-lower-stream is + * relieved, relieve the flow-control of the + * upper-stream too. + */ + sup->su_tx_flow_mp = getq(wq); + sup->su_tx_busy = B_FALSE; + } + mutex_exit(&sup->su_mutex); +} diff --git a/usr/src/uts/common/io/softmac/softmac_fp.c b/usr/src/uts/common/io/softmac/softmac_fp.c new file mode 100644 index 0000000000..a012aa32a4 --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac_fp.c @@ -0,0 +1,1252 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * Softmac data-path switching: + * + * - Fast-path model + * + * When the softmac fast-path is used, a dedicated lower-stream + * will be opened over the legacy device for each IP/ARP (upper-)stream + * over the softMAC, and all DLPI messages (including control messages + * and data messages) will be exchanged between the upper-stream and + * the corresponding lower-stream directly. Therefore, the data + * demultiplexing, filtering and classification processing will be done + * by the lower-stream, and the GLDv3 DLS/MAC layer processing will be + * no longer needed. + * + * - Slow-path model + * + * Some GLDv3 features requires the GLDv3 DLS/MAC layer processing to + * not be bypassed to assure its function correctness. For example, + * softmac fast-path must be disabled to support GLDv3 VNIC functionality. + * In this case, a shared lower-stream will be opened over the legacy + * device, which is responsible for implementing the GLDv3 callbacks + * and passing RAW data messages between the legacy devices and the GLDv3 + * framework. + * + * By default, the softmac fast-path mode will be used to assure the + * performance; MAC clients will be able to request to disable the softmac + * fast-path mode to support certain features, and if that succeeds, + * the system will fallback to the slow-path softmac data-path model. + * + * + * The details of the softmac data fast-path model is stated as below + * + * 1. When a stream is opened on a softMAC, the softmac module will takes + * over the DLPI processing on this stream; + * + * 2. For IP/ARP streams over a softMAC, softmac data fast-path will be + * used by default, unless fast-path is disabled by any MAC client + * explicitly. The softmac module first identifies an IP/ARP stream + * by seeing whether there is a SIOCSLIFNAME ioctl sent from upstream, + * if there is one, this stream is either an IP or an ARP stream + * and will use fast-path potentially; + * + * 3. When the softmac fast-path is used, an dedicated lower-stream will + * be setup for each IP/ARP stream (1-1 mapping). From that point on, + * all control and data messages will be exchanged between the IP/ARP + * upper-stream and the legacy device through this dedicated + * lower-stream. As a result, the DLS/MAC layer processing in GLDv3 + * will be skipped, and this greatly improves the performance; + * + * 4. When the softmac data fast-path is disabled by a MAC client (e.g., + * by a VNIC), all the IP/ARP upper streams will try to switch from + * the fast-path to the slow-path. The dedicated lower-stream will be + * destroyed, and all the control and data-messages will go through the + * existing GLDv3 code path and (in the end) the shared lower-stream; + * + * 5. On the other hand, when the last MAC client cancels its fast-path + * disable request, all the IP/ARP streams will try to switch back to + * the fast-path mode; + * + * Step 5 and 6 both rely on the data-path mode switching process + * described below: + * + * 1) To switch the softmac data-path mode (between fast-path and slow-path), + * softmac will first send a DL_NOTE_REPLUMB DL_NOTIFY_IND message + * upstream over each IP/ARP streams that needs data-path mode switching; + * + * 2) When IP receives this DL_NOTE_REPLUMB message, it will bring down + * all the IP interfaces on the corresponding ill (IP Lower level + * structure), and bring up those interfaces over again; this will in + * turn cause the ARP to "replumb" the interface. 
+ * + * During the replumb process, both IP and ARP will send downstream the + * necessary DL_DISABMULTI_REQ and DL_UNBIND_REQ messages and cleanup + * the old state of the underlying softMAC, following with the necessary + * DL_BIND_REQ and DL_ENABMULTI_REQ messages to setup the new state. + * Between the cleanup and re-setup process, IP/ARP will also send down + * a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF messages to the softMAC to + * indicate the *switching point*; + * + * 3) When softmac receives the DL_NOTE_REPLUMB_DONE message, it either + * creates or destroys the dedicated lower-stream (depending on which + * data-path mode the softMAC switches to), and change the softmac + * data-path mode. From then on, softmac will process all the succeeding + * control messages (including the DL_BIND_REQ and DL_ENABMULTI_REQ + * messages) and data messages based on new data-path mode. + */ + +#include <sys/types.h> +#include <sys/disp.h> +#include <sys/callb.h> +#include <sys/sysmacros.h> +#include <sys/file.h> +#include <sys/vlan.h> +#include <sys/dld.h> +#include <sys/sockio.h> +#include <sys/softmac_impl.h> + +static kmutex_t softmac_taskq_lock; +static kcondvar_t softmac_taskq_cv; +static list_t softmac_taskq_list; /* List of softmac_upper_t */ +boolean_t softmac_taskq_quit; +boolean_t softmac_taskq_done; + +static void softmac_taskq_dispatch(); +static int softmac_fastpath_setup(softmac_upper_t *); +static mac_tx_cookie_t softmac_fastpath_wput_data(softmac_upper_t *, mblk_t *, + uintptr_t, uint16_t); +static void softmac_datapath_switch_done(softmac_upper_t *); + +void +softmac_fp_init() +{ + mutex_init(&softmac_taskq_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&softmac_taskq_cv, NULL, CV_DRIVER, NULL); + + softmac_taskq_quit = B_FALSE; + softmac_taskq_done = B_FALSE; + list_create(&softmac_taskq_list, sizeof (softmac_upper_t), + offsetof(softmac_upper_t, su_taskq_list_node)); + (void) thread_create(NULL, 0, softmac_taskq_dispatch, NULL, 0, + &p0, TS_RUN, minclsyspri); +} + +void +softmac_fp_fini() +{ + /* + * Request the softmac_taskq thread to quit and wait for it to be done. 
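[Editor's sketch] Step 1) of the switching process described above boils down to sending a DL_NOTE_REPLUMB notification up each IP/ARP stream. The sketch below shows how such a message is built (the construction mirrors softmac_datapath_switch() later in this file); it is simplified in that the patch queues the message through softmac_wput_nondata() rather than calling putnext() directly.

	/*
	 * Sketch: construct a DL_NOTE_REPLUMB DL_NOTIFY_IND and send it
	 * upstream to ask IP/ARP to replumb the interface.  IP/ARP later
	 * answers with a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF, which marks
	 * the data-path switching point.
	 */
	static int
	softmac_send_replumb_sketch(queue_t *rq)
	{
		mblk_t		*mp;
		dl_notify_ind_t	*dlip;

		if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL)
			return (ENOMEM);

		mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t);
		DB_TYPE(mp) = M_PROTO;
		bzero(mp->b_rptr, sizeof (dl_notify_ind_t));

		dlip = (dl_notify_ind_t *)mp->b_rptr;
		dlip->dl_primitive = DL_NOTIFY_IND;
		dlip->dl_notification = DL_NOTE_REPLUMB;

		putnext(rq, mp);
		return (0);
	}
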
+ */ + mutex_enter(&softmac_taskq_lock); + softmac_taskq_quit = B_TRUE; + cv_signal(&softmac_taskq_cv); + while (!softmac_taskq_done) + cv_wait(&softmac_taskq_cv, &softmac_taskq_lock); + mutex_exit(&softmac_taskq_lock); + list_destroy(&softmac_taskq_list); + + mutex_destroy(&softmac_taskq_lock); + cv_destroy(&softmac_taskq_cv); +} + +static boolean_t +check_ip_above(queue_t *q) +{ + queue_t *next_q; + boolean_t ret = B_TRUE; + + claimstr(q); + next_q = q->q_next; + if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0) + ret = B_FALSE; + releasestr(q); + return (ret); +} + +/* ARGSUSED */ +static int +softmac_capab_perim(softmac_upper_t *sup, void *data, uint_t flags) +{ + switch (flags) { + case DLD_ENABLE: + mutex_enter(&sup->su_mutex); + break; + case DLD_DISABLE: + mutex_exit(&sup->su_mutex); + break; + case DLD_QUERY: + return (MUTEX_HELD(&sup->su_mutex)); + } + return (0); +} + +/* ARGSUSED */ +static mac_tx_notify_handle_t +softmac_client_tx_notify(void *txcb, mac_tx_notify_t func, void *arg) +{ + return (NULL); +} + +static int +softmac_capab_direct(softmac_upper_t *sup, void *data, uint_t flags) +{ + dld_capab_direct_t *direct = data; + softmac_lower_t *slp = sup->su_slp; + + ASSERT(MUTEX_HELD(&sup->su_mutex)); + + ASSERT(sup->su_mode == SOFTMAC_FASTPATH); + + switch (flags) { + case DLD_ENABLE: + if (sup->su_direct) + return (0); + + sup->su_direct_rxinfo.slr_rx = (softmac_rx_t)direct->di_rx_cf; + sup->su_direct_rxinfo.slr_arg = direct->di_rx_ch; + slp->sl_rxinfo = &sup->su_direct_rxinfo; + direct->di_tx_df = (uintptr_t)softmac_fastpath_wput_data; + direct->di_tx_dh = sup; + + /* + * We relying on the STREAM flow-control to backenable + * the IP stream. Therefore, no notify callback needs to + * be registered. But IP requires this to be a valid function + * pointer. + */ + direct->di_tx_cb_df = (uintptr_t)softmac_client_tx_notify; + direct->di_tx_cb_dh = NULL; + sup->su_direct = B_TRUE; + return (0); + + case DLD_DISABLE: + if (!sup->su_direct) + return (0); + + slp->sl_rxinfo = &sup->su_rxinfo; + sup->su_direct = B_FALSE; + return (0); + } + return (ENOTSUP); +} + +static int +softmac_dld_capab(softmac_upper_t *sup, uint_t type, void *data, uint_t flags) +{ + int err; + + /* + * Don't enable direct callback capabilities unless the caller is + * the IP client. When a module is inserted in a stream (_I_INSERT) + * the stack initiates capability disable, but due to races, the + * module insertion may complete before the capability disable + * completes. So we limit the check to DLD_ENABLE case. + */ + if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) && + !check_ip_above(sup->su_rq)) { + return (ENOTSUP); + } + + switch (type) { + case DLD_CAPAB_DIRECT: + err = softmac_capab_direct(sup, data, flags); + break; + + case DLD_CAPAB_PERIM: + err = softmac_capab_perim(sup, data, flags); + break; + + default: + err = ENOTSUP; + break; + } + return (err); +} + +static void +softmac_capability_advertise(softmac_upper_t *sup, mblk_t *mp) +{ + dl_capability_ack_t *dlap; + dl_capability_sub_t *dlsp; + t_uscalar_t subsize; + uint8_t *ptr; + queue_t *q = sup->su_wq; + mblk_t *mp1; + softmac_t *softmac = sup->su_softmac; + boolean_t dld_capable = B_FALSE; + boolean_t hcksum_capable = B_FALSE; + boolean_t zcopy_capable = B_FALSE; + boolean_t mdt_capable = B_FALSE; + + ASSERT(sup->su_mode == SOFTMAC_FASTPATH); + + /* + * Initially assume no capabilities. 
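[Editor's sketch] softmac_fp_fini() above and the softmac_taskq_dispatch() thread later in this file coordinate shutdown with a quit/done flag pair guarded by one mutex and condition variable. The stripped-down sketch below shows only that handshake; the names are illustrative and initialization/CPR-callback handling is omitted.

	static kmutex_t		sk_lock;
	static kcondvar_t	sk_cv;
	static boolean_t	sk_quit, sk_done;

	static void
	sk_worker(void)
	{
		mutex_enter(&sk_lock);
		while (!sk_quit) {
			/* ... drain the pending work list here ... */
			cv_wait(&sk_cv, &sk_lock);
		}
		sk_done = B_TRUE;		/* acknowledge the quit request */
		cv_signal(&sk_cv);
		mutex_exit(&sk_lock);
		thread_exit();
	}

	static void
	sk_fini(void)
	{
		mutex_enter(&sk_lock);
		sk_quit = B_TRUE;
		cv_signal(&sk_cv);		/* wake the worker */
		while (!sk_done)
			cv_wait(&sk_cv, &sk_lock);
		mutex_exit(&sk_lock);
	}
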
+ */ + subsize = 0; + + /* + * Direct capability negotiation interface between IP and softmac + */ + if (check_ip_above(sup->su_rq)) { + dld_capable = B_TRUE; + subsize += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_dld_t); + } + + /* + * Check if checksum offload is supported on this MAC. + */ + if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) { + hcksum_capable = B_TRUE; + subsize += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_hcksum_t); + } + + /* + * Check if zerocopy is supported on this interface. + */ + if (!(softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY)) { + zcopy_capable = B_TRUE; + subsize += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_zerocopy_t); + } + + if (softmac->smac_mdt) { + mdt_capable = B_TRUE; + subsize += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_mdt_t); + } + + /* + * If there are no capabilities to advertise or if we + * can't allocate a response, send a DL_ERROR_ACK. + */ + if ((subsize == 0) || (mp1 = reallocb(mp, + sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) { + dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0); + return; + } + + mp = mp1; + DB_TYPE(mp) = M_PROTO; + mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize; + bzero(mp->b_rptr, MBLKL(mp)); + dlap = (dl_capability_ack_t *)mp->b_rptr; + dlap->dl_primitive = DL_CAPABILITY_ACK; + dlap->dl_sub_offset = sizeof (dl_capability_ack_t); + dlap->dl_sub_length = subsize; + ptr = (uint8_t *)&dlap[1]; + + /* + * IP polling interface. + */ + if (dld_capable) { + dl_capab_dld_t dld; + + dlsp = (dl_capability_sub_t *)ptr; + dlsp->dl_cap = DL_CAPAB_DLD; + dlsp->dl_length = sizeof (dl_capab_dld_t); + ptr += sizeof (dl_capability_sub_t); + + bzero(&dld, sizeof (dl_capab_dld_t)); + dld.dld_version = DLD_CURRENT_VERSION; + dld.dld_capab = (uintptr_t)softmac_dld_capab; + dld.dld_capab_handle = (uintptr_t)sup; + + dlcapabsetqid(&(dld.dld_mid), sup->su_rq); + bcopy(&dld, ptr, sizeof (dl_capab_dld_t)); + ptr += sizeof (dl_capab_dld_t); + } + + /* + * TCP/IP checksum offload. 
+ */ + if (hcksum_capable) { + dl_capab_hcksum_t hcksum; + + dlsp = (dl_capability_sub_t *)ptr; + + dlsp->dl_cap = DL_CAPAB_HCKSUM; + dlsp->dl_length = sizeof (dl_capab_hcksum_t); + ptr += sizeof (dl_capability_sub_t); + + bzero(&hcksum, sizeof (dl_capab_hcksum_t)); + hcksum.hcksum_version = HCKSUM_VERSION_1; + hcksum.hcksum_txflags = softmac->smac_hcksum_txflags; + dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq); + bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t)); + ptr += sizeof (dl_capab_hcksum_t); + } + + /* + * Zero copy + */ + if (zcopy_capable) { + dl_capab_zerocopy_t zcopy; + + dlsp = (dl_capability_sub_t *)ptr; + + dlsp->dl_cap = DL_CAPAB_ZEROCOPY; + dlsp->dl_length = sizeof (dl_capab_zerocopy_t); + ptr += sizeof (dl_capability_sub_t); + + bzero(&zcopy, sizeof (dl_capab_zerocopy_t)); + zcopy.zerocopy_version = ZEROCOPY_VERSION_1; + zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM; + dlcapabsetqid(&(zcopy.zerocopy_mid), sup->su_rq); + bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t)); + ptr += sizeof (dl_capab_zerocopy_t); + } + + /* + * MDT + */ + if (mdt_capable) { + dl_capab_mdt_t mdt; + + dlsp = (dl_capability_sub_t *)ptr; + + dlsp->dl_cap = DL_CAPAB_MDT; + dlsp->dl_length = sizeof (dl_capab_mdt_t); + ptr += sizeof (dl_capability_sub_t); + + bzero(&mdt, sizeof (dl_capab_mdt_t)); + mdt.mdt_version = MDT_VERSION_2; + mdt.mdt_flags = DL_CAPAB_MDT_ENABLE; + mdt.mdt_hdr_head = softmac->smac_mdt_capab.mdt_hdr_head; + mdt.mdt_hdr_tail = softmac->smac_mdt_capab.mdt_hdr_tail; + mdt.mdt_max_pld = softmac->smac_mdt_capab.mdt_max_pld; + mdt.mdt_span_limit = softmac->smac_mdt_capab.mdt_span_limit; + dlcapabsetqid(&(mdt.mdt_mid), sup->su_rq); + bcopy(&mdt, ptr, sizeof (dl_capab_mdt_t)); + ptr += sizeof (dl_capab_mdt_t); + } + + ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize); + qreply(q, mp); +} + +static void +softmac_capability_req(softmac_upper_t *sup, mblk_t *mp) +{ + dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr; + dl_capability_sub_t *sp; + size_t size, len; + offset_t off, end; + t_uscalar_t dl_err; + queue_t *q = sup->su_wq; + + ASSERT(sup->su_mode == SOFTMAC_FASTPATH); + if (MBLKL(mp) < sizeof (dl_capability_req_t)) { + dl_err = DL_BADPRIM; + goto failed; + } + + if (!sup->su_bound) { + dl_err = DL_OUTSTATE; + goto failed; + } + + /* + * This request is overloaded. If there are no requested capabilities + * then we just want to acknowledge with all the capabilities we + * support. Otherwise we enable the set of capabilities requested. + */ + if (dlp->dl_sub_length == 0) { + softmac_capability_advertise(sup, mp); + return; + } + + if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) { + dl_err = DL_BADPRIM; + goto failed; + } + + dlp->dl_primitive = DL_CAPABILITY_ACK; + + off = dlp->dl_sub_offset; + len = dlp->dl_sub_length; + + /* + * Walk the list of capabilities to be enabled. + */ + for (end = off + len; off < end; ) { + sp = (dl_capability_sub_t *)(mp->b_rptr + off); + size = sizeof (dl_capability_sub_t) + sp->dl_length; + + if (off + size > end || + !IS_P2ALIGNED(off, sizeof (uint32_t))) { + dl_err = DL_BADPRIM; + goto failed; + } + + switch (sp->dl_cap) { + /* + * TCP/IP checksum offload to hardware. + */ + case DL_CAPAB_HCKSUM: { + dl_capab_hcksum_t *hcksump; + dl_capab_hcksum_t hcksum; + + hcksump = (dl_capab_hcksum_t *)&sp[1]; + /* + * Copy for alignment. 
+ */ + bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t)); + dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq); + bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t)); + break; + } + + default: + break; + } + + off += size; + } + qreply(q, mp); + return; +failed: + dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0); +} + +static void +softmac_bind_req(softmac_upper_t *sup, mblk_t *mp) +{ + softmac_lower_t *slp = sup->su_slp; + softmac_t *softmac = sup->su_softmac; + mblk_t *ackmp, *mp1; + int err; + + if (MBLKL(mp) < DL_BIND_REQ_SIZE) { + freemsg(mp); + return; + } + + /* + * Allocate ackmp incase the underlying driver does not ack timely. + */ + if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) { + dlerrorack(sup->su_wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM); + return; + } + + err = softmac_output(slp, mp, DL_BIND_REQ, DL_BIND_ACK, &ackmp); + if (ackmp != NULL) { + freemsg(mp1); + } else { + /* + * The driver does not ack timely. + */ + ASSERT(err == ENOMSG); + ackmp = mp1; + } + if (err != 0) + goto failed; + + /* + * Enable capabilities the underlying driver claims to support. + */ + if ((err = softmac_capab_enable(slp)) != 0) + goto failed; + + /* + * Check whether this softmac is already marked as exclusively used, + * e.g., an aggregation is created over it. Fail the BIND_REQ if so. + */ + mutex_enter(&softmac->smac_active_mutex); + if (softmac->smac_active) { + mutex_exit(&softmac->smac_active_mutex); + err = EBUSY; + goto failed; + } + softmac->smac_nactive++; + sup->su_active = B_TRUE; + mutex_exit(&softmac->smac_active_mutex); + sup->su_bound = B_TRUE; + + qreply(sup->su_wq, ackmp); + return; +failed: + if (err != 0) { + dlerrorack(sup->su_wq, ackmp, DL_BIND_REQ, DL_SYSERR, err); + return; + } +} + +static void +softmac_unbind_req(softmac_upper_t *sup, mblk_t *mp) +{ + softmac_lower_t *slp = sup->su_slp; + softmac_t *softmac = sup->su_softmac; + mblk_t *ackmp, *mp1; + int err; + + if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { + freemsg(mp); + return; + } + + if (!sup->su_bound) { + dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); + return; + } + + /* + * Allocate ackmp incase the underlying driver does not ack timely. + */ + if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) { + dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM); + return; + } + + err = softmac_output(slp, mp, DL_UNBIND_REQ, DL_OK_ACK, &ackmp); + if (ackmp != NULL) { + freemsg(mp1); + } else { + /* + * The driver does not ack timely. + */ + ASSERT(err == ENOMSG); + ackmp = mp1; + } + if (err != 0) { + dlerrorack(sup->su_wq, ackmp, DL_UNBIND_REQ, DL_SYSERR, err); + return; + } + + sup->su_bound = B_FALSE; + + mutex_enter(&softmac->smac_active_mutex); + if (sup->su_active) { + ASSERT(!softmac->smac_active); + softmac->smac_nactive--; + sup->su_active = B_FALSE; + } + mutex_exit(&softmac->smac_active_mutex); + +done: + qreply(sup->su_wq, ackmp); +} + +/* + * Process the non-data mblk. + */ +static void +softmac_wput_single_nondata(softmac_upper_t *sup, mblk_t *mp) +{ + softmac_t *softmac = sup->su_softmac; + softmac_lower_t *slp = sup->su_slp; + unsigned char dbtype; + t_uscalar_t prim; + + dbtype = DB_TYPE(mp); + switch (dbtype) { + case M_IOCTL: + case M_CTL: { + uint32_t expected_mode; + + if (((struct iocblk *)(mp->b_rptr))->ioc_cmd != SIOCSLIFNAME) + break; + + /* + * Nak the M_IOCTL based on the STREAMS specification. + */ + if (dbtype == M_IOCTL) + miocnak(sup->su_wq, mp, 0, EINVAL); + + /* + * This stream is either IP or ARP. 
See whether + * we need to setup a dedicated-lower-stream for it. + */ + mutex_enter(&softmac->smac_fp_mutex); + + expected_mode = DATAPATH_MODE(softmac); + if (expected_mode == SOFTMAC_SLOWPATH) + sup->su_mode = SOFTMAC_SLOWPATH; + list_insert_head(&softmac->smac_sup_list, sup); + mutex_exit(&softmac->smac_fp_mutex); + + /* + * Setup the fast-path dedicated lower stream if fast-path + * is expected. Note that no lock is held here, and if + * smac_expected_mode is changed from SOFTMAC_FASTPATH to + * SOFTMAC_SLOWPATH, the DL_NOTE_REPLUMB message used for + * data-path switching would already be queued and will + * be processed by softmac_wput_single_nondata() later. + */ + if (expected_mode == SOFTMAC_FASTPATH) + (void) softmac_fastpath_setup(sup); + return; + } + case M_PROTO: + case M_PCPROTO: + if (MBLKL(mp) < sizeof (t_uscalar_t)) { + freemsg(mp); + return; + } + prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; + switch (prim) { + case DL_NOTIFY_IND: + if (MBLKL(mp) < sizeof (dl_notify_ind_t) || + ((dl_notify_ind_t *)mp->b_rptr)->dl_notification != + DL_NOTE_REPLUMB) { + freemsg(mp); + return; + } + /* + * This DL_NOTE_REPLUMB message is initiated + * and queued by the softmac itself, when the + * sup is trying to switching its datapath mode + * between SOFTMAC_SLOWPATH and SOFTMAC_FASTPATH. + * Send this message upstream. + */ + qreply(sup->su_wq, mp); + return; + case DL_NOTIFY_CONF: + if (MBLKL(mp) < sizeof (dl_notify_conf_t) || + ((dl_notify_conf_t *)mp->b_rptr)->dl_notification != + DL_NOTE_REPLUMB_DONE) { + freemsg(mp); + return; + } + /* + * This is an indication from IP/ARP that the + * fastpath->slowpath switch is done. + */ + freemsg(mp); + softmac_datapath_switch_done(sup); + return; + } + break; + } + + /* + * No need to hold lock to check su_mode, since su_mode updating only + * operation is is serialized by softmac_wput_nondata_task(). + */ + if (sup->su_mode != SOFTMAC_FASTPATH) { + dld_wput(sup->su_wq, mp); + return; + } + + /* + * Fastpath non-data message processing. Most of non-data messages + * can be directly passed down to the dedicated-lower-stream, aside + * from the following M_PROTO/M_PCPROTO messages. + */ + switch (dbtype) { + case M_PROTO: + case M_PCPROTO: + switch (prim) { + case DL_BIND_REQ: + softmac_bind_req(sup, mp); + break; + case DL_UNBIND_REQ: + softmac_unbind_req(sup, mp); + break; + case DL_CAPABILITY_REQ: + softmac_capability_req(sup, mp); + break; + default: + putnext(slp->sl_wq, mp); + break; + } + break; + default: + putnext(slp->sl_wq, mp); + break; + } +} + +/* + * The worker thread which processes non-data messages. Note we only process + * one message at one time in order to be able to "flush" the queued message + * and serialize the processing. + */ +static void +softmac_wput_nondata_task(void *arg) +{ + softmac_upper_t *sup = arg; + mblk_t *mp; + + mutex_enter(&sup->su_disp_mutex); + + while (sup->su_pending_head != NULL) { + if (sup->su_closing) + break; + + SOFTMAC_DQ_PENDING(sup, &mp); + mutex_exit(&sup->su_disp_mutex); + softmac_wput_single_nondata(sup, mp); + mutex_enter(&sup->su_disp_mutex); + } + + /* + * If the stream is closing, flush all queued messages and inform + * the stream to be closed. + */ + freemsgchain(sup->su_pending_head); + sup->su_pending_head = sup->su_pending_tail = NULL; + sup->su_dlpi_pending = B_FALSE; + cv_signal(&sup->su_disp_cv); + mutex_exit(&sup->su_disp_mutex); +} + +/* + * Kernel thread to handle taskq dispatch failures in softmac_wput_nondata(). 
+ * This thread is started when the softmac module is first loaded. + */ +static void +softmac_taskq_dispatch(void) +{ + callb_cpr_t cprinfo; + softmac_upper_t *sup; + + CALLB_CPR_INIT(&cprinfo, &softmac_taskq_lock, callb_generic_cpr, + "softmac_taskq_dispatch"); + mutex_enter(&softmac_taskq_lock); + + while (!softmac_taskq_quit) { + sup = list_head(&softmac_taskq_list); + while (sup != NULL) { + list_remove(&softmac_taskq_list, sup); + sup->su_taskq_scheduled = B_FALSE; + mutex_exit(&softmac_taskq_lock); + VERIFY(taskq_dispatch(system_taskq, + softmac_wput_nondata_task, sup, TQ_SLEEP) != NULL); + mutex_enter(&softmac_taskq_lock); + sup = list_head(&softmac_taskq_list); + } + + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&softmac_taskq_cv, &softmac_taskq_lock); + CALLB_CPR_SAFE_END(&cprinfo, &softmac_taskq_lock); + } + + softmac_taskq_done = B_TRUE; + cv_signal(&softmac_taskq_cv); + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +void +softmac_wput_nondata(softmac_upper_t *sup, mblk_t *mp) +{ + /* + * The processing of the message might block. Enqueue the + * message for later processing. + */ + mutex_enter(&sup->su_disp_mutex); + + if (sup->su_closing) { + mutex_exit(&sup->su_disp_mutex); + freemsg(mp); + return; + } + + SOFTMAC_EQ_PENDING(sup, mp); + + if (sup->su_dlpi_pending) { + mutex_exit(&sup->su_disp_mutex); + return; + } + sup->su_dlpi_pending = B_TRUE; + mutex_exit(&sup->su_disp_mutex); + + if (taskq_dispatch(system_taskq, softmac_wput_nondata_task, + sup, TQ_NOSLEEP) != NULL) { + return; + } + + mutex_enter(&softmac_taskq_lock); + if (!sup->su_taskq_scheduled) { + list_insert_tail(&softmac_taskq_list, sup); + cv_signal(&softmac_taskq_cv); + } + sup->su_taskq_scheduled = B_TRUE; + mutex_exit(&softmac_taskq_lock); +} + +/* + * Setup the dedicated-lower-stream (fast-path) for the IP/ARP upperstream. + */ +static int +softmac_fastpath_setup(softmac_upper_t *sup) +{ + softmac_t *softmac = sup->su_softmac; + softmac_lower_t *slp; + int err; + + err = softmac_lower_setup(softmac, sup, &slp); + + mutex_enter(&sup->su_mutex); + /* + * Wait for all data messages to be processed so that we can change + * the su_mode. + */ + while (sup->su_tx_inprocess != 0) + cv_wait(&sup->su_cv, &sup->su_mutex); + + ASSERT(sup->su_mode != SOFTMAC_FASTPATH); + ASSERT(sup->su_slp == NULL); + if (err != 0) { + sup->su_mode = SOFTMAC_SLOWPATH; + } else { + sup->su_slp = slp; + sup->su_mode = SOFTMAC_FASTPATH; + } + mutex_exit(&sup->su_mutex); + return (err); +} + +/* + * Tear down the dedicated-lower-stream (fast-path) for the IP/ARP upperstream. + */ +static void +softmac_fastpath_tear(softmac_upper_t *sup) +{ + mutex_enter(&sup->su_mutex); + /* + * Wait for all data messages in the dedicated-lower-stream + * to be processed. + */ + while (sup->su_tx_inprocess != 0) + cv_wait(&sup->su_cv, &sup->su_mutex); + + if (sup->su_tx_busy) { + ASSERT(sup->su_tx_flow_mp == NULL); + sup->su_tx_flow_mp = getq(sup->su_wq); + sup->su_tx_busy = B_FALSE; + } + + sup->su_mode = SOFTMAC_SLOWPATH; + + /* + * Destroy the dedicated-lower-stream. Note that slp is destroyed + * when lh is closed. + */ + (void) ldi_close(sup->su_slp->sl_lh, FREAD|FWRITE, kcred); + sup->su_slp = NULL; + mutex_exit(&sup->su_mutex); +} + +void +softmac_wput_data(softmac_upper_t *sup, mblk_t *mp) +{ + /* + * No lock is required to access the su_mode field since the data + * traffic is quiesce by IP when the data-path mode is in the + * process of switching. 
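[Editor's sketch] A data-path mode switch can only happen while no transmit is in flight. The pattern is spread across softmac_drv_wsrv(), softmac_fastpath_setup() and softmac_fastpath_tear(): senders pin the current mode with the su_tx_inprocess count, and the switch code waits for that count to drain before changing su_mode. The sketch below is a loose, simplified illustration of that quiesce pattern, not the exact code.

	static void
	tx_pinned_sketch(softmac_upper_t *sup, mblk_t *mp)
	{
		mutex_enter(&sup->su_mutex);
		sup->su_tx_inprocess++;		/* pin the current mode */
		mutex_exit(&sup->su_mutex);

		softmac_wput_data(sup, mp);

		mutex_enter(&sup->su_mutex);
		if (--sup->su_tx_inprocess == 0)
			cv_signal(&sup->su_cv);	/* wake a pending switch */
		mutex_exit(&sup->su_mutex);
	}

	static void
	switch_mode_sketch(softmac_upper_t *sup, uint32_t newmode)
	{
		mutex_enter(&sup->su_mutex);
		while (sup->su_tx_inprocess != 0)
			cv_wait(&sup->su_cv, &sup->su_mutex);
		sup->su_mode = newmode;		/* now safe to switch */
		mutex_exit(&sup->su_mutex);
	}
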
+ */ + if (sup->su_mode != SOFTMAC_FASTPATH) + dld_wput(sup->su_wq, mp); + else + (void) softmac_fastpath_wput_data(sup, mp, NULL, 0); +} + +/*ARGSUSED*/ +static mac_tx_cookie_t +softmac_fastpath_wput_data(softmac_upper_t *sup, mblk_t *mp, uintptr_t f_hint, + uint16_t flag) +{ + queue_t *wq = sup->su_slp->sl_wq; + + /* + * This function is called from IP, only the MAC_DROP_ON_NO_DESC + * flag can be specified. + */ + ASSERT((flag & ~MAC_DROP_ON_NO_DESC) == 0); + ASSERT(mp->b_next == NULL); + + /* + * Check wether the dedicated-lower-stream is able to handle more + * messages, and enable the flow-control if it is not. + * + * Note that in order not to introduce any packet reordering, we + * always send the message down to the dedicated-lower-stream: + * + * If the flow-control is already enabled, but we still get + * the messages from the upper-stream, it means that the upper + * stream does not respect STREAMS flow-control (e.g., TCP). Simply + * pass the message down to the lower-stream in that case. + */ + if (SOFTMAC_CANPUTNEXT(wq)) { + putnext(wq, mp); + return (NULL); + } + + if ((flag & MAC_DROP_ON_NO_DESC) != 0) { + freemsg(mp); + return ((mac_tx_cookie_t)wq); + } + + if (sup->su_tx_busy) { + putnext(wq, mp); + return ((mac_tx_cookie_t)wq); + } + + mutex_enter(&sup->su_mutex); + if (!sup->su_tx_busy) { + ASSERT(sup->su_tx_flow_mp != NULL); + (void) putq(sup->su_wq, sup->su_tx_flow_mp); + sup->su_tx_flow_mp = NULL; + sup->su_tx_busy = B_TRUE; + qenable(wq); + } + mutex_exit(&sup->su_mutex); + putnext(wq, mp); + return ((mac_tx_cookie_t)wq); +} + +boolean_t +softmac_active_set(void *arg) +{ + softmac_t *softmac = arg; + + mutex_enter(&softmac->smac_active_mutex); + if (softmac->smac_nactive != 0) { + mutex_exit(&softmac->smac_active_mutex); + return (B_FALSE); + } + softmac->smac_active = B_TRUE; + mutex_exit(&softmac->smac_active_mutex); + return (B_TRUE); +} + +void +softmac_active_clear(void *arg) +{ + softmac_t *softmac = arg; + + mutex_enter(&softmac->smac_active_mutex); + ASSERT(softmac->smac_active && (softmac->smac_nactive == 0)); + softmac->smac_active = B_FALSE; + mutex_exit(&softmac->smac_active_mutex); +} + +/* + * Disable/reenable fastpath on given softmac. This request could come from a + * MAC client or directly from administrators. + */ +int +softmac_datapath_switch(softmac_t *softmac, boolean_t disable, boolean_t admin) +{ + softmac_upper_t *sup; + mblk_t *head = NULL, *tail = NULL, *mp; + list_t reqlist; + softmac_switch_req_t *req; + uint32_t current_mode, expected_mode; + int err = 0; + + mutex_enter(&softmac->smac_fp_mutex); + + current_mode = DATAPATH_MODE(softmac); + if (admin) { + if (softmac->smac_fastpath_admin_disabled == disable) { + mutex_exit(&softmac->smac_fp_mutex); + return (0); + } + softmac->smac_fastpath_admin_disabled = disable; + } else if (disable) { + softmac->smac_fp_disable_clients++; + } else { + ASSERT(softmac->smac_fp_disable_clients != 0); + softmac->smac_fp_disable_clients--; + } + + expected_mode = DATAPATH_MODE(softmac); + if (current_mode == expected_mode) { + mutex_exit(&softmac->smac_fp_mutex); + return (0); + } + + /* + * The expected mode is different from whatever datapath mode + * this softmac is expected from last request, enqueue the data-path + * switch request. + */ + list_create(&reqlist, sizeof (softmac_switch_req_t), + offsetof(softmac_switch_req_t, ssq_req_list_node)); + + /* + * Allocate all DL_NOTIFY_IND messages and request structures that + * are required to switch each IP/ARP stream to the expected mode. 
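[Editor's sketch] The fast-path transmit side relies on STREAMS flow control rather than a notify callback: when the dedicated lower stream backs up, the pre-allocated su_tx_flow_mp is put on the upper write queue so that the upper stream is pushed back on, and softmac_drv_wsrv() pulls it off again once the lower stream drains. The sketch below condenses the two halves of that mechanism from softmac_fastpath_wput_data() and softmac_drv_wsrv().

	static void
	tx_congested_sketch(softmac_upper_t *sup)
	{
		mutex_enter(&sup->su_mutex);
		if (!sup->su_tx_busy) {
			/* park the flow-control marker on the write queue */
			(void) putq(sup->su_wq, sup->su_tx_flow_mp);
			sup->su_tx_flow_mp = NULL;
			sup->su_tx_busy = B_TRUE;
		}
		mutex_exit(&sup->su_mutex);
	}

	static void
	tx_drained_sketch(softmac_upper_t *sup)	/* from the write service routine */
	{
		mutex_enter(&sup->su_mutex);
		if (sup->su_tx_busy) {
			sup->su_tx_flow_mp = getq(sup->su_wq);	/* reclaim marker */
			sup->su_tx_busy = B_FALSE;
		}
		mutex_exit(&sup->su_mutex);
	}
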
+ */ + for (sup = list_head(&softmac->smac_sup_list); sup != NULL; + sup = list_next(&softmac->smac_sup_list, sup)) { + dl_notify_ind_t *dlip; + + req = kmem_alloc(sizeof (softmac_switch_req_t), KM_NOSLEEP); + if (req == NULL) + break; + + req->ssq_expected_mode = expected_mode; + + /* + * Allocate the DL_NOTE_REPLUMB message. + */ + if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL) { + kmem_free(req, sizeof (softmac_switch_req_t)); + break; + } + + list_insert_tail(&reqlist, req); + + mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t); + mp->b_datap->db_type = M_PROTO; + bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); + dlip = (dl_notify_ind_t *)mp->b_rptr; + dlip->dl_primitive = DL_NOTIFY_IND; + dlip->dl_notification = DL_NOTE_REPLUMB; + if (head == NULL) { + head = tail = mp; + } else { + tail->b_next = mp; + tail = mp; + } + } + + /* + * Note that it is fine if the expected data-path mode is fast-path + * and some of streams fails to switch. Only return failure if we + * are expected to switch to the slow-path. + */ + if (sup != NULL && expected_mode == SOFTMAC_SLOWPATH) { + err = ENOMEM; + goto fail; + } + + /* + * Start switching for each IP/ARP stream. The switching operation + * will eventually succeed and there is no need to wait for it + * to finish. + */ + for (sup = list_head(&softmac->smac_sup_list); sup != NULL; + sup = list_next(&softmac->smac_sup_list, sup)) { + mp = head->b_next; + head->b_next = NULL; + + /* + * Add the swtich request to the requests list of the stream. + */ + req = list_head(&reqlist); + ASSERT(req != NULL); + list_remove(&reqlist, req); + list_insert_tail(&sup->su_req_list, req); + softmac_wput_nondata(sup, head); + head = mp; + } + + mutex_exit(&softmac->smac_fp_mutex); + ASSERT(list_is_empty(&reqlist)); + list_destroy(&reqlist); + return (0); +fail: + if (admin) { + softmac->smac_fastpath_admin_disabled = !disable; + } else if (disable) { + softmac->smac_fp_disable_clients--; + } else { + softmac->smac_fp_disable_clients++; + } + + mutex_exit(&softmac->smac_fp_mutex); + while ((req = list_head(&reqlist)) != NULL) { + list_remove(&reqlist, req); + kmem_free(req, sizeof (softmac_switch_req_t)); + } + freemsgchain(head); + list_destroy(&reqlist); + return (err); +} + +int +softmac_fastpath_disable(void *arg) +{ + return (softmac_datapath_switch((softmac_t *)arg, B_TRUE, B_FALSE)); +} + +void +softmac_fastpath_enable(void *arg) +{ + VERIFY(softmac_datapath_switch((softmac_t *)arg, B_FALSE, + B_FALSE) == 0); +} + +void +softmac_upperstream_close(softmac_upper_t *sup) +{ + softmac_t *softmac = sup->su_softmac; + softmac_switch_req_t *req; + + mutex_enter(&softmac->smac_fp_mutex); + + if (sup->su_mode == SOFTMAC_FASTPATH) + softmac_fastpath_tear(sup); + + if (sup->su_mode != SOFTMAC_UNKNOWN) { + list_remove(&softmac->smac_sup_list, sup); + sup->su_mode = SOFTMAC_UNKNOWN; + } + + /* + * Cleanup all the switch requests queueed on this stream. + */ + while ((req = list_head(&sup->su_req_list)) != NULL) { + list_remove(&sup->su_req_list, req); + kmem_free(req, sizeof (softmac_switch_req_t)); + } + mutex_exit(&softmac->smac_fp_mutex); +} + +/* + * Handle the DL_NOTE_REPLUMB_DONE indication from IP/ARP. Change the upper + * stream from the fastpath mode to the slowpath mode. 
+ */ +static void +softmac_datapath_switch_done(softmac_upper_t *sup) +{ + softmac_t *softmac = sup->su_softmac; + softmac_switch_req_t *req; + uint32_t expected_mode; + + mutex_enter(&softmac->smac_fp_mutex); + req = list_head(&sup->su_req_list); + list_remove(&sup->su_req_list, req); + expected_mode = req->ssq_expected_mode; + kmem_free(req, sizeof (softmac_switch_req_t)); + + if (expected_mode == sup->su_mode) { + mutex_exit(&softmac->smac_fp_mutex); + return; + } + + ASSERT(!sup->su_bound); + mutex_exit(&softmac->smac_fp_mutex); + + /* + * It is fine if the expected mode is fast-path and we fail + * to enable fastpath on this stream. + */ + if (expected_mode == SOFTMAC_SLOWPATH) + softmac_fastpath_tear(sup); + else + (void) softmac_fastpath_setup(sup); +} diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c index a44856c849..bfdf3ee851 100644 --- a/usr/src/uts/common/io/softmac/softmac_main.c +++ b/usr/src/uts/common/io/softmac/softmac_main.c @@ -69,6 +69,8 @@ static mod_hash_t *softmac_hash; static kmutex_t smac_global_lock; static kcondvar_t smac_global_cv; +static kmem_cache_t *softmac_cachep; + #define SOFTMAC_HASHSZ 64 static void softmac_create_task(void *); @@ -79,9 +81,14 @@ static void softmac_m_stop(void *); static int softmac_m_open(void *); static void softmac_m_close(void *); static boolean_t softmac_m_getcapab(void *, mac_capab_t, void *); +static int softmac_m_setprop(void *, const char *, mac_prop_id_t, + uint_t, const void *); +static int softmac_m_getprop(void *, const char *, mac_prop_id_t, + uint_t, uint_t, void *, uint_t *); + #define SOFTMAC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE) + (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | MC_GETPROP) static mac_callbacks_t softmac_m_callbacks = { SOFTMAC_M_CALLBACK_FLAGS, @@ -95,9 +102,57 @@ static mac_callbacks_t softmac_m_callbacks = { softmac_m_ioctl, softmac_m_getcapab, softmac_m_open, - softmac_m_close + softmac_m_close, + softmac_m_setprop, + softmac_m_getprop }; +/*ARGSUSED*/ +static int +softmac_constructor(void *buf, void *arg, int kmflag) +{ + softmac_t *softmac = buf; + + bzero(buf, sizeof (softmac_t)); + mutex_init(&softmac->smac_mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&softmac->smac_active_mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&softmac->smac_fp_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&softmac->smac_cv, NULL, CV_DEFAULT, NULL); + cv_init(&softmac->smac_fp_cv, NULL, CV_DEFAULT, NULL); + list_create(&softmac->smac_sup_list, sizeof (softmac_upper_t), + offsetof(softmac_upper_t, su_list_node)); + return (0); +} + +/*ARGSUSED*/ +static void +softmac_destructor(void *buf, void *arg) +{ + softmac_t *softmac = buf; + + ASSERT(softmac->smac_fp_disable_clients == 0); + ASSERT(!softmac->smac_fastpath_admin_disabled); + + ASSERT(!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)); + ASSERT(softmac->smac_hold_cnt == 0); + ASSERT(softmac->smac_attachok_cnt == 0); + ASSERT(softmac->smac_mh == NULL); + ASSERT(softmac->smac_softmac[0] == NULL && + softmac->smac_softmac[1] == NULL); + ASSERT(softmac->smac_state == SOFTMAC_INITIALIZED); + ASSERT(softmac->smac_lower == NULL); + ASSERT(softmac->smac_active == B_FALSE); + ASSERT(softmac->smac_nactive == 0); + ASSERT(list_is_empty(&softmac->smac_sup_list)); + + list_destroy(&softmac->smac_sup_list); + mutex_destroy(&softmac->smac_mutex); + mutex_destroy(&softmac->smac_active_mutex); + mutex_destroy(&softmac->smac_fp_mutex); + cv_destroy(&softmac->smac_cv); + 
cv_destroy(&softmac->smac_fp_cv); +} + void softmac_init() { @@ -108,11 +163,19 @@ softmac_init() rw_init(&softmac_hash_lock, NULL, RW_DEFAULT, NULL); mutex_init(&smac_global_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&smac_global_cv, NULL, CV_DRIVER, NULL); + + softmac_cachep = kmem_cache_create("softmac_cache", + sizeof (softmac_t), 0, softmac_constructor, + softmac_destructor, NULL, NULL, NULL, 0); + ASSERT(softmac_cachep != NULL); + softmac_fp_init(); } void softmac_fini() { + softmac_fp_fini(); + kmem_cache_destroy(softmac_cachep); rw_destroy(&softmac_hash_lock); mod_hash_destroy_hash(softmac_hash); mutex_destroy(&smac_global_lock); @@ -281,16 +344,12 @@ softmac_create(dev_info_t *dip, dev_t dev) * Check whether the softmac for the specified device already exists */ rw_enter(&softmac_hash_lock, RW_WRITER); - if ((err = mod_hash_find(softmac_hash, (mod_hash_key_t)devname, + if ((mod_hash_find(softmac_hash, (mod_hash_key_t)devname, (mod_hash_val_t *)&softmac)) != 0) { - softmac = kmem_zalloc(sizeof (softmac_t), KM_SLEEP); - mutex_init(&softmac->smac_mutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&softmac->smac_cv, NULL, CV_DRIVER, NULL); + softmac = kmem_cache_alloc(softmac_cachep, KM_SLEEP); (void) strlcpy(softmac->smac_devname, devname, MAXNAMELEN); - /* - * Insert the softmac into the hash table. - */ + err = mod_hash_insert(softmac_hash, (mod_hash_key_t)softmac->smac_devname, (mod_hash_val_t)softmac); @@ -413,8 +472,18 @@ softmac_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) case MAC_CAPAB_LEGACY: { mac_capab_legacy_t *legacy = cap_data; + /* + * The caller is not interested in the details. + */ + if (legacy == NULL) + break; + legacy->ml_unsup_note = ~softmac->smac_notifications & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_SPEED); + legacy->ml_active_set = softmac_active_set; + legacy->ml_active_clear = softmac_active_clear; + legacy->ml_fastpath_disable = softmac_fastpath_disable; + legacy->ml_fastpath_enable = softmac_fastpath_enable; legacy->ml_dev = makedevice(softmac->smac_umajor, softmac->smac_uppa + 1); break; @@ -816,22 +885,23 @@ softmac_mac_register(softmac_t *softmac) * Try to create the datalink for this softmac. */ if ((err = softmac_create_datalink(softmac)) != 0) { - if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) { + if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) (void) mac_unregister(softmac->smac_mh); - softmac->smac_mh = NULL; - } + mutex_enter(&softmac->smac_mutex); + softmac->smac_mh = NULL; + goto done; } /* * If succeed, create the thread which handles the DL_NOTIFY_IND from * the lower stream. 
*/ + mutex_enter(&softmac->smac_mutex); if (softmac->smac_mh != NULL) { softmac->smac_notify_thread = thread_create(NULL, 0, softmac_notify_thread, softmac, 0, &p0, TS_RUN, minclsyspri); } - mutex_enter(&softmac->smac_mutex); done: ASSERT(softmac->smac_state == SOFTMAC_ATTACH_INPROG && softmac->smac_attachok_cnt == softmac->smac_cnt); @@ -967,7 +1037,6 @@ softmac_destroy(dev_info_t *dip, dev_t dev) rw_exit(&softmac_hash_lock); return (0); } - err = mod_hash_remove(softmac_hash, (mod_hash_key_t)devname, (mod_hash_val_t *)&hashval); @@ -975,10 +1044,9 @@ softmac_destroy(dev_info_t *dip, dev_t dev) mutex_exit(&softmac->smac_mutex); rw_exit(&softmac_hash_lock); - - mutex_destroy(&softmac->smac_mutex); - cv_destroy(&softmac->smac_cv); - kmem_free(softmac, sizeof (softmac_t)); + ASSERT(softmac->smac_fp_disable_clients == 0); + softmac->smac_fastpath_admin_disabled = B_FALSE; + kmem_cache_free(softmac_cachep, softmac); return (0); } mutex_exit(&softmac->smac_mutex); @@ -1119,27 +1187,84 @@ softmac_recreate() } while (smw.smw_retry); } -/* ARGSUSED */ static int softmac_m_start(void *arg) { - return (0); + softmac_t *softmac = arg; + softmac_lower_t *slp = softmac->smac_lower; + int err; + + ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); + /* + * Bind to SAP 2 on token ring, 0 on other interface types. + * (SAP 0 has special significance on token ring). + * Note that the receive-side packets could come anytime after bind. + */ + err = softmac_send_bind_req(slp, softmac->smac_media == DL_TPR ? 2 : 0); + if (err != 0) + return (err); + + /* + * Put the lower stream to the DL_PROMISC_SAP mode in order to receive + * all packets of interest. + * + * some driver (e.g. the old legacy eri driver) incorrectly passes up + * packets to DL_PROMISC_SAP stream when the lower stream is not bound, + * so that we send DL_PROMISON_REQ after DL_BIND_REQ. + */ + err = softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_TRUE); + if (err != 0) { + (void) softmac_send_unbind_req(slp); + return (err); + } + + /* + * Enable capabilities the underlying driver claims to support. + * Some driver requires this being called after the stream is bound. + */ + if ((err = softmac_capab_enable(slp)) != 0) { + (void) softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_FALSE); + (void) softmac_send_unbind_req(slp); + } + + return (err); } /* ARGSUSED */ static void softmac_m_stop(void *arg) { + softmac_t *softmac = arg; + softmac_lower_t *slp = softmac->smac_lower; + + ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); + + /* + * It is not needed to reset zerocopy, MDT or HCKSUM capabilities. + */ + (void) softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_FALSE); + (void) softmac_send_unbind_req(slp); } /* - * Set up the lower stream above the legacy device which is shared by - * GLDv3 MAC clients. Put the lower stream into DLIOCRAW mode to send - * and receive the raw data. Further, put the lower stream into + * Set up the lower stream above the legacy device. There are two different + * type of lower streams: + * + * - Shared lower-stream + * + * Shared by all GLDv3 MAC clients. Put the lower stream to the DLIOCRAW + * mode to send and receive the raw data. Further, put the lower stream into * DL_PROMISC_SAP mode to receive all packets of interest. + * + * - Dedicated lower-stream + * + * The lower-stream which is dedicated to upper IP/ARP stream. This is used + * as fast-path for IP. In this case, the second argument is the pointer to + * the softmac upper-stream. 
*/ -static int -softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp) +int +softmac_lower_setup(softmac_t *softmac, softmac_upper_t *sup, + softmac_lower_t **slpp) { ldi_ident_t li; dev_t dev; @@ -1153,7 +1278,13 @@ softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp) if ((err = ldi_ident_from_dip(softmac_dip, &li)) != 0) return (err); + /* + * The GLDv3 framework makes sure that mac_unregister(), mac_open(), + * and mac_close() cannot be called at the same time. So we don't + * need any protection to access softmac here. + */ dev = softmac->smac_dev; + err = ldi_open_by_dev(&dev, OTYP_CHR, FREAD|FWRITE, kcred, &lh, li); ldi_ident_release(li); if (err != 0) @@ -1172,10 +1303,13 @@ softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp) } /* - * Put the lower stream into DLIOCRAW mode to send/receive raw data. + * If this is the shared-lower-stream, put the lower stream to + * the DLIOCRAW mode to send/receive raw data. */ - if ((err = ldi_ioctl(lh, DLIOCRAW, 0, FKIOCTL, kcred, &rval)) != 0) + if ((sup == NULL) && (err = ldi_ioctl(lh, DLIOCRAW, 0, FKIOCTL, + kcred, &rval)) != 0) { goto done; + } /* * Then push the softmac shim layer atop the lower stream. @@ -1198,50 +1332,25 @@ softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp) goto done; } slp = start_arg.si_slp; + slp->sl_sup = sup; slp->sl_lh = lh; slp->sl_softmac = softmac; *slpp = slp; - /* - * Bind to SAP 2 on token ring, 0 on other interface types. - * (SAP 0 has special significance on token ring). - * Note that the receive-side packets could come anytime after bind. - */ - if (softmac->smac_media == DL_TPR) - err = softmac_send_bind_req(slp, 2); - else - err = softmac_send_bind_req(slp, 0); - if (err != 0) - goto done; - - /* - * Put the lower stream into DL_PROMISC_SAP mode to receive all - * packets of interest. - * - * Some drivers (e.g. the old legacy eri driver) incorrectly pass up - * packets to DL_PROMISC_SAP stream when the lower stream is not bound, - * so we send DL_PROMISON_REQ after DL_BIND_REQ. - */ - if ((err = softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_TRUE)) != 0) - goto done; - - /* - * Enable the capabilities the underlying driver claims to support. - * Some drivers require this to be called after the stream is bound. - */ - if ((err = softmac_capab_enable(slp)) != 0) - goto done; - - /* - * Send the DL_NOTIFY_REQ to enable certain DL_NOTIFY_IND. - * We don't have to wait for the ack. - */ - notifications = DL_NOTE_PHYS_ADDR | DL_NOTE_LINK_UP | - DL_NOTE_LINK_DOWN | DL_NOTE_PROMISC_ON_PHYS | - DL_NOTE_PROMISC_OFF_PHYS; + if (sup != NULL) { + slp->sl_rxinfo = &sup->su_rxinfo; + } else { + /* + * Send DL_NOTIFY_REQ to enable certain DL_NOTIFY_IND. + * We don't have to wait for the ack. 
+ */ + notifications = DL_NOTE_PHYS_ADDR | DL_NOTE_LINK_UP | + DL_NOTE_LINK_DOWN | DL_NOTE_PROMISC_ON_PHYS | + DL_NOTE_PROMISC_OFF_PHYS; - (void) softmac_send_notify_req(slp, - (notifications & softmac->smac_notifications)); + (void) softmac_send_notify_req(slp, + (notifications & softmac->smac_notifications)); + } done: if (err != 0) @@ -1257,13 +1366,11 @@ softmac_m_open(void *arg) int err; ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); - ASSERT(softmac->smac_lower_state == SOFTMAC_INITIALIZED); - if ((err = softmac_lower_setup(softmac, &slp)) != 0) + if ((err = softmac_lower_setup(softmac, NULL, &slp)) != 0) return (err); softmac->smac_lower = slp; - softmac->smac_lower_state = SOFTMAC_READY; return (0); } @@ -1274,7 +1381,6 @@ softmac_m_close(void *arg) softmac_lower_t *slp; ASSERT(MAC_PERIM_HELD(softmac->smac_mh)); - ASSERT(softmac->smac_lower_state == SOFTMAC_READY); slp = softmac->smac_lower; ASSERT(slp != NULL); @@ -1282,10 +1388,74 @@ softmac_m_close(void *arg) * Note that slp is destroyed when lh is closed. */ (void) ldi_close(slp->sl_lh, FREAD|FWRITE, kcred); - softmac->smac_lower_state = SOFTMAC_INITIALIZED; softmac->smac_lower = NULL; } +/* + * Softmac supports two priviate link properteis: + * + * - "_fastpath" + * + * This is a read-only link property which points out the current data-path + * model of the given legacy link. The possible values are "disabled" and + * "enabled". + * + * - "_disable_fastpath" + * + * This is a read-write link property which can be used to disable or enable + * the fast-path of the given legacy link. The possible values are "true" + * and "false". Note that even when "_disable_fastpath" is set to be + * "false", the fast-path may still not be enabled since there may be + * other mac cleints that request the fast-path to be disabled. + */ +/* ARGSUSED */ +static int +softmac_m_setprop(void *arg, const char *name, mac_prop_id_t id, + uint_t valsize, const void *val) +{ + softmac_t *softmac = arg; + + if (id != MAC_PROP_PRIVATE || strcmp(name, "_disable_fastpath") != 0) + return (ENOTSUP); + + if (strcmp(val, "true") == 0) + return (softmac_datapath_switch(softmac, B_TRUE, B_TRUE)); + else if (strcmp(val, "false") == 0) + return (softmac_datapath_switch(softmac, B_FALSE, B_TRUE)); + else + return (EINVAL); +} + +static int +softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, + uint_t valsize, void *val, uint_t *perm) +{ + softmac_t *softmac = arg; + char *fpstr; + + if (id != MAC_PROP_PRIVATE) + return (ENOTSUP); + + if (strcmp(name, "_fastpath") == 0) { + if ((flags & MAC_PROP_DEFAULT) != 0) + return (ENOTSUP); + + *perm = MAC_PROP_PERM_READ; + mutex_enter(&softmac->smac_fp_mutex); + fpstr = (DATAPATH_MODE(softmac) == SOFTMAC_SLOWPATH) ? + "disabled" : "enabled"; + mutex_exit(&softmac->smac_fp_mutex); + } else if (strcmp(name, "_disable_fastpath") == 0) { + *perm = MAC_PROP_PERM_RW; + fpstr = ((flags & MAC_PROP_DEFAULT) != 0) ? "false" : + (softmac->smac_fastpath_admin_disabled ? "true" : "false"); + } else { + return (ENOTSUP); + } + + return (strlcpy(val, fpstr, valsize) >= valsize ? 
EINVAL : 0); +} + int softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp) { @@ -1367,12 +1537,39 @@ again: void softmac_rele_device(dls_dev_handle_t ddh) { + if (ddh != NULL) + softmac_rele((softmac_t *)ddh); +} + +int +softmac_hold(dev_t dev, softmac_t **softmacp) +{ softmac_t *softmac; + char *drv; + mac_handle_t mh; + char mac[MAXNAMELEN]; + int err; - if (ddh == NULL) - return; + if ((drv = ddi_major_to_name(getmajor(dev))) == NULL) + return (EINVAL); - softmac = (softmac_t *)ddh; + (void) snprintf(mac, MAXNAMELEN, "%s%d", drv, getminor(dev) - 1); + if ((err = mac_open(mac, &mh)) != 0) + return (err); + + softmac = (softmac_t *)mac_driver(mh); + + mutex_enter(&softmac->smac_mutex); + softmac->smac_hold_cnt++; + mutex_exit(&softmac->smac_mutex); + mac_close(mh); + *softmacp = softmac; + return (0); +} + +void +softmac_rele(softmac_t *softmac) +{ mutex_enter(&softmac->smac_mutex); softmac->smac_hold_cnt--; mutex_exit(&softmac->smac_mutex); diff --git a/usr/src/uts/common/io/softmac/softmac_pkt.c b/usr/src/uts/common/io/softmac/softmac_pkt.c index 4a856f4f58..b0d613b9be 100644 --- a/usr/src/uts/common/io/softmac/softmac_pkt.c +++ b/usr/src/uts/common/io/softmac/softmac_pkt.c @@ -27,16 +27,6 @@ #include <inet/led.h> #include <sys/softmac_impl.h> -/* - * Macro to check whether the write-queue of the lower stream is full. - * - * Because softmac is pushed right above the underlying device and - * _I_INSERT/_I_REMOVE is not processed in the lower stream, it is - * safe to directly access the q_next pointer. - */ -#define CANPUTNEXT(q) \ - (!((q)->q_next->q_nfsrv->q_flag & QFULL) || canput((q)->q_next)) - mblk_t * softmac_m_tx(void *arg, mblk_t *mp) { @@ -46,7 +36,7 @@ softmac_m_tx(void *arg, mblk_t *mp) * Optimize for the most common case. */ if (mp->b_next == NULL) { - if (!CANPUTNEXT(wq)) + if (!SOFTMAC_CANPUTNEXT(wq)) return (mp); mp->b_flag |= MSGNOLOOP; @@ -57,7 +47,7 @@ softmac_m_tx(void *arg, mblk_t *mp) while (mp != NULL) { mblk_t *next = mp->b_next; - if (!CANPUTNEXT(wq)) + if (!SOFTMAC_CANPUTNEXT(wq)) break; mp->b_next = NULL; mp->b_flag |= MSGNOLOOP; @@ -67,7 +57,6 @@ softmac_m_tx(void *arg, mblk_t *mp) return (mp); } - void softmac_rput_process_data(softmac_lower_t *slp, mblk_t *mp) { @@ -141,7 +130,7 @@ dlpi_get_errno(t_uscalar_t error, t_uscalar_t unix_errno) return (error == DL_SYSERR ? 
unix_errno : EINVAL); } -static int +int softmac_output(softmac_lower_t *slp, mblk_t *mp, t_uscalar_t dl_prim, t_uscalar_t ack, mblk_t **mpp) { @@ -227,7 +216,7 @@ softmac_ioctl_tx(softmac_lower_t *slp, mblk_t *mp, mblk_t **mpp) softmac_serialize_exit(slp); } -static int +int softmac_mexchange_error_ack(mblk_t **mpp, t_uscalar_t error_primitive, t_uscalar_t error, t_uscalar_t unix_errno) { diff --git a/usr/src/uts/common/io/sundlpi.c b/usr/src/uts/common/io/sundlpi.c index d537f8127b..a322634fb6 100644 --- a/usr/src/uts/common/io/sundlpi.c +++ b/usr/src/uts/common/io/sundlpi.c @@ -491,6 +491,7 @@ dl_primstr(t_uscalar_t prim) case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ"); case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK"); case DL_NOTIFY_IND: return ("DL_NOTIFY_IND"); + case DL_NOTIFY_CONF: return ("DL_NOTIFY_CONF"); case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ"); case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK"); case DL_CONTROL_REQ: return ("DL_CONTROL_REQ"); diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index 3094fa1a09..5fede27bb2 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -411,6 +411,9 @@ int dld_open(queue_t *, dev_t *, int, int, cred_t *); int dld_close(queue_t *); void dld_wput(queue_t *, mblk_t *); void dld_wsrv(queue_t *); +int dld_str_open(queue_t *, dev_t *, void *); +int dld_str_close(queue_t *); +void *dld_str_private(queue_t *); void dld_init_ops(struct dev_ops *, const char *); void dld_fini_ops(struct dev_ops *); int dld_autopush(dev_t *, struct dlautopush *); diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h index 79aa82ba75..68caa4f459 100644 --- a/usr/src/uts/common/sys/dld_impl.h +++ b/usr/src/uts/common/sys/dld_impl.h @@ -207,13 +207,19 @@ struct dld_str_s { /* Protected by */ dls_multicst_addr_t *ds_dmap; /* ds_rw_lock */ dls_rx_t ds_rx; /* ds_lock */ void *ds_rx_arg; /* ds_lock */ - boolean_t ds_active; /* SL */ + uint_t ds_nactive; /* SL */ dld_str_t *ds_next; /* SL */ dls_head_t *ds_head; dls_dl_handle_t ds_ddh; list_node_t ds_tqlist; + + /* + * driver private data set by the driver when calling dld_str_open(). + */ + void *ds_private; }; + #define DLD_DATATHR_INC(dsp) { \ ASSERT(MUTEX_HELD(&(dsp)->ds_lock)); \ dsp->ds_datathr_cnt++; \ diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index e67f604630..11293ac6d3 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -104,6 +104,7 @@ typedef struct dl_ipnetinfo { #define DL_CONTROL_ACK 0x113 /* Device specific control ack */ #define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */ #define DL_INTR_MODE_REQ 0x115 /* Request Rx processing in INTR mode */ +#define DL_NOTIFY_CONF 0x116 /* Notification from upstream */ /* * Primitives used for Connectionless Service @@ -385,7 +386,7 @@ typedef struct dl_ipnetinfo { #define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */ /* - * DLPI notification codes for DL_NOTIFY primitives. + * DLPI notification codes for DL_NOTIFY_REQ primitives. * Bit-wise distinct since DL_NOTIFY_REQ and DL_NOTIFY_ACK carry multiple * notification codes. */ @@ -400,6 +401,12 @@ typedef struct dl_ipnetinfo { #define DL_NOTE_SPEED 0x0100 /* Approximate link speed */ #define DL_NOTE_FASTPATH_FLUSH 0x0200 /* Fast Path info changes */ #define DL_NOTE_CAPAB_RENEG 0x0400 /* Initiate capability renegotiation */ +#define DL_NOTE_REPLUMB 0x0800 /* Inform the link to replumb */ + +/* + * DLPI notification codes for DL_NOTIFY_CONF primitives. 
+ */ +#define DL_NOTE_REPLUMB_DONE 0x0001 /* Indicate replumb has done */ /* * DLPI Quality Of Service definition for use in QOS structure definitions. @@ -1017,6 +1024,14 @@ typedef struct { } dl_notify_ind_t; /* + * DL_NOTIFY_CONF, M_PROTO type + */ +typedef struct { + t_uscalar_t dl_primitive; /* set to DL_NOTIFY_CONF */ + uint32_t dl_notification; /* Which notification? */ +} dl_notify_conf_t; + +/* * DL_AGGR_REQ, M_PROTO type */ typedef struct { @@ -1507,6 +1522,7 @@ union DL_primitives { dl_notify_req_t notify_req; dl_notify_ack_t notify_ack; dl_notify_ind_t notify_ind; + dl_notify_conf_t notify_conf; dl_aggr_req_t aggr_req; dl_aggr_ind_t aggr_ind; dl_unaggr_req_t unaggr_req; @@ -1574,6 +1590,7 @@ union DL_primitives { #define DL_NOTIFY_REQ_SIZE sizeof (dl_notify_req_t) #define DL_NOTIFY_ACK_SIZE sizeof (dl_notify_ack_t) #define DL_NOTIFY_IND_SIZE sizeof (dl_notify_ind_t) +#define DL_NOTIFY_CONF_SIZE sizeof (dl_notify_conf_t) #define DL_AGGR_REQ_SIZE sizeof (dl_aggr_req_t) #define DL_AGGR_IND_SIZE sizeof (dl_aggr_ind_t) #define DL_UNAGGR_REQ_SIZE sizeof (dl_unaggr_req_t) diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index 33162a4d5c..dafd451954 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -119,7 +119,7 @@ extern void dls_rx_promisc(void *, mac_resource_handle_t, mblk_t *, extern void dls_rx_vlan_promisc(void *, mac_resource_handle_t, mblk_t *, boolean_t); extern int dls_active_set(dld_str_t *); -extern void dls_active_clear(dld_str_t *); +extern void dls_active_clear(dld_str_t *, boolean_t); extern void dls_mgmt_init(void); extern void dls_mgmt_fini(void); diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 1756644b6c..2cfe7443e5 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -260,20 +260,6 @@ typedef struct mac_info_s { } mac_info_t; /* - * Information for legacy devices. - */ -typedef struct mac_capab_legacy_s { - /* - * Notifications that the legacy device does not support. - */ - uint32_t ml_unsup_note; - /* - * dev_t of the legacy device; can be held to force attach. 
- */ - dev_t ml_dev; -} mac_capab_legacy_t; - -/* * When VNICs are created on top of the NIC, there are two levels * of MAC layer, a lower MAC, which is the MAC layer at the level of the * physical NIC, and an upper MAC, which is the MAC layer at the level @@ -569,13 +555,15 @@ extern void mac_margin_get(mac_handle_t, uint32_t *); extern int mac_margin_remove(mac_handle_t, uint32_t); extern int mac_margin_add(mac_handle_t, uint32_t *, boolean_t); +extern int mac_fastpath_disable(mac_handle_t); +extern void mac_fastpath_enable(mac_handle_t); extern mactype_register_t *mactype_alloc(uint_t); extern void mactype_free(mactype_register_t *); extern int mactype_register(mactype_register_t *); extern int mactype_unregister(const char *); -extern void mac_start_logusage(mac_logtype_t, uint_t); +extern int mac_start_logusage(mac_logtype_t, uint_t); extern void mac_stop_logusage(mac_logtype_t); extern mac_handle_t mac_get_lower_mac_handle(mac_handle_t); diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index c1b999bb31..20e3afa82a 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -63,6 +63,7 @@ extern void mac_ioctl(mac_handle_t, queue_t *, mblk_t *); extern link_state_t mac_link_get(mac_handle_t); extern void mac_resource_set(mac_client_handle_t, mac_resource_add_t, void *); extern dev_info_t *mac_devinfo_get(mac_handle_t); +extern void *mac_driver(mac_handle_t); extern boolean_t mac_capab_get(mac_handle_t, mac_capab_t, void *); extern boolean_t mac_sap_verify(mac_handle_t, uint32_t, uint32_t *); extern mblk_t *mac_header(mac_handle_t, const uint8_t *, uint32_t, mblk_t *, diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index ee5557b113..9a02c07b54 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -457,9 +457,10 @@ struct mac_impl_s { mac_resource_props_t mi_resource_props; /* SL */ minor_t mi_minor; /* WO */ - dev_t mi_phy_dev; /* WO */ uint32_t mi_oref; /* SL */ - uint32_t mi_unsup_note; /* WO */ + mac_capab_legacy_t mi_capab_legacy; /* WO */ + dev_t mi_phy_dev; /* WO */ + /* * List of margin value requests added by mac clients. This list is * sorted: the first one has the greatest value. diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 5522a6c884..6713912b63 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -145,6 +145,24 @@ typedef struct mac_capab_multifactaddr_s { } mac_capab_multifactaddr_t; /* + * Info and callbacks of legacy devices. + */ +typedef struct mac_capab_legacy_s { + /* + * Notifications that the legacy device does not support. + */ + uint32_t ml_unsup_note; + /* + * dev_t of the legacy device; can be held to force attach. + */ + dev_t ml_dev; + boolean_t (*ml_active_set)(void *); + void (*ml_active_clear)(void *); + int (*ml_fastpath_disable)(void *); + void (*ml_fastpath_enable)(void *); +} mac_capab_legacy_t; + +/* * MAC driver entry point types. 
*/ typedef int (*mac_getstat_t)(void *, uint_t, uint64_t *); diff --git a/usr/src/uts/common/sys/softmac_impl.h b/usr/src/uts/common/sys/softmac_impl.h index 9cdb49de31..83caa23c82 100644 --- a/usr/src/uts/common/sys/softmac_impl.h +++ b/usr/src/uts/common/sys/softmac_impl.h @@ -44,9 +44,20 @@ extern "C" { #endif +typedef void (*softmac_rx_t)(void *, mac_resource_handle_t, mblk_t *, + mac_header_info_t *); + +typedef struct softmac_lower_rxinfo_s { + softmac_rx_t slr_rx; + void *slr_arg; +} softmac_lower_rxinfo_t; + typedef struct softmac_lower_s { + ldi_handle_t sl_lh; struct softmac *sl_softmac; queue_t *sl_wq; + struct softmac_upper_s *sl_sup; + softmac_lower_rxinfo_t *sl_rxinfo; /* * sl_ctl_inprogress is used to serialize the control path. It will @@ -68,8 +79,6 @@ typedef struct softmac_lower_s { t_uscalar_t sl_pending_prim; boolean_t sl_pending_ioctl; mblk_t *sl_ack_mp; - - ldi_handle_t sl_lh; } softmac_lower_t; typedef enum { @@ -110,55 +119,53 @@ typedef struct softmac_dev_s { * node, the other minor node can still be used to register the mac. * (Specifically, an incorrect xxx_getinfo() implementation will cause style-2 * minor node mac registration to fail.) + * + * Locking description: + * WO: write once, valid the life time. */ typedef struct softmac { - /* - * The following fields will be set when the softmac is created and - * will not change. No lock is required. - */ - char smac_devname[MAXNAMELEN]; - major_t smac_umajor; - int smac_uppa; - uint32_t smac_cnt; /* # of minor nodes for this device */ + char smac_devname[MAXNAMELEN]; /* WO */ + major_t smac_umajor; /* WO */ + int smac_uppa; /* WO */ + uint32_t smac_cnt; /* WO, # of minor nodes */ + kmutex_t smac_mutex; + kcondvar_t smac_cv; + softmac_state_t smac_state; /* smac_mutex */ /* - * The following fields are protected by smac_mutex. - * * The smac_hold_cnt field increases when softmac_hold_device() is * called to force the dls_vlan_t of the device to be created. The * device pre-detach fails if this counter is not 0. */ - softmac_state_t smac_state; - uint32_t smac_hold_cnt; - kmutex_t smac_mutex; - kcondvar_t smac_cv; - uint32_t smac_flags; - int smac_attacherr; + uint32_t smac_hold_cnt; /* smac_mutex */ + uint32_t smac_flags; /* smac_mutex */ + int smac_attacherr; /* smac_mutex */ mac_handle_t smac_mh; - softmac_dev_t *smac_softmac[2]; + softmac_dev_t *smac_softmac[2]; /* smac_mutex */ + /* * Number of minor nodes whose post-attach routine has succeeded. * This should be the same as the numbers of softmac_dev_t. * Note that it does not imply SOFTMAC_ATTACH_DONE as the taskq might * be still ongoing. */ - uint32_t smac_attachok_cnt; + uint32_t smac_attachok_cnt; /* smac_mutex */ /* * Number of softmac_dev_t left when pre-detach fails. This is used * to indicate whether postattach is called because of a failed * pre-detach. */ - uint32_t smac_attached_left; + uint32_t smac_attached_left; /* smac_mutex */ /* * Thread handles the DL_NOTIFY_IND message from the lower stream. */ - kthread_t *smac_notify_thread; + kthread_t *smac_notify_thread; /* smac_mutex */ /* * Head and tail of the DL_NOTIFY_IND messsages. 
*/ - mblk_t *smac_notify_head; - mblk_t *smac_notify_tail; + mblk_t *smac_notify_head; /* smac_mutex */ + mblk_t *smac_notify_tail; /* smac_mutex */ /* * The remaining fields are used to register the MAC for a legacy @@ -193,10 +200,34 @@ typedef struct softmac { dl_capab_mdt_t smac_mdt_capab; boolean_t smac_mdt; - /* Following fields protected by the mac perimeter */ - softmac_lower_state_t smac_lower_state; - /* Lower stream structure */ + /* + * Lower stream structure, accessed by the MAC provider API. The GLDv3 + * framework assures it's validity. + */ softmac_lower_t *smac_lower; + + kmutex_t smac_active_mutex; + /* + * Set by xxx_active_set() when aggregation is created. + */ + boolean_t smac_active; /* smac_active_mutex */ + /* + * Numbers of the bounded streams in the fast-path mode. + */ + uint32_t smac_nactive; /* smac_active_mutex */ + + kmutex_t smac_fp_mutex; + kcondvar_t smac_fp_cv; + /* + * numbers of clients that request to disable fastpath. + */ + uint32_t smac_fp_disable_clients; /* smac_fp_mutex */ + boolean_t smac_fastpath_admin_disabled; /* smac_fp_mutex */ + + /* + * stream list over this softmac. + */ + list_t smac_sup_list; /* smac_fp_mutex */ } softmac_t; typedef struct smac_ioc_start_s { @@ -206,20 +237,157 @@ typedef struct smac_ioc_start_s { #define SMAC_IOC ('S' << 24 | 'M' << 16 | 'C' << 8) #define SMAC_IOC_START (SMAC_IOC | 0x01) +/* + * The su_mode of a non-IP/ARP stream is UNKNOWN, and the su_mode of an IP/ARP + * stream is either SLOWPATH or FASTPATH. + */ +#define SOFTMAC_UNKNOWN 0x00 +#define SOFTMAC_SLOWPATH 0x01 +#define SOFTMAC_FASTPATH 0x02 + +typedef struct softmac_switch_req_s { + list_node_t ssq_req_list_node; + uint32_t ssq_expected_mode; +} softmac_switch_req_t; + +#define DATAPATH_MODE(softmac) \ + ((((softmac)->smac_fp_disable_clients != 0) || \ + (softmac)->smac_fastpath_admin_disabled) ? SOFTMAC_SLOWPATH : \ + SOFTMAC_FASTPATH) + + +/* + * Locking description: + * + * WO: Set once and valid for life; + * SL: Serialized by the control path (softmac_wput_nondata_task()) + */ +typedef struct softmac_upper_s { + softmac_t *su_softmac; /* WO */ + queue_t *su_rq; /* WO */ + queue_t *su_wq; /* WO */ + + /* + * List of upper streams that has pending DLPI messages to be processed. + */ + list_node_t su_taskq_list_node; /* softmac_taskq_lock */ + + /* + * non-NULL for IP/ARP streams in the fast-path mode + */ + softmac_lower_t *su_slp; /* SL & su_mutex */ + + /* + * List of all IP/ARP upperstreams on the same softmac (including + * the ones in both data-path modes). + */ + list_node_t su_list_node; /* smac_fp_mutex */ + + /* + * List of datapath switch requests. + */ + list_t su_req_list; /* smac_fp_mutex */ + + /* + * Place holder of RX callbacks used to handles data messages comes + * from the dedicated-lower-stream associated with the IP/ARP stream. + * Another RX callback is softmac_drop_rxinfo, which is a global + * variable. + */ + softmac_lower_rxinfo_t su_rxinfo; /* WO */ + softmac_lower_rxinfo_t su_direct_rxinfo; /* WO */ + + /* + * Used to serialize the DLPI operation and fastpath<->slowpath + * switching over operation. + */ + kmutex_t su_disp_mutex; + kcondvar_t su_disp_cv; + mblk_t *su_pending_head; /* su_disp_mutex */ + mblk_t *su_pending_tail; /* su_disp_mutex */ + boolean_t su_dlpi_pending; /* su_disp_mutex */ + boolean_t su_closing; /* su_disp_mutex */ + + uint32_t su_bound : 1, /* SL */ + su_active : 1, /* SL */ + su_direct : 1; /* SL */ + + /* + * Used for fastpath data path. 
+ */ + kmutex_t su_mutex; + kcondvar_t su_cv; + mblk_t *su_tx_flow_mp; /* su_mutex */ + boolean_t su_tx_busy; /* su_mutex */ + /* + * Number of softmac_srv() operation in fastpath processing. + */ + uint32_t su_tx_inprocess; /* su_mutex */ + /* + * SOFTMAC_SLOWPATH or SOFTMAC_FASTPATH + */ + uint32_t su_mode; /* SL & su_mutex */ + + /* + * Whether this stream is already scheduled in softmac_taskq_list. + */ + boolean_t su_taskq_scheduled; /* softmac_taskq_lock */ +} softmac_upper_t; + +#define SOFTMAC_EQ_PENDING(sup, mp) { \ + if ((sup)->su_pending_head == NULL) { \ + (sup)->su_pending_head = (sup)->su_pending_tail = (mp); \ + } else { \ + (sup)->su_pending_tail->b_next = (mp); \ + (sup)->su_pending_tail = (mp); \ + } \ +} + +#define SOFTMAC_DQ_PENDING(sup, mpp) { \ + if ((sup)->su_pending_head == NULL) { \ + *(mpp) = NULL; \ + } else { \ + *(mpp) = (sup)->su_pending_head; \ + if (((sup)->su_pending_head = (*(mpp))->b_next) == NULL)\ + (sup)->su_pending_tail = NULL; \ + (*(mpp))->b_next = NULL; \ + } \ +} + +/* + * A macro to check whether the write-queue of the lower stream is full + * and packets need to be enqueued. + * + * Because softmac is pushed right above the underlying device and + * _I_INSERT/_I_REMOVE is not processed in the lower stream, it is + * safe to directly access the q_next pointer. + */ +#define SOFTMAC_CANPUTNEXT(q) \ + (!((q)->q_next->q_nfsrv->q_flag & QFULL) || canput((q)->q_next)) + + extern dev_info_t *softmac_dip; #define SOFTMAC_DEV_NAME "softmac" extern int softmac_send_bind_req(softmac_lower_t *, uint_t); +extern int softmac_send_unbind_req(softmac_lower_t *); extern int softmac_send_notify_req(softmac_lower_t *, uint32_t); extern int softmac_send_promisc_req(softmac_lower_t *, t_uscalar_t, boolean_t); -extern void softmac_init(void); -extern void softmac_fini(void); -extern boolean_t softmac_busy(void); +extern void softmac_init(); +extern void softmac_fini(); +extern void softmac_fp_init(); +extern void softmac_fp_fini(); +extern boolean_t softmac_busy(); extern int softmac_fill_capab(ldi_handle_t, softmac_t *); extern int softmac_capab_enable(softmac_lower_t *); -extern void softmac_rput_process_notdata(queue_t *, mblk_t *); +extern void softmac_rput_process_notdata(queue_t *, softmac_upper_t *, + mblk_t *); extern void softmac_rput_process_data(softmac_lower_t *, mblk_t *); +extern int softmac_output(softmac_lower_t *, mblk_t *, t_uscalar_t, + t_uscalar_t, mblk_t **); +extern int softmac_mexchange_error_ack(mblk_t **, t_uscalar_t, + t_uscalar_t, t_uscalar_t); extern int softmac_m_promisc(void *, boolean_t); extern int softmac_m_multicst(void *, boolean_t, const uint8_t *); @@ -231,6 +399,20 @@ extern int softmac_proto_tx(softmac_lower_t *, mblk_t *, mblk_t **); extern void softmac_ioctl_tx(softmac_lower_t *, mblk_t *, mblk_t **); extern void softmac_notify_thread(void *); +extern int softmac_hold(dev_t, softmac_t **); +extern void softmac_rele(softmac_t *); +extern int softmac_lower_setup(softmac_t *, softmac_upper_t *, + softmac_lower_t **); +extern boolean_t softmac_active_set(void *); +extern void softmac_active_clear(void *); +extern int softmac_fastpath_disable(void *); +extern void softmac_fastpath_enable(void *); +extern int softmac_datapath_switch(softmac_t *, boolean_t, boolean_t); + +extern void softmac_wput_data(softmac_upper_t *, mblk_t *); +extern void softmac_wput_nondata(softmac_upper_t *, mblk_t *); +extern void softmac_upperstream_close(softmac_upper_t *); + #ifdef __cplusplus } #endif diff --git 
a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index d89224677b..13c13b2057 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -95,7 +95,6 @@ ip_ard_template ip_area_template ip_ared_template ip_areq_template -ip_aresq_template ip_arma_multi_template ip_aroff_template ip_aron_template diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index 0e58fdc219..e46a4353e7 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -95,7 +95,6 @@ ip_ard_template ip_area_template ip_ared_template ip_areq_template -ip_aresq_template ip_arma_multi_template ip_aroff_template ip_aron_template diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64 index 6606b472bf..a2269a3a2c 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 @@ -95,7 +95,6 @@ ip_ard_template ip_area_template ip_ared_template ip_areq_template -ip_aresq_template ip_arma_multi_template ip_aroff_template ip_aron_template diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64 index 89d40afbbb..4be214bbb5 100644 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 @@ -95,7 +95,6 @@ ip_ard_template ip_area_template ip_ared_template ip_areq_template -ip_aresq_template ip_arma_multi_template ip_aroff_template ip_aron_template |
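The new DL_NOTIFY_CONF primitive and its DL_NOTE_REPLUMB_DONE code give an upper IP/ARP stream a way to confirm the DL_NOTE_REPLUMB indication that softmac_datapath_switch() queues; softmac_datapath_switch_done() then finishes the mode change. The kernel-style sketch below shows how such a confirmation message could be built. It is not taken from this change, it simply mirrors the dl_notify_ind_t construction above, and the helper name is invented.

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/dlpi.h>

/*
 * Build a DL_NOTIFY_CONF(DL_NOTE_REPLUMB_DONE) M_PROTO message.
 * Illustrative only; the real senders live in IP/ARP.
 */
static mblk_t *
replumb_done_conf(void)
{
	mblk_t			*mp;
	dl_notify_conf_t	*dlcp;

	if ((mp = allocb(DL_NOTIFY_CONF_SIZE, BPRI_LO)) == NULL)
		return (NULL);

	mp->b_datap->db_type = M_PROTO;
	mp->b_wptr = mp->b_rptr + DL_NOTIFY_CONF_SIZE;
	bzero(mp->b_rptr, DL_NOTIFY_CONF_SIZE);

	dlcp = (dl_notify_conf_t *)mp->b_rptr;
	dlcp->dl_primitive = DL_NOTIFY_CONF;
	dlcp->dl_notification = DL_NOTE_REPLUMB_DONE;
	return (mp);
}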
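Whether a legacy link runs in fast-path or slow-path is a pure function of two fields protected by smac_fp_mutex: the count of clients that asked for fast-path to be disabled and the administrative "_disable_fastpath" setting (see the DATAPATH_MODE() macro added to softmac_impl.h). A small stand-alone restatement of that decision, with invented names:

#include <stdio.h>
#include <stdint.h>

#define	MODE_SLOWPATH	0x01
#define	MODE_FASTPATH	0x02

/* Fast-path only when no client and no administrator has disabled it. */
static uint32_t
datapath_mode(uint32_t fp_disable_clients, int admin_disabled)
{
	return ((fp_disable_clients != 0 || admin_disabled) ?
	    MODE_SLOWPATH : MODE_FASTPATH);
}

int
main(void)
{
	(void) printf("%s\n", datapath_mode(0, 0) == MODE_FASTPATH ?
	    "fastpath" : "slowpath");
	return (0);
}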
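softmac_main.c now allocates softmac_t from a kmem cache with a constructor and destructor: locks, condition variables and lists are set up once when a buffer enters the cache and torn down only when it leaves, and the destructor asserts that every object comes back quiescent. The user-land analogue below (pthreads instead of kernel mutexes, names invented) shows the same pattern and is runnable as-is; it is a sketch of the idiom, not of the kernel API.

#include <pthread.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>

typedef struct obj {
	pthread_mutex_t	o_lock;		/* initialized once, in the constructor */
	int		o_hold_cnt;	/* must be 0 whenever the object is freed */
} obj_t;

static int
obj_constructor(void *buf, void *arg, int kmflag)
{
	obj_t *op = buf;

	(void) arg;
	(void) kmflag;
	(void) memset(op, 0, sizeof (*op));
	return (pthread_mutex_init(&op->o_lock, NULL));
}

static void
obj_destructor(void *buf, void *arg)
{
	obj_t *op = buf;

	(void) arg;
	assert(op->o_hold_cnt == 0);	/* caller must return it quiescent */
	(void) pthread_mutex_destroy(&op->o_lock);
}

int
main(void)
{
	obj_t *op = malloc(sizeof (*op));

	if (op == NULL || obj_constructor(op, NULL, 0) != 0)
		return (1);
	/* ... use op, always leaving o_hold_cnt at 0 before releasing it ... */
	obj_destructor(op, NULL);
	free(op);
	return (0);
}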
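softmac_m_start() is careful about ordering: bind first (SAP 2 on token ring, SAP 0 otherwise), then DL_PROMISC_SAP, because some legacy drivers mishandle promiscuous traffic on an unbound stream, and only then capability negotiation. The same bind-before-promiscon ordering can be exercised from user land with libdlpi(3LIB). The sketch below is illustrative only and "bge0" is a placeholder link name.

#include <stdio.h>
#include <libdlpi.h>

int
main(void)
{
	dlpi_handle_t	dh;
	uint_t		boundsap;
	int		rc;

	if ((rc = dlpi_open("bge0", &dh, 0)) != DLPI_SUCCESS) {
		(void) fprintf(stderr, "open: %s\n", dlpi_strerror(rc));
		return (1);
	}

	/* Bind to SAP 0, as the driver does on non-token-ring media. */
	if ((rc = dlpi_bind(dh, 0, &boundsap)) == DLPI_SUCCESS)
		rc = dlpi_promiscon(dh, DL_PROMISC_SAP);

	if (rc != DLPI_SUCCESS)
		(void) fprintf(stderr, "setup: %s\n", dlpi_strerror(rc));
	dlpi_close(dh);
	return (rc == DLPI_SUCCESS ? 0 : 1);
}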
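The two private link properties are plain strings: "_disable_fastpath" accepts only "true" or "false", and the read-only "_fastpath" reports "enabled" or "disabled", with getprop returning EINVAL when the caller's buffer is too small. With this change in place they would normally be inspected and changed through dladm show-linkprop and set-linkprop. A user-land restatement of the value handling, with invented helper names:

#include <stdio.h>
#include <string.h>
#include <errno.h>

static int
parse_disable_fastpath(const char *val, int *disable)
{
	if (strcmp(val, "true") == 0)
		*disable = 1;
	else if (strcmp(val, "false") == 0)
		*disable = 0;
	else
		return (EINVAL);
	return (0);
}

static int
report_fastpath(int slowpath, char *val, size_t valsize)
{
	const char *fpstr = slowpath ? "disabled" : "enabled";

	/* Same convention as the driver: fail if the value would truncate. */
	return (strlcpy(val, fpstr, valsize) >= valsize ? EINVAL : 0);
}

int
main(void)
{
	char	buf[16];
	int	disable;

	if (parse_disable_fastpath("true", &disable) != 0)
		return (1);
	if (report_fastpath(disable, buf, sizeof (buf)) != 0)
		return (1);
	(void) printf("_fastpath = %s\n", buf);
	return (0);
}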
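MAC_CAPAB_LEGACY now carries callbacks as well as data, and mac.h grows mac_fastpath_disable()/mac_fastpath_enable() on top of them. The sketch below approximates what such a framework-side consumer might look like; the assumption that the ml_* callbacks take the driver handle returned by mac_driver() is mine, it is not stated in this diff, and the function name is invented.

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/mac.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_priv.h>

/* Temporarily force a legacy (softmac-backed) link into the slow path. */
static int
legacy_fastpath_toggle(mac_handle_t mh)
{
	mac_capab_legacy_t	legacy;
	void			*driver;
	int			err;

	if (!mac_capab_get(mh, MAC_CAPAB_LEGACY, &legacy))
		return (ENOTSUP);	/* not a legacy link */

	driver = mac_driver(mh);
	if ((err = legacy.ml_fastpath_disable(driver)) != 0)
		return (err);

	/* ... run with the data fast-path disabled ... */

	legacy.ml_fastpath_enable(driver);
	return (0);
}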
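softmac_lower_rxinfo_t is a small but central piece of the fast-path design: the lower stream only ever calls whatever (function, argument) pair sl_rxinfo currently points at, so receive handling can be redirected when the data path switches without the lower stream knowing which mode is active. The user-land toy below (all names invented) is an analogy for that indirection, not driver code.

#include <stdio.h>

typedef void (*rx_fn_t)(void *, const char *);

typedef struct rxinfo {
	rx_fn_t	slr_rx;
	void	*slr_arg;
} rxinfo_t;

typedef struct lower {
	rxinfo_t *sl_rxinfo;	/* repointed when the data path switches */
} lower_t;

static void
fastpath_rx(void *arg, const char *pkt)
{
	(void) printf("fast path (%s): %s\n", (const char *)arg, pkt);
}

static void
slowpath_rx(void *arg, const char *pkt)
{
	(void) printf("slow path (%s): %s\n", (const char *)arg, pkt);
}

static void
lower_deliver(lower_t *slp, const char *pkt)
{
	/* The lower stream never cares which mode is active. */
	slp->sl_rxinfo->slr_rx(slp->sl_rxinfo->slr_arg, pkt);
}

int
main(void)
{
	rxinfo_t	fast = { fastpath_rx, "ip0" };
	rxinfo_t	slow = { slowpath_rx, "ip0" };
	lower_t		slp = { &fast };

	lower_deliver(&slp, "pkt1");
	slp.sl_rxinfo = &slow;		/* switch data-path mode */
	lower_deliver(&slp, "pkt2");
	return (0);
}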
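SOFTMAC_EQ_PENDING() and SOFTMAC_DQ_PENDING() keep a simple b_next-linked FIFO on su_pending_head/su_pending_tail, under su_disp_mutex, so DLPI messages arriving while an operation is outstanding are processed in order later. The user-land mock below exercises the same enqueue/dequeue logic with a stand-in for mblk_t; all names are invented and no locking is shown.

#include <stdio.h>
#include <stddef.h>

typedef struct msg {
	struct msg	*b_next;
	int		id;
} msg_t;

typedef struct upper {
	msg_t	*su_pending_head;
	msg_t	*su_pending_tail;
} upper_t;

static void
eq_pending(upper_t *sup, msg_t *mp)
{
	if (sup->su_pending_head == NULL) {
		sup->su_pending_head = sup->su_pending_tail = mp;
	} else {
		sup->su_pending_tail->b_next = mp;
		sup->su_pending_tail = mp;
	}
}

static msg_t *
dq_pending(upper_t *sup)
{
	msg_t *mp = sup->su_pending_head;

	if (mp != NULL) {
		if ((sup->su_pending_head = mp->b_next) == NULL)
			sup->su_pending_tail = NULL;
		mp->b_next = NULL;
	}
	return (mp);
}

int
main(void)
{
	upper_t	sup = { NULL, NULL };
	msg_t	a = { NULL, 1 }, b = { NULL, 2 };
	msg_t	*mp;

	eq_pending(&sup, &a);
	eq_pending(&sup, &b);
	while ((mp = dq_pending(&sup)) != NULL)
		(void) printf("dequeued %d\n", mp->id);
	return (0);
}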
