diff options
Diffstat (limited to 'usr/src/uts')
170 files changed, 15867 insertions, 5690 deletions
diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c index d596c313c5..8f305114d1 100644 --- a/usr/src/uts/common/inet/ip/ip6_input.c +++ b/usr/src/uts/common/inet/ip/ip6_input.c @@ -1910,6 +1910,13 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, hck_flags = DB_CKSUMFLAGS(mp); + if (hck_flags & HCK_FULLCKSUM_OK) { + /* + * Hardware has already verified the checksum. + */ + return (B_TRUE); + } + if (hck_flags & HCK_FULLCKSUM) { /* * Full checksum has been computed by the hardware @@ -1918,9 +1925,6 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, * order to protect against faulty hardware, compare * it against -0 (0xFFFF) to see if it's valid. */ - if (hck_flags & HCK_FULLCKSUM_OK) - return (B_TRUE); - cksum = DB_CKSUM16(mp); if (cksum == 0xFFFF) return (B_TRUE); diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c index a54b2e8737..0781560daf 100644 --- a/usr/src/uts/common/inet/ip/ip_input.c +++ b/usr/src/uts/common/inet/ip/ip_input.c @@ -2260,6 +2260,13 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, hck_flags = DB_CKSUMFLAGS(mp); + if (hck_flags & HCK_FULLCKSUM_OK) { + /* + * Hardware has already verified the checksum. + */ + return (B_TRUE); + } + if (hck_flags & HCK_FULLCKSUM) { /* * Full checksum has been computed by the hardware @@ -2268,9 +2275,6 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, * order to protect against faulty hardware, compare * it against -0 (0xFFFF) to see if it's valid. 
*/ - if (hck_flags & HCK_FULLCKSUM_OK) - return (B_TRUE); - cksum = DB_CKSUM16(mp); if (cksum == 0xFFFF) return (B_TRUE); diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c index 0d0d943676..3849d1fe06 100644 --- a/usr/src/uts/common/inet/ip/ip_netinfo.c +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c @@ -1175,10 +1175,10 @@ ip_isvalidchecksum(net_handle_t neti, mblk_t *mp) ASSERT(mp != NULL); if (dohwcksum && - DB_CKSUM16(mp) != 0xFFFF && - (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) && - (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) && - (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) + ((DB_CKSUM16(mp) != 0xFFFF && + (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM)) || + (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK)) && + (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM_OK)) return (1); hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2; diff --git a/usr/src/uts/common/inet/iptun/iptun.c b/usr/src/uts/common/inet/iptun/iptun.c index 099a14fc2e..215221241d 100644 --- a/usr/src/uts/common/inet/iptun/iptun.c +++ b/usr/src/uts/common/inet/iptun/iptun.c @@ -379,92 +379,58 @@ iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, /* ARGSUSED */ static int iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { iptun_t *iptun = barg; - mac_propval_range_t range; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); int err; if ((err = iptun_enter(iptun)) != 0) return (err); - if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { + switch (pr_num) { + case MAC_PROP_IPTUN_HOPLIMIT: + ASSERT(pr_valsize >= sizeof (uint32_t)); + *(uint32_t *)pr_val = iptun->iptun_hoplimit; + break; + + case MAC_PROP_IPTUN_ENCAPLIMIT: + *(uint32_t *)pr_val = iptun->iptun_encaplimit; + break; + default: err = ENOTSUP; - goto done; - } - if (is_default && is_possible) { - err = EINVAL; - goto done; 
} +done: + iptun_exit(iptun); + return (err); +} - *perm = MAC_PROP_PERM_RW; - - if (is_possible) { - if (pr_valsize < sizeof (mac_propval_range_t)) { - err = EINVAL; - goto done; - } - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - } else if (pr_valsize < sizeof (uint32_t)) { - err = EINVAL; - goto done; - } +/* ARGSUSED */ +static void +iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + iptun_t *iptun = barg; switch (pr_num) { case MAC_PROP_IPTUN_HOPLIMIT: - if (is_possible) { - range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; - range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; - } else if (is_default) { - *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; - } else { - *(uint32_t *)pr_val = iptun->iptun_hoplimit; - } + mac_prop_info_set_range_uint32(prh, + IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); + mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); break; + case MAC_PROP_IPTUN_ENCAPLIMIT: - if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { - err = ENOTSUP; - goto done; - } - if (is_possible) { - range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; - range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; - } else if (is_default) { - *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; - } else { - *(uint32_t *)pr_val = iptun->iptun_encaplimit; - } + if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) + break; + mac_prop_info_set_range_uint32(prh, + IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); + mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); break; - case MAC_PROP_MTU: { - uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); - - if (is_possible) { - range.range_uint32[0].mpur_min = - iptun->iptun_typeinfo->iti_minmtu; - range.range_uint32[0].mpur_max = maxmtu; - } else { - /* - * The MAC module knows the current value and should - * never call us for it. There is also no default - * MTU, as by default, it is a dynamic property. 
- */ - err = ENOTSUP; - goto done; - } + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + iptun->iptun_typeinfo->iti_minmtu, + iptun_get_maxmtu(iptun, NULL, 0)); break; } - default: - err = EINVAL; - goto done; - } - if (is_possible) - bcopy(&range, pr_val, sizeof (range)); -done: - iptun_exit(iptun); - return (err); } uint_t @@ -3514,7 +3480,7 @@ iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) } static mac_callbacks_t iptun_m_callbacks = { - .mc_callbacks = (MC_SETPROP | MC_GETPROP), + .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), .mc_getstat = iptun_m_getstat, .mc_start = iptun_m_start, .mc_stop = iptun_m_stop, @@ -3522,6 +3488,8 @@ static mac_callbacks_t iptun_m_callbacks = { .mc_multicst = iptun_m_multicst, .mc_unicst = iptun_m_unicst, .mc_tx = iptun_m_tx, + .mc_reserved = NULL, .mc_setprop = iptun_m_setprop, - .mc_getprop = iptun_m_getprop + .mc_getprop = iptun_m_getprop, + .mc_propinfo = iptun_m_propinfo }; diff --git a/usr/src/uts/common/io/afe/afe.c b/usr/src/uts/common/io/afe/afe.c index ca67e753b9..a80775c502 100644 --- a/usr/src/uts/common/io/afe/afe.c +++ b/usr/src/uts/common/io/afe/afe.c @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -119,9 +119,11 @@ static int afe_m_stat(void *, uint_t, uint64_t *); static int afe_m_start(void *); static void afe_m_stop(void *); static int afe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int afe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void afe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static unsigned afe_intr(caddr_t); static void afe_startmac(afe_t *); static void afe_stopmac(afe_t *); @@ -173,7 +175,7 @@ static mii_ops_t afe_mii_ops = { }; static mac_callbacks_t afe_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, afe_m_stat, afe_m_start, afe_m_stop, @@ -181,12 +183,14 @@ static mac_callbacks_t afe_m_callbacks = { afe_m_multicst, afe_m_unicst, afe_m_tx, + NULL, afe_m_ioctl, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ afe_m_setprop, afe_m_getprop, + afe_m_propinfo }; @@ -2372,12 +2376,12 @@ afe_m_stat(void *arg, uint_t stat, uint64_t *val) } int -afe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +afe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { afe_t *afep = arg; - return (mii_m_getprop(afep->afe_mii, name, num, flags, sz, val, perm)); + return (mii_m_getprop(afep->afe_mii, name, num, sz, val)); } int @@ -2389,6 +2393,15 @@ afe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (mii_m_setprop(afep->afe_mii, name, num, sz, val)); } +static void +afe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + afe_t *afep = arg; + + mii_m_propinfo(afep->afe_mii, name, num, prh); +} + /* * Debugging and error reporting. 
*/ diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 32ce4dfd08..eac04f2087 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +33,38 @@ * aggregation group. * * A set of MAC ports are associated with each association group. + * + * Aggr pseudo TX rings + * -------------------- + * The underlying ports (NICs) in an aggregation can have TX rings. To + * enhance aggr's performance, these TX rings are made available to the + * aggr layer as pseudo TX rings. The concept of pseudo rings is not new. + * They are already present and implemented on the RX side, where they + * are called pseudo RX rings. The same concept is extended to the TX side where + * each TX ring of an underlying port is reflected in aggr as a pseudo + * TX ring. Thus each pseudo TX ring will map to a specific hardware TX + * ring. Even in the case of a NIC that does not have a TX ring, a pseudo + * TX ring is given to the aggregation layer. + * + * With this change, the outgoing stack depth looks much better: + * + * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() -> + * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx() + * + * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings: + * SRS_TX_AGGR and SRS_TX_BW_AGGR. + * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX + * ring belonging to a port on which the packet has to be sent. + * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4 + * policy and then uses the fanout_hint passed to it to pick a TX ring from + * the selected port. 
+ * + * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where + * bandwidth limit is applied first on the outgoing packet and the packets + * allowed to go out would call mac_tx_aggr_mode() to send the packet on a + * particular TX ring. */ #include <sys/types.h> @@ -71,9 +103,8 @@ static void aggr_m_ioctl(void *, queue_t *, mblk_t *); static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); - +static void aggr_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, @@ -113,7 +144,7 @@ static id_space_t *key_ids; static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; #define AGGR_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) static mac_callbacks_t aggr_m_callbacks = { AGGR_M_CALLBACK_FLAGS, @@ -123,13 +154,15 @@ static mac_callbacks_t aggr_m_callbacks = { aggr_m_promisc, aggr_m_multicst, NULL, - aggr_m_tx, + NULL, + NULL, aggr_m_ioctl, aggr_m_capab_get, NULL, NULL, aggr_m_setprop, - aggr_m_getprop + NULL, + aggr_m_propinfo }; /*ARGSUSED*/ @@ -144,6 +177,8 @@ aggr_grp_constructor(void *buf, void *arg, int kmflag) rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL); grp->lg_link_state = LINK_STATE_UNKNOWN; return (0); } @@ -164,6 +199,8 @@ aggr_grp_destructor(void *buf, void *arg) mutex_destroy(&grp->lg_port_lock); cv_destroy(&grp->lg_port_cv); rw_destroy(&grp->lg_tx_lock); + 
mutex_destroy(&grp->lg_tx_flowctl_lock); + cv_destroy(&grp->lg_tx_flowctl_cv); } void @@ -536,7 +573,7 @@ aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, } /* - * Add a pseudo Rx ring for the given HW ring handle. + * Add a pseudo RX ring for the given HW ring handle. */ static int aggr_add_pseudo_rx_ring(aggr_port_t *port, @@ -553,7 +590,7 @@ aggr_add_pseudo_rx_ring(aggr_port_t *port, } /* - * No slot for this new Rx ring. + * No slot for this new RX ring. */ if (j == MAX_RINGS_PER_GROUP) return (EIO); @@ -567,19 +604,20 @@ aggr_add_pseudo_rx_ring(aggr_port_t *port, * The group is already registered, dynamically add a new ring to the * mac group. */ - mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring); if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; ring->arr_hw_rh = NULL; ring->arr_port = NULL; rx_grp->arg_ring_cnt--; - mac_hwring_teardown(hw_rh); + } else { + mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, + mac_find_ring(rx_grp->arg_gh, j)); } return (err); } /* - * Remove the pseudo Rx ring of the given HW ring handle. + * Remove the pseudo RX ring of the given HW ring handle. */ static void aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) @@ -632,8 +670,8 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) /* * Get the list the the underlying HW rings. 
*/ - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, - MAC_RING_TYPE_RX); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); if (port->lp_hwgh != NULL) { /* @@ -671,7 +709,7 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) port->lp_hwgh = NULL; } } else { - port->lp_grp_added = B_TRUE; + port->lp_rx_grp_added = B_TRUE; } done: mac_perim_exit(pmph); @@ -695,12 +733,12 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) ASSERT(MAC_PERIM_HELD(grp->lg_mh)); mac_perim_enter_by_mh(port->lp_mh, &pmph); - if (!port->lp_grp_added) + if (!port->lp_rx_grp_added) goto done; ASSERT(rx_grp->arg_gh != NULL); - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, - MAC_RING_TYPE_RX); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + &hwgh, hw_rh, MAC_RING_TYPE_RX); /* * If hw_rh_cnt is 0, it means that the underlying port does not @@ -725,7 +763,196 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_rx_client_restart(port->lp_mch); } - port->lp_grp_added = B_FALSE; + port->lp_rx_grp_added = B_FALSE; +done: + mac_perim_exit(pmph); +} + +/* + * Add a pseudo TX ring for the given HW ring handle. + */ +static int +aggr_add_pseudo_tx_ring(aggr_port_t *port, + aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh, + mac_ring_handle_t *pseudo_rh) +{ + aggr_pseudo_tx_ring_t *ring; + int err; + int i; + + ASSERT(MAC_PERIM_HELD(port->lp_mh)); + for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { + ring = tx_grp->atg_rings + i; + if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE)) + break; + } + /* + * No slot for this new TX ring. + */ + if (i == MAX_RINGS_PER_GROUP) + return (EIO); + /* + * The following 4 statements needs to be done before + * calling mac_group_add_ring(). Otherwise it will + * result in an assertion failure in mac_init_ring(). 
+ */ + ring->atr_flags |= MAC_PSEUDO_RING_INUSE; + ring->atr_hw_rh = hw_rh; + ring->atr_port = port; + tx_grp->atg_ring_cnt++; + + /* + * The TX side has no concept of ring groups unlike RX groups. + * There is just a single group which stores all the TX rings. + * This group will be used to store aggr's pseudo TX rings. + */ + if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) { + ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; + ring->atr_hw_rh = NULL; + ring->atr_port = NULL; + tx_grp->atg_ring_cnt--; + } else { + *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i); + if (hw_rh != NULL) { + mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, + mac_find_ring(tx_grp->atg_gh, i)); + } + } + return (err); +} + +/* + * Remove the pseudo TX ring of the given HW ring handle. + */ +static void +aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp, + mac_ring_handle_t pseudo_hw_rh) +{ + aggr_pseudo_tx_ring_t *ring; + int i; + + for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { + ring = tx_grp->atg_rings + i; + if (ring->atr_rh != pseudo_hw_rh) + continue; + + ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE); + mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh); + ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; + mac_hwring_teardown(ring->atr_hw_rh); + ring->atr_hw_rh = NULL; + ring->atr_port = NULL; + tx_grp->atg_ring_cnt--; + break; + } +} + +/* + * This function is called to create pseudo rings over hardware rings of + * the underlying device. There is a 1:1 mapping between the pseudo TX + * rings of the aggr and the hardware rings of the underlying port. + */ +static int +aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) +{ + aggr_grp_t *grp = port->lp_grp; + mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh; + mac_perim_handle_t pmph; + int hw_rh_cnt, i = 0, j; + int err = 0; + + ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + /* + * Get the list the the underlying HW rings. 
+ */ + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + NULL, hw_rh, MAC_RING_TYPE_TX); + + /* + * Even if the underlying NIC does not have TX rings, we + * still make a pseudo TX ring for that NIC with NULL as + * the ring handle. + */ + if (hw_rh_cnt == 0) + port->lp_tx_ring_cnt = 1; + else + port->lp_tx_ring_cnt = hw_rh_cnt; + + port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + port->lp_tx_ring_cnt), KM_SLEEP); + port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + port->lp_tx_ring_cnt), KM_SLEEP); + + if (hw_rh_cnt == 0) { + if ((err = aggr_add_pseudo_tx_ring(port, tx_grp, + NULL, &pseudo_rh)) == 0) { + port->lp_tx_rings[0] = NULL; + port->lp_pseudo_tx_rings[0] = pseudo_rh; + } + } else { + for (i = 0; err == 0 && i < hw_rh_cnt; i++) { + err = aggr_add_pseudo_tx_ring(port, + tx_grp, hw_rh[i], &pseudo_rh); + if (err != 0) + break; + port->lp_tx_rings[i] = hw_rh[i]; + port->lp_pseudo_tx_rings[i] = pseudo_rh; + } + } + + if (err != 0) { + if (hw_rh_cnt != 0) { + for (j = 0; j < i; j++) { + aggr_rem_pseudo_tx_ring(tx_grp, + port->lp_pseudo_tx_rings[j]); + } + } + kmem_free(port->lp_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + kmem_free(port->lp_pseudo_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + port->lp_tx_ring_cnt = 0; + } else { + port->lp_tx_grp_added = B_TRUE; + port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch, + aggr_tx_ring_update, port); + } + mac_perim_exit(pmph); + return (err); +} + +/* + * This function is called by aggr to remove pseudo TX rings over the + * HW rings of the underlying port. 
+ */ +static void +aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) +{ + aggr_grp_t *grp = port->lp_grp; + mac_perim_handle_t pmph; + int i; + + ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + if (!port->lp_tx_grp_added) + goto done; + + ASSERT(tx_grp->atg_gh != NULL); + + for (i = 0; i < port->lp_tx_ring_cnt; i++) + aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]); + + kmem_free(port->lp_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + kmem_free(port->lp_pseudo_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + + port->lp_tx_ring_cnt = 0; + (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh); + port->lp_tx_grp_added = B_FALSE; done: mac_perim_exit(pmph); } @@ -813,6 +1040,9 @@ aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, * Create the pseudo ring for each HW ring of the underlying * port. */ + rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group); + if (rc != 0) + goto bail; rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); if (rc != 0) goto bail; @@ -877,6 +1107,7 @@ bail: aggr_port_stop(port); mac_perim_exit(pmph); } + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); (void) aggr_grp_rem_port(grp, port, NULL, NULL); } @@ -1001,6 +1232,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, mac_perim_handle_t mph; int err; int i; + kt_did_t tid = 0; /* need at least one port */ if (nports == 0) @@ -1029,10 +1261,17 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_started = B_FALSE; grp->lg_promisc = B_FALSE; grp->lg_lacp_done = B_FALSE; + grp->lg_tx_notify_done = B_FALSE; grp->lg_lacp_head = grp->lg_lacp_tail = NULL; grp->lg_lacp_rx_thread = thread_create(NULL, 0, aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); + grp->lg_tx_notify_thread = thread_create(NULL, 0, + aggr_tx_notify_thread, grp, 0, 
&p0, TS_RUN, minclsyspri); + grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + MAX_RINGS_PER_GROUP), KM_SLEEP); + grp->lg_tx_blocked_cnt = 0; bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); + bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); aggr_lacp_init_grp(grp); /* add MAC ports to group */ @@ -1127,6 +1366,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, * port. Note that this is done after the aggr registers the * mac. */ + VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0); VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); if (aggr_port_notify_link(grp, port)) link_state_changed = B_TRUE; @@ -1172,7 +1412,21 @@ bail: while (grp->lg_lacp_rx_thread != NULL) cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); mutex_exit(&grp->lg_lacp_lock); - + /* + * Inform the tx_notify thread to exit. + */ + mutex_enter(&grp->lg_tx_flowctl_lock); + if (grp->lg_tx_notify_thread != NULL) { + tid = grp->lg_tx_notify_thread->t_did; + grp->lg_tx_notify_done = B_TRUE; + cv_signal(&grp->lg_tx_flowctl_cv); + } + mutex_exit(&grp->lg_tx_flowctl_lock); + if (tid != 0) + thread_join(tid); + + kmem_free(grp->lg_tx_blocked_rings, + (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); rw_exit(&aggr_grp_lock); AGGR_GRP_REFRELE(grp); return (err); @@ -1272,6 +1526,7 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, grp->lg_nports--; mac_perim_exit(mph); + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_port_delete(port); /* @@ -1378,7 +1633,20 @@ aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) mac_perim_exit(pmph); } + /* + * aggr_rem_pseudo_tx_group() is not called here. Instead + * it is called from inside aggr_grp_rem_port() after the + * port has been detached. 
The reason is that + * aggr_rem_pseudo_tx_group() removes one ring at a time + * and if there is still traffic going on, then there + * is the possibility of aggr_find_tx_ring() returning a + * removed ring for transmission. Once the port has been + * detached, that port will not be used and + * aggr_find_tx_ring() will not return any rings + * belonging to it. + */ aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); + /* remove port from group */ rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, &link_state_changed); @@ -1408,6 +1676,7 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) mod_hash_val_t val; mac_perim_handle_t mph, pmph; int err; + kt_did_t tid = 0; rw_enter(&aggr_grp_lock, RW_WRITER); @@ -1455,6 +1724,18 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) while (grp->lg_lacp_rx_thread != NULL) cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); mutex_exit(&grp->lg_lacp_lock); + /* + * Inform the tx_notify_thread to exit. + */ + mutex_enter(&grp->lg_tx_flowctl_lock); + if (grp->lg_tx_notify_thread != NULL) { + tid = grp->lg_tx_notify_thread->t_did; + grp->lg_tx_notify_done = B_TRUE; + cv_signal(&grp->lg_tx_flowctl_cv); + } + mutex_exit(&grp->lg_tx_flowctl_lock); + if (tid != 0) + thread_join(tid); mac_perim_enter_by_mh(grp->lg_mh, &mph); @@ -1468,6 +1749,7 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) aggr_port_stop(port); (void) aggr_grp_detach_port(grp, port); mac_perim_exit(pmph); + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); aggr_port_delete(port); port = cport; @@ -1475,6 +1757,8 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) mac_perim_exit(mph); + kmem_free(grp->lg_tx_blocked_rings, + (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); /* * Wait for the port's lacp timer thread and its notification callback * to exit before calling mac_unregister() since both needs to access @@ -1600,6 +1884,37 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) return 
(0); } +int +aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver; + + if (rx_ring->arr_hw_rh != NULL) { + *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat); + } else { + aggr_port_t *port = rx_ring->arr_port; + + *val = mac_stat_get(port->lp_mh, stat); + + } + return (0); +} + +int +aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver; + + if (tx_ring->atr_hw_rh != NULL) { + *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat); + } else { + aggr_port_t *port = tx_ring->atr_port; + + *val = mac_stat_get(port->lp_mh, stat); + } + return (0); +} + static int aggr_m_stat(void *arg, uint_t stat, uint64_t *val) { @@ -1821,7 +2136,6 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) if (cap_rings->mr_type == MAC_RING_TYPE_RX) { cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; - cap_rings->mr_rget = aggr_fill_ring; /* * An aggregation advertises only one (pseudo) RX @@ -1829,12 +2143,15 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) * the underlying devices. 
*/ cap_rings->mr_gnum = 1; - cap_rings->mr_gget = aggr_fill_group; cap_rings->mr_gaddring = NULL; cap_rings->mr_gremring = NULL; } else { - return (B_FALSE); + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt; + cap_rings->mr_gnum = 0; } + cap_rings->mr_rget = aggr_fill_ring; + cap_rings->mr_gget = aggr_fill_group; break; } case MAC_CAPAB_AGGR: @@ -1845,6 +2162,8 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) aggr_cap = cap_data; aggr_cap->mca_rename_fn = aggr_grp_port_rename; aggr_cap->mca_unicst = aggr_m_unicst; + aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring; + aggr_cap->mca_arg = arg; } return (B_TRUE); } @@ -1863,18 +2182,24 @@ aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, { aggr_grp_t *grp = arg; aggr_pseudo_rx_group_t *rx_group; - - ASSERT(rtype == MAC_RING_TYPE_RX && index == 0); - rx_group = &grp->lg_rx_group; - rx_group->arg_gh = gh; - rx_group->arg_grp = grp; - - infop->mgi_driver = (mac_group_driver_t)rx_group; - infop->mgi_start = NULL; - infop->mgi_stop = NULL; - infop->mgi_addmac = aggr_addmac; - infop->mgi_remmac = aggr_remmac; - infop->mgi_count = rx_group->arg_ring_cnt; + aggr_pseudo_tx_group_t *tx_group; + + ASSERT(index == 0); + if (rtype == MAC_RING_TYPE_RX) { + rx_group = &grp->lg_rx_group; + rx_group->arg_gh = gh; + rx_group->arg_grp = grp; + + infop->mgi_driver = (mac_group_driver_t)rx_group; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = aggr_addmac; + infop->mgi_remmac = aggr_remmac; + infop->mgi_count = rx_group->arg_ring_cnt; + } else { + tx_group = &grp->lg_tx_group; + tx_group->atg_gh = gh; + } } /* @@ -1905,6 +2230,7 @@ aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; + aggr_mac_intr.mi_ddi_handle = NULL; infop->mri_driver = 
(mac_ring_driver_t)rx_ring; infop->mri_start = aggr_pseudo_start_ring; @@ -1912,6 +2238,34 @@ aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_intr = aggr_mac_intr; infop->mri_poll = aggr_rx_poll; + + infop->mri_stat = aggr_rx_ring_stat; + break; + } + case MAC_RING_TYPE_TX: { + aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group; + aggr_pseudo_tx_ring_t *tx_ring; + + ASSERT(rg_index == -1); + ASSERT(index < tx_group->atg_ring_cnt); + + tx_ring = &tx_group->atg_rings[index]; + tx_ring->atr_rh = rh; + + infop->mri_driver = (mac_ring_driver_t)tx_ring; + infop->mri_start = NULL; + infop->mri_stop = NULL; + infop->mri_tx = aggr_ring_tx; + infop->mri_stat = aggr_tx_ring_stat; + /* + * Use the hw TX ring handle to find if the ring needs + * serialization or not. For NICs that do not expose + * Tx rings, atr_hw_rh will be NULL. + */ + if (tx_ring->atr_hw_rh != NULL) { + infop->mri_flags = + mac_hwring_getinfo(tx_ring->atr_hw_rh); + } break; } default: @@ -2399,34 +2753,33 @@ aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, } int -aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range) +aggr_grp_possible_mtu_range(aggr_grp_t *grp, uint32_t *min, uint32_t *max) { mac_propval_range_t *vals; mac_propval_uint32_range_t *ur; aggr_port_t *port; mac_perim_handle_t mph; - mac_prop_t macprop; - uint_t perm, i; - uint32_t min = 0, max = (uint32_t)-1; + uint_t i; int err = 0; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + *min = 0; + *max = (uint32_t)-1; + vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports, KM_SLEEP); - macprop.mp_id = MAC_PROP_MTU; - macprop.mp_name = "mtu"; - macprop.mp_flags = MAC_PROP_POSSIBLE; for (port = grp->lg_ports, i = 0; port != NULL; port = port->lp_next, i++) { mac_perim_enter_by_mh(port->lp_mh, &mph); - err = mac_get_prop(port->lp_mh, &macprop, vals + i, - sizeof (mac_propval_range_t), &perm); + err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, + NULL, 0, vals + i, NULL); 
mac_perim_exit(mph); if (err != 0) break; } + /* * if any of the underlying ports does not support changing MTU then * just return ENOTSUP @@ -2435,47 +2788,42 @@ aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range) ASSERT(err != 0); goto done; } - range->mpr_count = 1; - range->mpr_type = MAC_PROPVAL_UINT32; + for (i = 0; i < grp->lg_nports; i++) { - ur = &((vals + i)->range_uint32[0]); + ur = &((vals + i)->mpr_range_uint32[0]); /* * Take max of the min, for range_min; that is the minimum * MTU value for an aggregation is the maximum of the * minimum values of all the underlying ports */ - if (ur->mpur_min > min) - min = ur->mpur_min; + if (ur->mpur_min > *min) + *min = ur->mpur_min; /* Take min of the max, for range_max */ - if (ur->mpur_max < max) - max = ur->mpur_max; + if (ur->mpur_max < *max) + *max = ur->mpur_max; } - range->range_uint32[0].mpur_min = min; - range->range_uint32[0].mpur_max = max; done: kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports); + return (err); } -/*ARGSUSED*/ -static int -aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +static void +aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) { - mac_propval_range_t range; - int err = ENOTSUP; aggr_grp_t *grp = m_driver; + _NOTE(ARGUNUSED(pr_name)); + switch (pr_num) { - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0) - return (err); - bcopy(&range, pr_val, sizeof (range)); - return (0); + case MAC_PROP_MTU: { + uint32_t min, max; + + if (aggr_grp_possible_mtu_range(grp, &min, &max) != 0) + return; + mac_prop_info_set_range_uint32(prh, min, max); + break; + } } - return (err); } diff --git a/usr/src/uts/common/io/aggr/aggr_lacp.c 
b/usr/src/uts/common/io/aggr/aggr_lacp.c index 936e783e9e..2892f1438a 100644 --- a/usr/src/uts/common/io/aggr/aggr_lacp.c +++ b/usr/src/uts/common/io/aggr/aggr_lacp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -653,7 +653,10 @@ lacp_xmit_sm(aggr_port_t *portp) fill_lacp_pdu(portp, (lacp_t *)(mp->b_rptr + sizeof (struct ether_header))); - (void) mac_tx(portp->lp_mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL); + /* Send the packet over the first TX ring */ + mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp); + if (mp != NULL) + freemsg(mp); pl->NTT = B_FALSE; portp->lp_lacp_stats.LACPDUsTx++; @@ -1322,8 +1325,14 @@ lacp_selection_logic(aggr_port_t *portp) if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, &aggrp->aggr.PartnerSystem) == 0 && (tpp->lp_lacp.PartnerOperKey == - aggrp->aggr.PartnerOperAggrKey)) + aggrp->aggr.PartnerOperAggrKey)) { + /* Set aggregation Partner MAC and key */ + aggrp->aggr.PartnerSystem = + pl->PartnerOperSystem; + aggrp->aggr.PartnerOperAggrKey = + pl->PartnerOperKey; break; + } } if (tpp == NULL) { @@ -2293,7 +2302,11 @@ aggr_lacp_rx(mblk_t *dmp) if (receive_marker_pdu(portp, dmp) != 0) break; - (void) mac_tx(portp->lp_mch, dmp, 0, MAC_DROP_ON_NO_DESC, NULL); + /* Send the packet over the first TX ring */ + dmp = mac_hwring_send_priv(portp->lp_mch, + portp->lp_tx_rings[0], dmp); + if (dmp != NULL) + freemsg(dmp); mac_perim_exit(mph); AGGR_PORT_REFRELE(portp); return; diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 2c7e74131a..00545d2c03 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -71,7 +71,7 @@ aggr_port_destructor(void *buf, void *arg) ASSERT(port->lp_mnh == NULL); ASSERT(port->lp_mphp == NULL); - ASSERT(!port->lp_grp_added); + ASSERT(!port->lp_rx_grp_added && !port->lp_tx_grp_added); ASSERT(port->lp_hwgh == NULL); } @@ -111,7 +111,7 @@ aggr_port_init_callbacks(aggr_port_t *port) port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port); /* * Hold a reference of the grp and the port and this reference will - * be release when the thread exits. + * be released when the thread exits. * * The reference on the port is used for aggr_port_delete() to * continue without waiting for the thread to exit; the reference diff --git a/usr/src/uts/common/io/aggr/aggr_send.c b/usr/src/uts/common/io/aggr/aggr_send.c index bc0a19368d..7d423f267e 100644 --- a/usr/src/uts/common/io/aggr/aggr_send.c +++ b/usr/src/uts/common/io/aggr/aggr_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/conf.h> #include <sys/modctl.h> #include <sys/sunddi.h> +#include <sys/callb.h> #include <sys/vlan.h> #include <sys/strsun.h> #include <sys/strsubr.h> @@ -68,79 +69,163 @@ aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) grp->lg_mac_tx_policy = mac_policy; } +#define HASH_HINT(hint) \ + ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) + /* - * Send function invoked by the MAC service module. + * Function invoked by mac layer to find a specific TX ring on a port + * to send data. 
*/ mblk_t * -aggr_m_tx(void *arg, mblk_t *mp) +aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh) { aggr_grp_t *grp = arg; aggr_port_t *port; - mblk_t *nextp; - mac_tx_cookie_t cookie; uint64_t hash; - void *mytx_handle; - - for (;;) { - rw_enter(&grp->lg_tx_lock, RW_READER); - if (grp->lg_ntx_ports == 0) { - /* - * We could have returned from aggr_m_start() before - * the ports were actually attached. Drop the chain. - */ - rw_exit(&grp->lg_tx_lock); - freemsgchain(mp); - return (NULL); - } - - nextp = mp->b_next; - mp->b_next = NULL; - - hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, - B_TRUE); - port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; + rw_enter(&grp->lg_tx_lock, RW_READER); + if (grp->lg_ntx_ports == 0) { /* - * Bump the active Tx ref count so that the port won't - * be deleted. The reference count will be dropped in mac_tx(). + * We could have returned from aggr_m_start() before + * the ports were actually attached. Drop the chain. */ - mytx_handle = mac_tx_hold(port->lp_mch); rw_exit(&grp->lg_tx_lock); + freemsgchain(mp); + return (NULL); + } + hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE); + port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; - if (mytx_handle == NULL) { - /* - * The port is quiesced. - */ - freemsg(mp); - } else { - mblk_t *ret_mp = NULL; - - /* - * It is fine that the port state changes now. - * Set MAC_TX_NO_HOLD to inform mac_tx() not to bump - * the active Tx ref again. Use hash as the hint so - * to direct traffic to different TX rings. Note below - * bit operation is needed to get the most benefit - * from the mac_tx() hash algorithm. - */ + /* + * Use hash as the hint so to direct traffic to + * different TX rings. Note below bit operation + * is needed in case hint is 0 to get the most + * benefit from HASH_HINT() algorithm. 
+ */ + if (port->lp_tx_ring_cnt > 1) { + if (hint == 0) { hash = (hash << 24 | hash << 16 | hash); hash = (hash << 32 | hash); - cookie = mac_tx(port->lp_mch, mp, (uintptr_t)hash, - MAC_TX_NO_ENQUEUE | MAC_TX_NO_HOLD, &ret_mp); + } else { + hash = hint; + } + hash = HASH_HINT(hash); + *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt]; + } else { + *rh = port->lp_pseudo_tx_rings[0]; + } + rw_exit(&grp->lg_tx_lock); - mac_tx_rele(port->lp_mch, mytx_handle); + return (mp); +} - if (cookie != NULL) { - ret_mp->b_next = nextp; - mp = ret_mp; - break; - } +/* + * aggr_tx_notify_thread: + * + * aggr_tx_ring_update() callback function wakes up this thread when + * it gets called. This thread will call mac_tx_ring_update() to + * notify upper mac of flow control getting relieved. Note that + * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly + * because aggr_tx_ring_update() is called from lower mac with + * mi_rw_lock held. + */ +void +aggr_tx_notify_thread(void *arg) +{ + callb_cpr_t cprinfo; + aggr_grp_t *grp = (aggr_grp_t *)arg; + mac_ring_handle_t pseudo_mrh; + + CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr, + "aggr_tx_notify_thread"); + + mutex_enter(&grp->lg_tx_flowctl_lock); + while (!grp->lg_tx_notify_done) { + if ((grp->lg_tx_blocked_cnt) == 0) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&grp->lg_tx_flowctl_cv, + &grp->lg_tx_flowctl_lock); + CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock); + continue; + } + while (grp->lg_tx_blocked_cnt != 0) { + grp->lg_tx_blocked_cnt--; + pseudo_mrh = + grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt]; + mutex_exit(&grp->lg_tx_flowctl_lock); + mac_tx_ring_update(grp->lg_mh, pseudo_mrh); + mutex_enter(&grp->lg_tx_flowctl_lock); } + } + /* + * The grp is being destroyed, exit the thread. 
+ */ + grp->lg_tx_notify_thread = NULL; + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +/* + * Callback function registered with lower mac to receive wakeups from + * drivers when flow control is relieved (i.e. Tx descriptors are + * available). + */ +void +aggr_tx_ring_update(void *arg1, uintptr_t arg2) +{ + aggr_port_t *port = (aggr_port_t *)arg1; + mac_ring_handle_t mrh = (mac_ring_handle_t)arg2; + mac_ring_handle_t pseudo_mrh; + aggr_grp_t *grp = port->lp_grp; + int i = 0; - if ((mp = nextp) == NULL) - break; + if (mrh == NULL) { + /* + * If the underlying NIC does not expose TX rings, + * still as pseudo TX ring is presented to the + * aggr mac. + */ + pseudo_mrh = port->lp_pseudo_tx_rings[0]; + } else { + for (i = 0; i < port->lp_tx_ring_cnt; i++) { + if (port->lp_tx_rings[i] == mrh) + break; + } + ASSERT(i < port->lp_tx_ring_cnt); + pseudo_mrh = port->lp_pseudo_tx_rings[i]; } - return (mp); + mutex_enter(&grp->lg_tx_flowctl_lock); + /* + * It could be possible that some (broken?) device driver + * could send more than one wakeup on the same ring. In + * such a case, multiple instances of the same pseudo TX + * ring should not be saved in lg_tx_blocked_rings[] + * array. So first check if woken up ring (pseudo_mrh) is + * already in the lg_tx_blocked_rings[] array. + */ + for (i = 0; i < grp->lg_tx_blocked_cnt; i++) { + if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) { + mutex_exit(&grp->lg_tx_flowctl_lock); + return; + } + } + /* A distinct mac_ring_handle. Save and increment count */ + grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh; + grp->lg_tx_blocked_cnt++; + cv_signal(&grp->lg_tx_flowctl_cv); + mutex_exit(&grp->lg_tx_flowctl_lock); +} + +/* + * Send function invoked by the MAC service module. 
+ */ +mblk_t * +aggr_ring_tx(void *arg, mblk_t *mp) +{ + aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg; + aggr_port_t *port = pseudo_ring->atr_port; + + return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp)); } /* diff --git a/usr/src/uts/common/io/arn/arn_main.c b/usr/src/uts/common/io/arn/arn_main.c index 32f22b007a..68e61a6773 100644 --- a/usr/src/uts/common/io/arn/arn_main.c +++ b/usr/src/uts/common/io/arn/arn_main.c @@ -139,11 +139,13 @@ static void arn_m_ioctl(void *, queue_t *, mblk_t *); static int arn_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int arn_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void arn_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* MAC Callcack Functions */ static mac_callbacks_t arn_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, arn_m_stat, arn_m_start, arn_m_stop, @@ -151,12 +153,14 @@ static mac_callbacks_t arn_m_callbacks = { arn_m_multicst, arn_m_unicst, arn_m_tx, + NULL, arn_m_ioctl, NULL, NULL, NULL, arn_m_setprop, - arn_m_getprop + arn_m_getprop, + arn_m_propinfo }; /* @@ -2518,17 +2522,26 @@ arn_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int arn_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct arn_softc *sc = arg; int err = 0; err = ieee80211_getprop(&sc->sc_isc, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +arn_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct arn_softc *sc = arg; + + ieee80211_propinfo(&sc->sc_isc, pr_name, wldp_pr_num, prh); +} + /* return bus cachesize in 4B word units */ 
static void arn_pci_config_cachesize(struct arn_softc *sc) diff --git a/usr/src/uts/common/io/atge/atge.h b/usr/src/uts/common/io/atge/atge.h index 01c295047a..68a11705c9 100644 --- a/usr/src/uts/common/io/atge/atge.h +++ b/usr/src/uts/common/io/atge/atge.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +30,7 @@ extern "C" { #endif +#include <sys/ethernet.h> #include <sys/mac_provider.h> #include "atge_l1e_reg.h" diff --git a/usr/src/uts/common/io/atge/atge_main.c b/usr/src/uts/common/io/atge/atge_main.c index c368df44aa..938d0ddefb 100644 --- a/usr/src/uts/common/io/atge/atge_main.c +++ b/usr/src/uts/common/io/atge/atge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -204,16 +204,18 @@ static int atge_m_stat(void *, uint_t, uint64_t *); static int atge_m_start(void *); static void atge_m_stop(void *); static int atge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int atge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void atge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int atge_m_unicst(void *, const uint8_t *); static int atge_m_multicst(void *, boolean_t, const uint8_t *); static int atge_m_promisc(void *, boolean_t); static mblk_t *atge_m_tx(void *, mblk_t *); static mac_callbacks_t atge_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, atge_m_stat, atge_m_start, atge_m_stop, @@ -221,12 +223,14 @@ static mac_callbacks_t atge_m_callbacks = { atge_m_multicst, atge_m_unicst, atge_m_tx, + NULL, /* mc_reserved */ NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ 
atge_m_setprop, atge_m_getprop, + atge_m_propinfo }; /* @@ -1724,13 +1728,12 @@ atge_m_stat(void *arg, uint_t stat, uint64_t *val) } int -atge_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +atge_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { atge_t *atgep = arg; - return (mii_m_getprop(atgep->atge_mii, name, num, flags, sz, val, - perm)); + return (mii_m_getprop(atgep->atge_mii, name, num, sz, val)); } int @@ -1757,6 +1760,14 @@ atge_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (r); } +static void +atge_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + atge_t *atgep = arg; + + mii_m_propinfo(atgep->atge_mii, name, num, prh); +} void atge_program_ether(atge_t *atgep) diff --git a/usr/src/uts/common/io/ath/ath_main.c b/usr/src/uts/common/io/ath/ath_main.c index 451f827415..fa2a3dba24 100644 --- a/usr/src/uts/common/io/ath/ath_main.c +++ b/usr/src/uts/common/io/ath/ath_main.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -221,10 +221,12 @@ static void ath_m_ioctl(void *, queue_t *, mblk_t *); static int ath_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int ath_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void ath_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t ath_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ath_m_stat, ath_m_start, ath_m_stop, @@ -232,12 +234,14 @@ static mac_callbacks_t ath_m_callbacks = { ath_m_multicst, ath_m_unicst, ath_m_tx, + NULL, ath_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, ath_m_setprop, - ath_m_getprop + ath_m_getprop, + ath_m_propinfo }; /* @@ -1779,21 +1783,30 @@ ath_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } -/* ARGSUSED */ + static int ath_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { ath_t *asc = arg; int err = 0; err = ieee80211_getprop(&asc->asc_isc, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +ath_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + ath_t *asc = arg; + + ieee80211_propinfo(&asc->asc_isc, pr_name, wldp_pr_num, mph); +} + +static void ath_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) { ath_t *asc = arg; diff --git a/usr/src/uts/common/io/atu/atu.c b/usr/src/uts/common/io/atu/atu.c index 881d72f869..fdbb932fca 100644 --- a/usr/src/uts/common/io/atu/atu.c +++ b/usr/src/uts/common/io/atu/atu.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1511,6 +1511,26 @@ atu_m_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t len, return (0); } +static int +atu_m_getprop(void *arg, const char *name, mac_prop_id_t id, + uint_t length, void *buf) +{ + struct atu_softc *sc = (struct atu_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + return (ieee80211_getprop(ic, name, id, length, buf)); +} + +static void +atu_m_propinfo(void *arg, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t mph) +{ + struct atu_softc *sc = (struct atu_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, name, id, mph); +} + static void atu_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { @@ -1635,7 +1655,7 @@ atu_m_stat(void *arg, uint_t stat, uint64_t *val) } static mac_callbacks_t atu_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, atu_m_stat, atu_m_start, atu_m_stop, @@ -1643,10 +1663,12 @@ static mac_callbacks_t atu_m_callbacks = { atu_m_multicst, atu_m_unicst, atu_m_tx, + NULL, atu_m_ioctl, NULL, NULL, NULL, atu_m_setprop, - ieee80211_getprop + atu_m_getprop, + atu_m_propinfo }; diff --git a/usr/src/uts/common/io/bfe/bfe.c b/usr/src/uts/common/io/bfe/bfe.c index b71bcc229b..42e87bb745 100644 --- a/usr/src/uts/common/io/bfe/bfe.c +++ b/usr/src/uts/common/io/bfe/bfe.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/stream.h> @@ -161,7 +161,7 @@ static void bfe_clear_stats(bfe_t *); static void bfe_gather_stats(bfe_t *); static void bfe_error(dev_info_t *, char *, ...); static int bfe_mac_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int bfe_mac_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int bfe_tx_reclaim(bfe_ring_t *); @@ -1651,92 +1651,66 @@ bfe_mac_getstat(void *arg, uint_t stat, uint64_t *val) return (err); } -/*ARGSUSED*/ int -bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { bfe_t *bfe = (bfe_t *)arg; int err = 0; - boolean_t dfl = flags & MAC_PROP_DEFAULT; - - if (sz == 0) - return (EINVAL); - *perm = MAC_PROP_PERM_RW; switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&bfe->bfe_chip.duplex, val, - sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&bfe->bfe_chip.duplex, val, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - bcopy(&bfe->bfe_chip.speed, val, sizeof (uint64_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&bfe->bfe_chip.speed, val, sizeof (uint64_t)); break; case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_aneg : bfe->bfe_adv_aneg; + *(uint8_t *)val = bfe->bfe_adv_aneg; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100fdx : bfe->bfe_adv_100fdx; + *(uint8_t *)val = bfe->bfe_adv_100fdx; break; + case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)val = - dfl ? 
bfe->bfe_cap_100fdx : bfe->bfe_adv_100fdx; + *(uint8_t *)val = bfe->bfe_adv_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100hdx : bfe->bfe_adv_100hdx; + *(uint8_t *)val = bfe->bfe_adv_100hdx; break; + case MAC_PROP_EN_100HDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100hdx : bfe->bfe_adv_100hdx; + *(uint8_t *)val = bfe->bfe_adv_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10fdx : bfe->bfe_adv_10fdx; + *(uint8_t *)val = bfe->bfe_adv_10fdx; break; + case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10fdx : bfe->bfe_adv_10fdx; + *(uint8_t *)val = bfe->bfe_adv_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10hdx : bfe->bfe_adv_10hdx; + *(uint8_t *)val = bfe->bfe_adv_10hdx; break; + case MAC_PROP_EN_10HDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10hdx : bfe->bfe_adv_10hdx; + *(uint8_t *)val = bfe->bfe_adv_10hdx; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100T4 : bfe->bfe_adv_100T4; + *(uint8_t *)val = bfe->bfe_adv_100T4; break; + case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)val = - dfl ? 
bfe->bfe_cap_100T4 : bfe->bfe_adv_100T4; + *(uint8_t *)val = bfe->bfe_adv_100T4; break; default: @@ -1746,6 +1720,51 @@ bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (err); } + +static void +bfe_mac_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + bfe_t *bfe = (bfe_t *)arg; + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_aneg); + break; + + case MAC_PROP_EN_100FDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100fdx); + break; + + case MAC_PROP_EN_100HDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100hdx); + break; + + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_10fdx); + break; + + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_10hdx); + break; + + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100T4); + break; + } +} + + /*ARGSUSED*/ int bfe_mac_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, @@ -2067,7 +2086,7 @@ bfe_mac_set_multicast(void *arg, boolean_t add, const uint8_t *macaddr) } static mac_callbacks_t bfe_mac_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, bfe_mac_getstat, /* gets stats */ bfe_mac_start, /* starts mac */ bfe_mac_stop, /* stops mac */ @@ -2075,12 +2094,14 @@ static mac_callbacks_t bfe_mac_callbacks = { bfe_mac_set_multicast, /* multicast implementation */ bfe_mac_set_ether_addr, /* sets ethernet address (unicast) */ bfe_mac_transmit_packet, /* transmits packet */ + NULL, NULL, /* ioctl */ NULL, /* getcap */ NULL, /* open */ NULL, /* close */ bfe_mac_setprop, bfe_mac_getprop, 
+ bfe_mac_propinfo }; static void diff --git a/usr/src/uts/common/io/bge/bge_impl.h b/usr/src/uts/common/io/bge/bge_impl.h index 86b8d3093a..350cc32ac2 100644 --- a/usr/src/uts/common/io/bge/bge_impl.h +++ b/usr/src/uts/common/io/bge/bge_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -441,7 +441,11 @@ typedef struct recv_ring { bge_rule_info_t *mac_addr_rule; uint8_t mac_addr_val[ETHERADDRL]; int poll_flag; /* Polling flag */ -} recv_ring_t; /* 0x90 (144) bytes */ + + /* Per-ring statistics */ + uint64_t rx_pkts; /* Received Packets Count */ + uint64_t rx_bytes; /* Received Bytes Count */ +} recv_ring_t; /* @@ -1196,6 +1200,7 @@ void bge_chip_msi_trig(bge_t *bgep); void bge_init_kstats(bge_t *bgep, int instance); void bge_fini_kstats(bge_t *bgep); int bge_m_stat(void *arg, uint_t stat, uint64_t *val); +int bge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* bge_log.c */ #if BGE_DEBUGGING diff --git a/usr/src/uts/common/io/bge/bge_kstats.c b/usr/src/uts/common/io/bge/bge_kstats.c index 73994cb8d3..c10a1b4601 100644 --- a/usr/src/uts/common/io/bge/bge_kstats.c +++ b/usr/src/uts/common/io/bge/bge_kstats.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1126,3 +1126,28 @@ bge_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +bge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + recv_ring_t *rx_ring = (recv_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c index 73045e9e3e..fdd38676e7 100644 --- a/usr/src/uts/common/io/bge/bge_main2.c +++ b/usr/src/uts/common/io/bge/bge_main2.c @@ -34,10 +34,6 @@ * This is the string displayed by modinfo, etc. */ static char bge_ident[] = "Broadcom Gb Ethernet"; -/* - * Make sure you keep the version ID up to date! - */ -static char bge_version[] = "Broadcom Gb Ethernet v1.14"; /* * Property names @@ -116,13 +112,18 @@ static int bge_unicst_set(void *, const uint8_t *, static int bge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int bge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void bge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int bge_set_priv_prop(bge_t *, const char *, uint_t, const void *); static int bge_get_priv_prop(bge_t *, const char *, uint_t, - uint_t, void *); + void *); +static void bge_priv_propinfo(const char *, + mac_prop_info_handle_t); -#define BGE_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +#define BGE_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t bge_m_callbacks = { BGE_M_CALLBACK_FLAGS, @@ -133,22 +134,28 @@ static mac_callbacks_t bge_m_callbacks = { bge_m_multicst, NULL, bge_m_tx, + NULL, bge_m_ioctl, bge_m_getcapab, NULL, NULL, bge_m_setprop, - 
bge_m_getprop + bge_m_getprop, + bge_m_propinfo }; -mac_priv_prop_t bge_priv_prop[] = { - {"_adv_asym_pause_cap", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_RW} +char *bge_priv_prop[] = { + "_adv_asym_pause_cap", + "_adv_pause_cap", + "_drain_max", + "_msi_cnt", + "_rx_intr_coalesce_blank_time", + "_tx_intr_coalesce_blank_time", + "_rx_intr_coalesce_pkt_cnt", + "_tx_intr_coalesce_pkt_cnt", + NULL }; -#define BGE_MAX_PRIV_PROPS \ - (sizeof (bge_priv_prop) / sizeof (mac_priv_prop_t)) - uint8_t zero_addr[6] = {0, 0, 0, 0, 0, 0}; /* * ========== Transmit and receive ring reinitialisation ========== @@ -716,7 +723,6 @@ bge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, bge_t *bgep = barg; int err = 0; uint32_t cur_mtu, new_mtu; - uint_t maxsdu; link_flowctrl_t fl; mutex_enter(bgep->genlock); @@ -819,16 +825,11 @@ reprogram: err = EINVAL; break; } - maxsdu = bgep->chipid.ethmax_size - - sizeof (struct ether_header); - err = mac_maxsdu_update(bgep->mh, maxsdu); - if (err == 0) { - bgep->bge_dma_error = B_TRUE; - bgep->manual_reset = B_TRUE; - bge_chip_stop(bgep, B_TRUE); - bge_wake_factotum(bgep); - err = 0; - } + bgep->bge_dma_error = B_TRUE; + bgep->manual_reset = B_TRUE; + bge_chip_stop(bgep, B_TRUE); + bge_wake_factotum(bgep); + err = 0; break; case MAC_PROP_FLOWCTRL: bcopy(pr_val, &fl, sizeof (fl)); @@ -887,71 +888,36 @@ reprogram: /* ARGSUSED */ static int bge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { bge_t *bgep = barg; int err = 0; - link_flowctrl_t fl; - uint64_t speed; - int flags = bgep->chipid.flags; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - - if (pr_valsize == 0) - return (EINVAL); - bzero(pr_val, pr_valsize); - - *perm = MAC_PROP_PERM_RW; - - mutex_enter(bgep->genlock); - if ((bgep->param_loop_mode != BGE_LOOP_NONE && - bge_param_locked(pr_num)) || - ((bgep->chipid.flags & CHIP_FLAG_SERDES) 
&& - ((pr_num == MAC_PROP_EN_100FDX_CAP) || - (pr_num == MAC_PROP_EN_100HDX_CAP) || - (pr_num == MAC_PROP_EN_10FDX_CAP) || - (pr_num == MAC_PROP_EN_10HDX_CAP))) || - (DEVICE_5906_SERIES_CHIPSETS(bgep) && - ((pr_num == MAC_PROP_EN_1000FDX_CAP) || - (pr_num == MAC_PROP_EN_1000HDX_CAP)))) - *perm = MAC_PROP_PERM_READ; - mutex_exit(bgep->genlock); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_duplex_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_duplex_t)); bcopy(&bgep->param_link_duplex, pr_val, sizeof (link_duplex_t)); break; - case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (speed)) - return (EINVAL); - speed = bgep->param_link_speed * 1000000ull; + case MAC_PROP_SPEED: { + uint64_t speed = bgep->param_link_speed * 1000000ull; + + ASSERT(pr_valsize >= sizeof (speed)); bcopy(&speed, pr_val, sizeof (speed)); break; + } case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_state_t)); bcopy(&bgep->link_state, pr_val, sizeof (link_state_t)); break; case MAC_PROP_AUTONEG: - if (is_default) - *(uint8_t *)pr_val = 1; - else - *(uint8_t *)pr_val = bgep->param_adv_autoneg; + *(uint8_t *)pr_val = bgep->param_adv_autoneg; break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (fl)) - return (EINVAL); - if (is_default) { - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - } + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl; + + ASSERT(pr_valsize >= sizeof (fl)); if (bgep->param_link_rx_pause && !bgep->param_link_tx_pause) @@ -970,148 +936,135 @@ bge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, fl = LINK_FLOWCTRL_BI; bcopy(&fl, pr_val, sizeof (fl)); break; + } case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - 
*(uint8_t *)pr_val = bgep->param_adv_1000fdx; + *(uint8_t *)pr_val = bgep->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_en_1000fdx; + *(uint8_t *)pr_val = bgep->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_adv_1000hdx; + *(uint8_t *)pr_val = bgep->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_en_1000hdx; + *(uint8_t *)pr_val = bgep->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_100fdx; - } + *(uint8_t *)pr_val = bgep->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_100fdx; - } + *(uint8_t *)pr_val = bgep->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_100hdx; - } + *(uint8_t *)pr_val = bgep->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 
0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_100hdx; - } + *(uint8_t *)pr_val = bgep->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_10fdx; - } + *(uint8_t *)pr_val = bgep->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_10fdx; - } + *(uint8_t *)pr_val = bgep->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_10hdx; - } + *(uint8_t *)pr_val = bgep->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_10hdx; - } + *(uint8_t *)pr_val = bgep->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_PRIVATE: - err = bge_get_priv_prop(bgep, pr_name, pr_flags, + err = bge_get_priv_prop(bgep, pr_name, pr_valsize, pr_val); return (err); - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = BGE_DEFAULT_MTU; - if (!(flags & CHIP_FLAG_NO_JUMBO)) - range.range_uint32[0].mpur_max = - BGE_MAXIMUM_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } default: return (ENOTSUP); } return (0); } +static void +bge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + bge_t *bgep = 
barg; + int flags = bgep->chipid.flags; + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. + */ + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + if (DEVICE_5906_SERIES_CHIPSETS(bgep)) + mac_prop_info_set_default_uint8(prh, 0); + else + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, + (flags & CHIP_FLAG_SERDES) ? 0 : 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, BGE_DEFAULT_MTU, + (flags & CHIP_FLAG_NO_JUMBO) ? 
+ BGE_DEFAULT_MTU : BGE_MAXIMUM_MTU); + break; + + case MAC_PROP_PRIVATE: + bge_priv_propinfo(pr_name, prh); + break; + } + + mutex_enter(bgep->genlock); + if ((bgep->param_loop_mode != BGE_LOOP_NONE && + bge_param_locked(pr_num)) || + ((bgep->chipid.flags & CHIP_FLAG_SERDES) && + ((pr_num == MAC_PROP_EN_100FDX_CAP) || + (pr_num == MAC_PROP_EN_100HDX_CAP) || + (pr_num == MAC_PROP_EN_10FDX_CAP) || + (pr_num == MAC_PROP_EN_10HDX_CAP))) || + (DEVICE_5906_SERIES_CHIPSETS(bgep) && + ((pr_num == MAC_PROP_EN_1000FDX_CAP) || + (pr_num == MAC_PROP_EN_1000HDX_CAP)))) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + mutex_exit(bgep->genlock); +} + /* ARGSUSED */ static int bge_set_priv_prop(bge_t *bgep, const char *pr_name, uint_t pr_valsize, @@ -1235,53 +1188,61 @@ bge_set_priv_prop(bge_t *bgep, const char *pr_name, uint_t pr_valsize, } static int -bge_get_priv_prop(bge_t *bge, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +bge_get_priv_prop(bge_t *bge, const char *pr_name, uint_t pr_valsize, + void *pr_val) { - int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - value = (is_default? 1 : bge->param_adv_pause); - err = 0; - goto done; - } - if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - value = (is_default? 1 : bge->param_adv_asym_pause); - err = 0; - goto done; - } - if (strcmp(pr_name, "_drain_max") == 0) { - value = (is_default? 64 : bge->param_drain_max); - err = 0; - goto done; - } - if (strcmp(pr_name, "_msi_cnt") == 0) { - value = (is_default? 
0 : bge->param_msi_cnt); - err = 0; - goto done; - } + if (strcmp(pr_name, "_adv_pause_cap") == 0) + value = bge->param_adv_pause; + else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) + value = bge->param_adv_asym_pause; + else if (strcmp(pr_name, "_drain_max") == 0) + value = bge->param_drain_max; + else if (strcmp(pr_name, "_msi_cnt") == 0) + value = bge->param_msi_cnt; + else if (strcmp(pr_name, "_rx_intr_coalesce_blank_time") == 0) + value = bge->chipid.rx_ticks_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_blank_time") == 0) + value = bge->chipid.tx_ticks_norm; + else if (strcmp(pr_name, "_rx_intr_coalesce_pkt_cnt") == 0) + value = bge->chipid.rx_count_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_pkt_cnt") == 0) + value = bge->chipid.tx_count_norm; + else + return (ENOTSUP); - if (strcmp(pr_name, "_intr_coalesce_blank_time") == 0) { - value = (is_default? bge_rx_ticks_norm : - bge->chipid.rx_ticks_norm); - err = 0; - goto done; - } + (void) snprintf(pr_val, pr_valsize, "%d", value); + return (0); +} - if (strcmp(pr_name, "_intr_coalesce_pkt_cnt") == 0) { - value = (is_default? 
bge_rx_count_norm : - bge->chipid.rx_count_norm); - err = 0; - goto done; - } +static void +bge_priv_propinfo(const char *pr_name, mac_prop_info_handle_t mph) +{ + char valstr[64]; + int value; -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); - } - return (err); + if (strcmp(pr_name, "_adv_pause_cap") == 0) + value = 1; + else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) + value = 1; + else if (strcmp(pr_name, "_drain_max") == 0) + value = 64; + else if (strcmp(pr_name, "_msi_cnt") == 0) + value = 0; + else if (strcmp(pr_name, "_rx_intr_coalesce_blank_time") == 0) + value = bge_rx_ticks_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_blank_time") == 0) + value = bge_tx_ticks_norm; + else if (strcmp(pr_name, "_rx_intr_coalesce_pkt_cnt") == 0) + value = bge_rx_count_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_pkt_cnt") == 0) + value = bge_tx_count_norm; + else + return; + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(mph, valstr); } /* @@ -1682,6 +1643,7 @@ bge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = bge_ring_start; infop->mri_stop = NULL; infop->mri_poll = bge_poll_ring; + infop->mri_stat = bge_rx_ring_stat; mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; @@ -3517,7 +3479,6 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_max_sdu = cidp->ethmax_size - sizeof (struct ether_header); macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = bge_priv_prop; - macp->m_priv_prop_count = BGE_MAX_PRIV_PROPS; macp->m_v12n = MAC_VIRT_LEVEL1; /* @@ -3551,7 +3512,6 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) #endif ddi_report_dev(devinfo); - BGE_REPORT((bgep, "bge version: %s", bge_version)); return (DDI_SUCCESS); diff --git a/usr/src/uts/common/io/bge/bge_recv2.c b/usr/src/uts/common/io/bge/bge_recv2.c index fb8e1fa881..ecda51cb0a 100644 --- a/usr/src/uts/common/io/bge/bge_recv2.c +++ 
b/usr/src/uts/common/io/bge/bge_recv2.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,11 +65,12 @@ bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp) bge_mbx_put(bgep, brp->chip_mbx_reg, slot); } -static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p); +static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, + recv_ring_t *rrp); #pragma inline(bge_receive_packet) static mblk_t * -bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p) +bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, recv_ring_t *rrp) { bge_rbd_t hw_rbd; buff_ring_t *brp; @@ -237,10 +238,13 @@ bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p) if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM) pflags |= HCK_FULLCKSUM; if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM) - pflags |= HCK_IPV4_HDRCKSUM; + pflags |= HCK_IPV4_HDRCKSUM_OK; if (pflags != 0) - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, - hw_rbd.tcp_udp_cksum, pflags, 0); + mac_hcksum_set(mp, 0, 0, 0, hw_rbd.tcp_udp_cksum, pflags); + + /* Update per-ring rx statistics */ + rrp->rx_pkts++; + rrp->rx_bytes += len; refill: /* @@ -313,7 +317,8 @@ bge_receive_ring(bge_t *bgep, recv_ring_t *rrp) while ((slot != *rrp->prod_index_p) && /* Note: volatile */ (recv_cnt < BGE_MAXPKT_RCVED)) { - if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) { + if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp)) + != NULL) { *tail = mp; tail = &mp->b_next; recv_cnt++; @@ -383,7 +388,8 @@ bge_poll_ring(void *arg, int bytes_to_pickup) /* Note: volatile */ while ((slot != *rrp->prod_index_p) && (sz <= bytes_to_pickup)) { - if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) { + if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp)) + != NULL) { *tail = mp; sz += msgdsize(mp); tail = &mp->b_next; diff --git a/usr/src/uts/common/io/bge/bge_send.c 
b/usr/src/uts/common/io/bge/bge_send.c index 11f23e9f64..be3f179f31 100644 --- a/usr/src/uts/common/io/bge/bge_send.c +++ b/usr/src/uts/common/io/bge/bge_send.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -533,7 +533,7 @@ bge_ring_tx(void *arg, mblk_t *mp) /* * Retrieve checksum offloading info. */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); /* * Calculate pseudo checksum if needed. diff --git a/usr/src/uts/common/io/bridge.c b/usr/src/uts/common/io/bridge.c index ffd215c132..7b45039b62 100644 --- a/usr/src/uts/common/io/bridge.c +++ b/usr/src/uts/common/io/bridge.c @@ -492,36 +492,16 @@ bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, static int bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { bridge_mac_t *bmp = arg; int err = 0; _NOTE(ARGUNUSED(pr_name)); switch (pr_num) { - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = bmp->bm_maxsdu; - bcopy(&range, pr_val, sizeof (range)); - *perm = MAC_PROP_PERM_RW; - break; - } case MAC_PROP_STATUS: - if (pr_valsize < sizeof (bmp->bm_linkstate)) { - err = EINVAL; - } else { - bcopy(&bmp->bm_linkstate, pr_val, - sizeof (&bmp->bm_linkstate)); - *perm = MAC_PROP_PERM_READ; - } + ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate)); + bcopy(&bmp->bm_linkstate, pr_val, sizeof (&bmp->bm_linkstate)); break; default: @@ -531,8 +511,27 @@ bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 
return (err); } +static void +bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + bridge_mac_t *bmp = arg; + + _NOTE(ARGUNUSED(pr_name)); + + switch (pr_num) { + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu, + bmp->bm_maxsdu); + break; + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + } +} + static mac_callbacks_t bridge_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, bridge_m_getstat, bridge_m_start, bridge_m_stop, @@ -540,12 +539,14 @@ static mac_callbacks_t bridge_m_callbacks = { bridge_m_multicst, bridge_m_unicst, bridge_m_tx, + NULL, /* reserved */ NULL, /* ioctl */ NULL, /* getcapab */ NULL, /* open */ NULL, /* close */ bridge_m_setprop, - bridge_m_getprop + bridge_m_getprop, + bridge_m_propinfo }; /* diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index c91793723e..36d1c3d6ff 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -429,8 +429,9 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) dld_ioc_hwgrpget_t *hwgrpp = karg; dld_hwgrpinfo_t hwgrp, *hip; mac_handle_t mh = NULL; - int i, err, grpnum; + int i, err, rgrpnum, tgrpnum; uint_t bytes_left; + int totgrps = 0; zoneid_t zoneid = crgetzoneid(cred); if (zoneid != GLOBAL_ZONEID && @@ -445,8 +446,35 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) hip = (dld_hwgrpinfo_t *) ((uchar_t *)arg + sizeof (dld_ioc_hwgrpget_t)); bytes_left = hwgrpp->dih_size; - grpnum = mac_hwgrp_num(mh); - for (i = 0; i < grpnum; i++) { + + rgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_RX); + /* display the default group information first */ + if (rgrpnum > 0) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwrxgrp_info(mh, 0, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings != 0) { + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { + err = EFAULT; + goto done; + } + } + hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + tgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_TX); + /* display the default group information first */ + if (tgrpnum > 0) { if (sizeof (dld_hwgrpinfo_t) > bytes_left) { err = ENOSPC; goto done; @@ -455,15 +483,68 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) bzero(&hwgrp, sizeof (hwgrp)); bcopy(mac_name(mh), hwgrp.dhi_link_name, sizeof (hwgrp.dhi_link_name)); - mac_get_hwgrp_info(mh, i, &hwgrp.dhi_grp_num, - &hwgrp.dhi_n_rings, &hwgrp.dhi_grp_type, + mac_get_hwtxgrp_info(mh, tgrpnum - 1, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings != 0) { + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) 
{ + err = EFAULT; + goto done; + } + } + hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + /* Rest of the rx groups */ + for (i = 1; i < rgrpnum; i++) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwrxgrp_info(mh, i, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings == 0) + continue; if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { err = EFAULT; goto done; } hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + /* Rest of the tx group */ + tgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_TX); + for (i = 0; i < tgrpnum - 1; i++) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwtxgrp_info(mh, i, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings == 0) + continue; + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { + err = EFAULT; + goto done; + } + + hip++; + totgrps++; bytes_left -= sizeof (dld_hwgrpinfo_t); } @@ -471,7 +552,7 @@ done: if (mh != NULL) dld_mac_close(mh); if (err == 0) - hwgrpp->dih_n_groups = grpnum; + hwgrpp->dih_n_groups = totgrps; return (err); } @@ -542,7 +623,7 @@ done: } /* - * DLDIOC_SET/GETPROP + * DLDIOC_SET/GETMACPROP */ static int drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, @@ -552,7 +633,6 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, dls_dl_handle_t dlh = NULL; dls_link_t *dlp = NULL; mac_perim_handle_t mph = NULL; - mac_prop_t macprop; dld_ioc_macprop_t *kprop; datalink_id_t linkid; datalink_class_t class; @@ -606,6 +686,12 @@ 
drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, goto done; } + if (!mac_prop_check_size(kprop->pr_num, kprop->pr_valsize, + kprop->pr_flags & DLD_PROP_POSSIBLE)) { + err = ENOBUFS; + goto done; + } + switch (kprop->pr_num) { case MAC_PROP_ZONE: if (set) { @@ -630,6 +716,9 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, else err = drv_ioc_clrap(linkid); } else { + if (kprop->pr_valsize == 0) + return (ENOBUFS); + kprop->pr_perm_flags = MAC_PROP_PERM_RW; err = drv_ioc_getap(linkid, dlap); } @@ -652,19 +741,51 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, err = 0; } break; - default: - macprop.mp_name = kprop->pr_name; - macprop.mp_id = kprop->pr_num; - macprop.mp_flags = kprop->pr_flags; - + default: { + mac_propval_range_t range, *rangep = NULL; + void *default_val = NULL; + uint_t default_size = 0; + void *val = kprop->pr_val; + uint_t val_size = kprop->pr_valsize; + + /* set a property value */ if (set) { - err = mac_set_prop(dlp->dl_mh, &macprop, kprop->pr_val, - kprop->pr_valsize); - } else { - kprop->pr_perm_flags = MAC_PROP_PERM_RW; - err = mac_get_prop(dlp->dl_mh, &macprop, kprop->pr_val, - kprop->pr_valsize, &kprop->pr_perm_flags); + err = mac_set_prop(dlp->dl_mh, kprop->pr_num, + kprop->pr_name, kprop->pr_val, kprop->pr_valsize); + break; + } + + /* + * Get the property value, default, or possible value + * depending on flags passed from the user. + */ + + /* a property has RW permissions by default */ + kprop->pr_perm_flags = MAC_PROP_PERM_RW; + + if (kprop->pr_flags & DLD_PROP_POSSIBLE) { + rangep = &range; + } else if (kprop->pr_flags & DLD_PROP_DEFAULT) { + default_val = val; + default_size = val_size; } + + /* + * Always return the permissions, and optionally return + * the default value or possible values range. 
+ */ + mac_prop_info(dlp->dl_mh, kprop->pr_num, kprop->pr_name, + default_val, default_size, rangep, &kprop->pr_perm_flags); + err = 0; + + if (default_val == NULL && rangep == NULL) { + err = mac_get_prop(dlp->dl_mh, kprop->pr_num, + kprop->pr_name, kprop->pr_val, kprop->pr_valsize); + } + + if (rangep != NULL) + bcopy(rangep, val, sizeof (range)); + } } done: @@ -673,6 +794,7 @@ done: if (dlp != NULL) dls_link_rele(dlp); + if (mph != NULL) { int32_t cpuid; void *mdip = NULL; @@ -684,9 +806,10 @@ done: mac_perim_exit(mph); - if (mdip != NULL) + if (mdip != NULL && cpuid != -1) mac_client_set_intr_cpu(mdip, dlp->dl_mch, cpuid); } + if (dlh != NULL) dls_devnet_rele_tmp(dlh); @@ -828,7 +951,8 @@ drv_ioc_getap(datalink_id_t linkid, struct dlautopush *dlap) (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&dap) != 0) { rw_exit(&dld_ap_hash_lock); - return (ENOENT); + dlap->dap_npush = 0; + return (0); } /* @@ -1221,7 +1345,7 @@ static dld_ioc_info_t drv_ioc_list[] = { {DLDIOC_GETMACPROP, DLDCOPYIN, sizeof (dld_ioc_macprop_t), drv_ioc_getprop, NULL}, {DLDIOC_GETHWGRP, DLDCOPYINOUT, sizeof (dld_ioc_hwgrpget_t), - drv_ioc_hwgrpget, secpolicy_dl_config}, + drv_ioc_hwgrpget, NULL}, }; typedef struct dld_ioc_modentry { diff --git a/usr/src/uts/common/io/dld/dld_flow.c b/usr/src/uts/common/io/dld/dld_flow.c index 281217d02d..7171953a2d 100644 --- a/usr/src/uts/common/io/dld/dld_flow.c +++ b/usr/src/uts/common/io/dld/dld_flow.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -69,20 +69,23 @@ static int dld_walk_flow_cb(mac_flowinfo_t *finfo, void *arg) { flowinfo_state_t *statep = arg; - dld_flowinfo_t fi; + dld_flowinfo_t *fi; if (statep->fi_bufsize < sizeof (dld_flowinfo_t)) return (ENOSPC); - (void) strlcpy(fi.fi_flowname, finfo->fi_flow_name, - sizeof (fi.fi_flowname)); - fi.fi_linkid = finfo->fi_link_id; - fi.fi_flow_desc = finfo->fi_flow_desc; - fi.fi_resource_props = finfo->fi_resource_props; + fi = kmem_zalloc(sizeof (*fi), KM_SLEEP); + (void) strlcpy(fi->fi_flowname, finfo->fi_flow_name, + sizeof (fi->fi_flowname)); + fi->fi_linkid = finfo->fi_link_id; + fi->fi_flow_desc = finfo->fi_flow_desc; + fi->fi_resource_props = finfo->fi_resource_props; - if (copyout(&fi, statep->fi_fl, sizeof (fi)) != 0) { + if (copyout(fi, statep->fi_fl, sizeof (*fi)) != 0) { + kmem_free(fi, sizeof (*fi)); return (EFAULT); } + kmem_free(fi, sizeof (*fi)); statep->fi_nflows++; statep->fi_bufsize -= sizeof (dld_flowinfo_t); statep->fi_fl += sizeof (dld_flowinfo_t); @@ -98,13 +101,14 @@ int dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) { flowinfo_state_t state; - mac_flowinfo_t finfo; + mac_flowinfo_t *finfo; int err = 0; /* For now, one can only view flows from the global zone. 
*/ if (crgetzoneid(credp) != GLOBAL_ZONEID) return (EPERM); + finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); state.fi_bufsize = wf->wf_len; state.fi_fl = (uchar_t *)uaddr + sizeof (*wf); state.fi_nflows = 0; @@ -113,12 +117,14 @@ dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) err = mac_link_flow_walk(wf->wf_linkid, dld_walk_flow_cb, &state); } else { - err = mac_link_flow_info(wf->wf_name, &finfo); - if (err != 0) + err = mac_link_flow_info(wf->wf_name, finfo); + if (err != 0) { + kmem_free(finfo, sizeof (*finfo)); return (err); - - err = dld_walk_flow_cb(&finfo, &state); + } + err = dld_walk_flow_cb(finfo, &state); } + kmem_free(finfo, sizeof (*finfo)); wf->wf_nflows = state.fi_nflows; return (err); } diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index ca1fc10306..67774c329f 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -476,7 +476,8 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp) * etc. since part of mac_client_retarget_intr is to walk the * device tree in order to find and retarget the interrupts. */ - mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu); + if (intr_cpu != -1) + mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu); /* * Copy in MAC address. diff --git a/usr/src/uts/common/io/dmfe/dmfe_main.c b/usr/src/uts/common/io/dmfe/dmfe_main.c index 7c32a176aa..1bd204683d 100644 --- a/usr/src/uts/common/io/dmfe/dmfe_main.c +++ b/usr/src/uts/common/io/dmfe/dmfe_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -197,12 +197,14 @@ static void dmfe_m_ioctl(void *, queue_t *, mblk_t *); static mblk_t *dmfe_m_tx(void *, mblk_t *); static int dmfe_m_stat(void *, uint_t, uint64_t *); static int dmfe_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); static int dmfe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void dmfe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t dmfe_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP), + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, dmfe_m_stat, dmfe_m_start, dmfe_m_stop, @@ -210,12 +212,14 @@ static mac_callbacks_t dmfe_m_callbacks = { dmfe_m_multicst, dmfe_m_unicst, dmfe_m_tx, + NULL, dmfe_m_ioctl, NULL, /* getcapab */ NULL, /* open */ NULL, /* close */ dmfe_m_setprop, - dmfe_m_getprop + dmfe_m_getprop, + dmfe_m_propinfo }; @@ -2178,12 +2182,12 @@ dmfe_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) } int -dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { dmfe_t *dmfep = arg; - return (mii_m_getprop(dmfep->mii, name, num, flags, sz, val, perm)); + return (mii_m_getprop(dmfep->mii, name, num, sz, val)); } int @@ -2195,6 +2199,14 @@ dmfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (mii_m_setprop(dmfep->mii, name, num, sz, val)); } +static void +dmfe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + dmfe_t *dmfep = arg; + + mii_m_propinfo(dmfep->mii, name, num, mph); +} /* * ========== Per-instance setup/teardown code ========== diff --git a/usr/src/uts/common/io/e1000g/e1000g_main.c b/usr/src/uts/common/io/e1000g/e1000g_main.c index 57d2401894..569b3f6f87 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_main.c +++ b/usr/src/uts/common/io/e1000g/e1000g_main.c @@ -45,8 +45,6 
@@ #include "e1000g_debug.h" static char ident[] = "Intel PRO/1000 Ethernet"; -static char e1000g_string[] = "Intel(R) PRO/1000 Network Connection"; -static char e1000g_version[] = "Driver Ver. 5.3.22"; /* * Proto types for DDI entry points @@ -76,11 +74,12 @@ static void e1000g_m_ioctl(void *, queue_t *, mblk_t *); static int e1000g_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int e1000g_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void e1000g_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int e1000g_set_priv_prop(struct e1000g *, const char *, uint_t, const void *); -static int e1000g_get_priv_prop(struct e1000g *, const char *, uint_t, - uint_t, void *, uint_t *); +static int e1000g_get_priv_prop(struct e1000g *, const char *, uint_t, void *); static void e1000g_init_locks(struct e1000g *); static void e1000g_destroy_locks(struct e1000g *); static int e1000g_identify_hardware(struct e1000g *); @@ -154,29 +153,26 @@ static int e1000g_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data); static void e1000g_fm_init(struct e1000g *Adapter); static void e1000g_fm_fini(struct e1000g *Adapter); -static int e1000g_get_def_val(struct e1000g *, mac_prop_id_t, uint_t, void *); static void e1000g_param_sync(struct e1000g *); static void e1000g_get_driver_control(struct e1000_hw *); static void e1000g_release_driver_control(struct e1000_hw *); static void e1000g_restore_promisc(struct e1000g *Adapter); -mac_priv_prop_t e1000g_priv_props[] = { - {"_tx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_tx_interrupt_enable", MAC_PROP_PERM_RW}, - {"_tx_intr_delay", MAC_PROP_PERM_RW}, - {"_tx_intr_abs_delay", MAC_PROP_PERM_RW}, - {"_rx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_max_num_rcv_packets", MAC_PROP_PERM_RW}, - {"_rx_intr_delay", MAC_PROP_PERM_RW}, - {"_rx_intr_abs_delay", MAC_PROP_PERM_RW}, - {"_intr_throttling_rate", 
MAC_PROP_PERM_RW}, - {"_intr_adaptive", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ}, +char *e1000g_priv_props[] = { + "_tx_bcopy_threshold", + "_tx_interrupt_enable", + "_tx_intr_delay", + "_tx_intr_abs_delay", + "_rx_bcopy_threshold", + "_max_num_rcv_packets", + "_rx_intr_delay", + "_rx_intr_abs_delay", + "_intr_throttling_rate", + "_intr_adaptive", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; -#define E1000G_MAX_PRIV_PROPS \ - (sizeof (e1000g_priv_props)/sizeof (mac_priv_prop_t)) - static struct cb_ops cb_ws_ops = { nulldev, /* cb_open */ @@ -233,7 +229,7 @@ static ddi_device_acc_attr_t e1000g_regs_acc_attr = { }; #define E1000G_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t e1000g_m_callbacks = { E1000G_M_CALLBACK_FLAGS, @@ -244,12 +240,14 @@ static mac_callbacks_t e1000g_m_callbacks = { e1000g_m_multicst, NULL, e1000g_m_tx, + NULL, e1000g_m_ioctl, e1000g_m_getcapab, NULL, NULL, e1000g_m_setprop, - e1000g_m_getprop + e1000g_m_getprop, + e1000g_m_propinfo }; /* @@ -581,7 +579,6 @@ e1000g_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) mutex_exit(&e1000g_rx_detach_lock); } - cmn_err(CE_CONT, "!%s, %s\n", e1000g_string, e1000g_version); Adapter->e1000g_state = E1000G_INITIALIZED; return (DDI_SUCCESS); @@ -610,7 +607,6 @@ e1000g_register_mac(struct e1000g *Adapter) mac->m_max_sdu = Adapter->default_mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = e1000g_priv_props; - mac->m_priv_prop_count = E1000G_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; err = mac_register(mac, &Adapter->mh); @@ -1964,6 +1960,10 @@ e1000g_stop(struct e1000g *Adapter, boolean_t global) ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_LOST); } + mutex_enter(&Adapter->link_lock); + Adapter->link_complete = B_FALSE; + mutex_exit(&Adapter->link_lock); + /* Release resources still held by the TX descriptors 
*/ e1000g_tx_clean(Adapter); @@ -2961,12 +2961,15 @@ e1000g_fill_ring(void *arg, mac_ring_type_t rtype, const int grp_index, infop->mri_start = e1000g_ring_start; infop->mri_stop = NULL; infop->mri_poll = e1000g_poll_ring; + infop->mri_stat = e1000g_rx_ring_stat; /* Ring level interrupts */ mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = e1000g_rx_ring_intr_enable; mintr->mi_disable = e1000g_rx_ring_intr_disable; + if (Adapter->msi_enable) + mintr->mi_ddi_handle = Adapter->htable[0]; } /* ARGSUSED */ @@ -3282,159 +3285,246 @@ reset: static int e1000g_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { struct e1000g *Adapter = arg; struct e1000_fc_info *fc = &Adapter->shared.fc; - struct e1000_hw *hw = &Adapter->shared; int err = 0; link_flowctrl_t flowctrl; uint64_t tmp = 0; - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) { - return (e1000g_get_def_val(Adapter, pr_num, - pr_valsize, pr_val)); - } - switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&Adapter->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&Adapter->link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - tmp = Adapter->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp = Adapter->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 
Adapter->param_adv_autoneg; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (link_flowctrl_t)) { - switch (fc->current_mode) { - case e1000_fc_none: - flowctrl = LINK_FLOWCTRL_NONE; - break; - case e1000_fc_rx_pause: - flowctrl = LINK_FLOWCTRL_RX; - break; - case e1000_fc_tx_pause: - flowctrl = LINK_FLOWCTRL_TX; - break; - case e1000_fc_full: - flowctrl = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flowctrl, pr_val, sizeof (flowctrl)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); + switch (fc->current_mode) { + case e1000_fc_none: + flowctrl = LINK_FLOWCTRL_NONE; + break; + case e1000_fc_rx_pause: + flowctrl = LINK_FLOWCTRL_RX; + break; + case e1000_fc_tx_pause: + flowctrl = LINK_FLOWCTRL_TX; + break; + case e1000_fc_full: + flowctrl = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flowctrl, pr_val, sizeof (flowctrl)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = 
MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100t4; break; case MAC_PROP_PRIVATE: err = e1000g_get_priv_prop(Adapter, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: { - struct e1000_mac_info *mac = &Adapter->shared.mac; - struct e1000_phy_info *phy = &Adapter->shared.phy; - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = DEFAULT_MTU; - range.range_uint32[0].mpur_max = Adapter->max_mtu; - /* following MAC type do not support jumbo frames */ - if ((mac->type == e1000_ich8lan) || - ((mac->type == e1000_ich9lan) && (phy->type == - e1000_phy_ife))) { - range.range_uint32[0].mpur_max = DEFAULT_MTU; - } - bcopy(&range, pr_val, sizeof (range)); + pr_valsize, pr_val); break; - } default: err = ENOTSUP; break; } + return (err); } +static void +e1000g_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + struct e1000g *Adapter = arg; + struct e1000_hw *hw = &Adapter->shared; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case 
MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_ext_status & + IEEE_ESR_1000T_FD_CAPS) || + (Adapter->phy_ext_status & + IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0); + } + break; + + case MAC_PROP_EN_100FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_status & MII_SR_100X_FD_CAPS) || + (Adapter->phy_status & MII_SR_100T2_FD_CAPS)) + ? 1 : 0); + } + break; + + case MAC_PROP_EN_100HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_status & MII_SR_100X_HD_CAPS) || + (Adapter->phy_status & MII_SR_100T2_HD_CAPS)) + ? 1 : 0); + } + break; + + case MAC_PROP_EN_10FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_EN_10HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_EN_1000HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_AUTONEG_CAPS) + ? 
1 : 0); + } + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: { + struct e1000_mac_info *mac = &Adapter->shared.mac; + struct e1000_phy_info *phy = &Adapter->shared.phy; + uint32_t max; + + /* some MAC types do not support jumbo frames */ + if ((mac->type == e1000_ich8lan) || + ((mac->type == e1000_ich9lan) && (phy->type == + e1000_phy_ife))) { + max = DEFAULT_MTU; + } else { + max = Adapter->max_mtu; + } + + mac_prop_info_set_range_uint32(prh, DEFAULT_MTU, max); + break; + } + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } else if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { + value = DEFAULT_TX_BCOPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_interrupt_enable") == 0) { + value = DEFAULT_TX_INTR_ENABLE; + } else if (strcmp(pr_name, "_tx_intr_delay") == 0) { + value = DEFAULT_TX_INTR_DELAY; + } else if (strcmp(pr_name, "_tx_intr_abs_delay") == 0) { + value = DEFAULT_TX_INTR_ABS_DELAY; + } else if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { + value = DEFAULT_RX_BCOPY_THRESHOLD; + } else if (strcmp(pr_name, "_max_num_rcv_packets") == 0) { + value = DEFAULT_RX_LIMIT_ON_INTR; + } else if (strcmp(pr_name, "_rx_intr_delay") == 0) { + value = DEFAULT_RX_INTR_DELAY; + } else if (strcmp(pr_name, "_rx_intr_abs_delay") == 0) { + value = DEFAULT_RX_INTR_ABS_DELAY; + } else if (strcmp(pr_name, "_intr_throttling_rate") == 0) { + value = DEFAULT_INTR_THROTTLING; + } else if (strcmp(pr_name, "_intr_adaptive") == 0) { + value = 1; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} + /* ARGSUSED2 */ static int e1000g_set_priv_prop(struct e1000g *Adapter, const char *pr_name, @@ -3643,84 +3733,68 @@ 
e1000g_set_priv_prop(struct e1000g *Adapter, const char *pr_name, static int e1000g_get_priv_prop(struct e1000g *Adapter, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - if (is_default) - goto done; value = Adapter->param_adv_pause; err = 0; goto done; } if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - if (is_default) - goto done; value = Adapter->param_adv_asym_pause; err = 0; goto done; } if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { - value = (is_default ? DEFAULT_TX_BCOPY_THRESHOLD : - Adapter->tx_bcopy_thresh); + value = Adapter->tx_bcopy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_interrupt_enable") == 0) { - value = (is_default ? DEFAULT_TX_INTR_ENABLE : - Adapter->tx_intr_enable); + value = Adapter->tx_intr_enable; err = 0; goto done; } if (strcmp(pr_name, "_tx_intr_delay") == 0) { - value = (is_default ? DEFAULT_TX_INTR_DELAY : - Adapter->tx_intr_delay); + value = Adapter->tx_intr_delay; err = 0; goto done; } if (strcmp(pr_name, "_tx_intr_abs_delay") == 0) { - value = (is_default ? DEFAULT_TX_INTR_ABS_DELAY : - Adapter->tx_intr_abs_delay); + value = Adapter->tx_intr_abs_delay; err = 0; goto done; } if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { - value = (is_default ? DEFAULT_RX_BCOPY_THRESHOLD : - Adapter->rx_bcopy_thresh); + value = Adapter->rx_bcopy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_max_num_rcv_packets") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_ON_INTR : - Adapter->rx_limit_onintr); + value = Adapter->rx_limit_onintr; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_delay") == 0) { - value = (is_default ? 
DEFAULT_RX_INTR_DELAY : - Adapter->rx_intr_delay); + value = Adapter->rx_intr_delay; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_abs_delay") == 0) { - value = (is_default ? DEFAULT_RX_INTR_ABS_DELAY : - Adapter->rx_intr_abs_delay); + value = Adapter->rx_intr_abs_delay; err = 0; goto done; } if (strcmp(pr_name, "_intr_throttling_rate") == 0) { - value = (is_default ? DEFAULT_INTR_THROTTLING : - Adapter->intr_throttling_rate); + value = Adapter->intr_throttling_rate; err = 0; goto done; } if (strcmp(pr_name, "_intr_adaptive") == 0) { - value = (is_default ? 1 : Adapter->intr_adaptive); + value = Adapter->intr_adaptive; err = 0; goto done; } @@ -6284,88 +6358,6 @@ e1000g_quiesce(dev_info_t *devinfo) return (DDI_SUCCESS); } -static int -e1000g_get_def_val(struct e1000g *Adapter, mac_prop_id_t pr_num, - uint_t pr_valsize, void *pr_val) -{ - link_flowctrl_t fl; - struct e1000_hw *hw = &Adapter->shared; - int err = 0; - - ASSERT(pr_valsize > 0); - switch (pr_num) { - case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_AUTONEG_CAPS) - ? 1 : 0); - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 1; - else - *(uint8_t *)pr_val = - ((Adapter->phy_ext_status & - IEEE_ESR_1000T_FD_CAPS) || - (Adapter->phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) - ? 
1 : 0; - break; - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - *(uint8_t *)pr_val = 0; - break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_100X_FD_CAPS) || - (Adapter->phy_status & MII_SR_100T2_FD_CAPS)) - ? 1 : 0; - break; - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_100X_HD_CAPS) || - (Adapter->phy_status & MII_SR_100T2_HD_CAPS)) - ? 1 : 0; - break; - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - (Adapter->phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0; - break; - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - (Adapter->phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0; - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - /* * synchronize the adv* and en* parameters. * diff --git a/usr/src/uts/common/io/e1000g/e1000g_rx.c b/usr/src/uts/common/io/e1000g/e1000g_rx.c index a4ff68894a..fb4d621bfb 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_rx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_rx.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -709,8 +709,7 @@ rx_copy: E1000_RXD_STAT_TCPCS) && !(current_desc->errors & E1000_RXD_ERR_TCPE)) - cksumflags |= HCK_FULLCKSUM | - HCK_FULLCKSUM_OK; + cksumflags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ @@ -718,7 +717,7 @@ rx_copy: E1000_RXD_STAT_IPCS) && !(current_desc->errors & E1000_RXD_ERR_IPE)) - cksumflags |= HCK_IPV4_HDRCKSUM; + cksumflags |= HCK_IPV4_HDRCKSUM_OK; } } @@ -771,8 +770,8 @@ rx_end_of_packet: * Process the last fragment. */ if (cksumflags != 0) { - (void) hcksum_assoc(rx_data->rx_mblk, - NULL, NULL, 0, 0, 0, 0, cksumflags, 0); + mac_hcksum_set(rx_data->rx_mblk, + 0, 0, 0, 0, cksumflags); cksumflags = 0; } diff --git a/usr/src/uts/common/io/e1000g/e1000g_stat.c b/usr/src/uts/common/io/e1000g/e1000g_stat.c index e2a7544004..7ec964f628 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_stat.c +++ b/usr/src/uts/common/io/e1000g/e1000g_stat.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -971,3 +971,60 @@ e1000g_read_phy_stat(struct e1000_hw *hw, int reg) return (val); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +e1000g_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + e1000g_rx_ring_t *rx_ring = (e1000g_rx_ring_t *)rh; + struct e1000g *Adapter = rx_ring->adapter; + struct e1000_hw *hw = &Adapter->shared; + p_e1000g_stat_t e1000g_ksp = + (p_e1000g_stat_t)Adapter->e1000g_ksp->ks_data; + uint32_t low_val, high_val; + + rw_enter(&Adapter->chip_lock, RW_READER); + + if (Adapter->e1000g_state & E1000G_SUSPENDED) { + rw_exit(&Adapter->chip_lock); + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_RBYTES: + /* + * The 64-bit register will reset whenever the upper + * 32 bits are read. So we need to read the lower + * 32 bits first, then read the upper 32 bits. 
+ */ + low_val = E1000_READ_REG(hw, E1000_TORL); + high_val = E1000_READ_REG(hw, E1000_TORH); + *val = (uint64_t)e1000g_ksp->Torh.value.ul << 32 | + (uint64_t)e1000g_ksp->Torl.value.ul; + *val += (uint64_t)high_val << 32 | (uint64_t)low_val; + + e1000g_ksp->Torl.value.ul = (uint32_t)*val; + e1000g_ksp->Torh.value.ul = (uint32_t)(*val >> 32); + break; + + case MAC_STAT_IPACKETS: + e1000g_ksp->Tpr.value.ul += + E1000_READ_REG(hw, E1000_TPR); + *val = e1000g_ksp->Tpr.value.ul; + break; + + default: + *val = 0; + rw_exit(&Adapter->chip_lock); + return (ENOTSUP); + } + + rw_exit(&Adapter->chip_lock); + + if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) + ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_UNAFFECTED); + + return (0); +} diff --git a/usr/src/uts/common/io/e1000g/e1000g_sw.h b/usr/src/uts/common/io/e1000g/e1000g_sw.h index ee9ff56fbf..40611707bb 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_sw.h +++ b/usr/src/uts/common/io/e1000g/e1000g_sw.h @@ -1052,6 +1052,7 @@ void e1000g_rxfree_func(p_rx_sw_packet_t packet); int e1000g_m_stat(void *arg, uint_t stat, uint64_t *val); int e1000g_init_stats(struct e1000g *Adapter); +int e1000g_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); void e1000_tbi_adjust_stats(struct e1000g *Adapter, uint32_t frame_len, uint8_t *mac_addr); diff --git a/usr/src/uts/common/io/e1000g/e1000g_tx.c b/usr/src/uts/common/io/e1000g/e1000g_tx.c index 9d58d9b127..512f1bd21e 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_tx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_tx.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -470,10 +470,10 @@ e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context, bzero(cur_context, sizeof (context_data_t)); /* first check lso information */ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); /* retrieve checksum info */ - hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start, + mac_hcksum_get(mp, &cur_context->cksum_start, &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags); /* retrieve ethernet header size */ if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid == diff --git a/usr/src/uts/common/io/elxl/elxl.c b/usr/src/uts/common/io/elxl/elxl.c index b23702cebc..2ffe96aff3 100644 --- a/usr/src/uts/common/io/elxl/elxl.c +++ b/usr/src/uts/common/io/elxl/elxl.c @@ -84,9 +84,11 @@ static int elxl_m_promisc(void *, boolean_t); static int elxl_m_multicst(void *, boolean_t, const uint8_t *); static int elxl_m_unicst(void *, const uint8_t *); static int elxl_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int elxl_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void elxl_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static boolean_t elxl_m_getcapab(void *, mac_capab_t cap, void *); static uint_t elxl_intr(caddr_t, caddr_t); static void elxl_error(elxl_t *, char *, ...); @@ -198,9 +200,10 @@ static const struct ex_product { { 0, NULL, 0 }, }; -mac_priv_prop_t ex_priv_prop[] = { - { "_media", MAC_PROP_PERM_RW }, - { "_available_media", MAC_PROP_PERM_READ }, +static char *ex_priv_prop[] = { + "_media", + "_available_media", + NULL }; static mii_ops_t ex_mii_ops = { @@ -211,7 +214,7 @@ static mii_ops_t ex_mii_ops = { }; static mac_callbacks_t elxl_m_callbacks = { - MC_GETCAPAB | MC_SETPROP | MC_GETPROP, + MC_GETCAPAB | MC_PROPERTIES, elxl_m_stat, elxl_m_start, elxl_m_stop, @@ -220,11 +223,13 @@ static mac_callbacks_t elxl_m_callbacks = { elxl_m_unicst, elxl_m_tx, NULL, + NULL, 
elxl_m_getcapab, NULL, NULL, elxl_m_setprop, - elxl_m_getprop + elxl_m_getprop, + elxl_m_propinfo }; /* @@ -575,7 +580,6 @@ elxl_attach(dev_info_t *dip) macp->m_max_sdu = ETHERMTU; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = ex_priv_prop; - macp->m_priv_prop_count = 2; (void) ddi_intr_enable(sc->ex_intrh); @@ -1387,38 +1391,32 @@ elxl_m_getcapab(void *arg, mac_capab_t cap, void *data) } static int -elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { elxl_t *sc = arg; int rv; - boolean_t isdef = (flags & MAC_PROP_DEFAULT); if (sc->ex_mii_active) { - rv = mii_m_getprop(sc->ex_miih, name, num, flags, sz, - val, perm); + rv = mii_m_getprop(sc->ex_miih, name, num, sz, val); if (rv != ENOTSUP) return (rv); } switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = isdef ? LINK_DUPLEX_HALF : sc->ex_duplex; + *(uint8_t *)val = sc->ex_duplex; break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)val = sc->ex_speed; break; case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; bcopy(&sc->ex_link, val, sizeof (link_state_t)); break; case MAC_PROP_PRIVATE: if (strcmp(name, "_media") == 0) { char *str; - *perm = MAC_PROP_PERM_RW; switch (sc->ex_xcvr) { case XCVR_SEL_AUTO: @@ -1456,7 +1454,6 @@ elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, * MAC_PROP_POSSIBLE with private properties.) 
*/ if (strcmp(name, "_available_media") == 0) { - *perm = MAC_PROP_PERM_READ; (void) snprintf(val, sz, "%s", sc->ex_medias); return (0); } @@ -1577,6 +1574,29 @@ reset: return (0); } +static void +elxl_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + elxl_t *sc = arg; + + if (sc->ex_mii_active) + mii_m_propinfo(sc->ex_miih, name, num, prh); + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_PRIVATE: + if (strcmp(name, "_available_media") == 0) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + } +} + static int elxl_m_stat(void *arg, uint_t stat, uint64_t *val) { diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c index 9814fdb5e7..c13b5237b5 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c @@ -33,14 +33,12 @@ #include <oce_ioctl.h> /* array of properties supported by this driver */ -mac_priv_prop_t oce_priv_props[] = { - {"_tx_ring_size", MAC_PROP_PERM_READ}, - {"_tx_bcopy_limit", MAC_PROP_PERM_RW}, - {"_rx_bcopy_limit", MAC_PROP_PERM_RW}, - {"_rx_ring_size", MAC_PROP_PERM_READ}, +char *oce_priv_props[] = { + "_tx_ring_size", + "_tx_bcopy_limit", + "_rx_ring_size", + NULL }; -uint32_t oce_num_props = sizeof (oce_priv_props) / sizeof (mac_priv_prop_t); - /* ---[ static function declarations ]----------------------------------- */ static int oce_power10(int power); @@ -48,7 +46,7 @@ static int oce_set_priv_prop(struct oce_dev *dev, const char *name, uint_t size, const void *val); static int oce_get_priv_prop(struct oce_dev *dev, const char *name, - uint_t flags, uint_t size, void *val); + uint_t size, void *val); /* ---[ GLD entry points ]----------------------------------------------- */ int @@ -446,119 +444,62 @@ oce_m_setprop(void *arg, const char 
*name, mac_prop_id_t id, int oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, - uint_t flags, uint_t size, void *val, uint_t *perm) + uint_t size, void *val) { struct oce_dev *dev = arg; uint32_t ret = 0; - *perm = MAC_PROP_PERM_READ; - switch (id) { - case MAC_PROP_AUTONEG: - case MAC_PROP_EN_AUTONEG: - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: { - *(uint8_t *)val = 0x0; - break; - } - - case MAC_PROP_ADV_10GFDX_CAP: { - *(uint8_t *)val = 0x01; - break; - } - - case MAC_PROP_EN_10GFDX_CAP: { + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: *(uint8_t *)val = 0x01; break; - } case MAC_PROP_DUPLEX: { - if (size >= sizeof (link_duplex_t)) { - uint32_t *mode = (uint32_t *)val; - - *perm = MAC_PROP_PERM_READ; - if (dev->state & STATE_MAC_STARTED) - *mode = LINK_DUPLEX_FULL; - else - *mode = LINK_DUPLEX_UNKNOWN; + uint32_t *mode = (uint32_t *)val; - } else - ret = EINVAL; + ASSERT(size >= sizeof (link_duplex_t)); + if (dev->state & STATE_MAC_STARTED) + *mode = LINK_DUPLEX_FULL; + else + *mode = LINK_DUPLEX_UNKNOWN; break; } case MAC_PROP_SPEED: { - if (size >= sizeof (uint64_t)) { - uint64_t *speed = (uint64_t *)val; - - *perm = MAC_PROP_PERM_READ; - *speed = 0; - if ((dev->state & STATE_MAC_STARTED) && - (dev->link.mac_speed != 0)) { - *speed = 1000000ull * - oce_power10(dev->link.mac_speed); - } - } else - ret = EINVAL; - break; - } - - case MAC_PROP_MTU: { - mac_propval_range_t range; + uint64_t *speed = (uint64_t *)val; - *perm = MAC_PROP_PERM_RW; - if (!(flags & MAC_PROP_POSSIBLE)) { - ret = ENOTSUP; - break; + ASSERT(size >= sizeof (uint64_t)); + 
*speed = 0; + if ((dev->state & STATE_MAC_STARTED) && + (dev->link.mac_speed != 0)) { + *speed = 1000000ull * oce_power10(dev->link.mac_speed); } - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = OCE_MIN_MTU; - range.range_uint32[0].mpur_max = OCE_MAX_MTU; - bcopy(&range, val, sizeof (mac_propval_range_t)); break; } case MAC_PROP_FLOWCTRL: { link_flowctrl_t *fc = (link_flowctrl_t *)val; - if (size < sizeof (link_flowctrl_t)) { + ASSERT(size >= sizeof (link_flowctrl_t)); + if (dev->flow_control & OCE_FC_TX && + dev->flow_control & OCE_FC_RX) + *fc = LINK_FLOWCTRL_BI; + else if (dev->flow_control == OCE_FC_TX) + *fc = LINK_FLOWCTRL_TX; + else if (dev->flow_control == OCE_FC_RX) + *fc = LINK_FLOWCTRL_RX; + else if (dev->flow_control == 0) + *fc = LINK_FLOWCTRL_NONE; + else ret = EINVAL; - break; - } - - if (size >= sizeof (link_flowctrl_t)) { - if (dev->flow_control & OCE_FC_TX && - dev->flow_control & OCE_FC_RX) - *fc = LINK_FLOWCTRL_BI; - else if (dev->flow_control == OCE_FC_TX) - *fc = LINK_FLOWCTRL_TX; - else if (dev->flow_control == OCE_FC_RX) - *fc = LINK_FLOWCTRL_RX; - else if (dev->flow_control == 0) - *fc = LINK_FLOWCTRL_NONE; - else - ret = EINVAL; - } break; } - case MAC_PROP_PRIVATE: { - ret = oce_get_priv_prop(dev, name, flags, size, val); + case MAC_PROP_PRIVATE: + ret = oce_get_priv_prop(dev, name, size, val); break; - } + default: ret = ENOTSUP; break; @@ -566,6 +507,59 @@ oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, return (ret); } /* oce_m_getprop */ +void +oce_m_propinfo(void *arg, const char *name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + + switch (pr_num) { + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case 
MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_SPEED: + case MAC_PROP_DUPLEX: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, OCE_MIN_MTU, OCE_MAX_MTU); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + if (strcmp(name, "_tx_ring_size") == 0) { + value = OCE_DEFAULT_TX_RING_SIZE; + } else if (strcmp(name, "_rx_ring_size") == 0) { + value = OCE_DEFAULT_RX_RING_SIZE; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} /* oce_m_propinfo */ + /* * function to handle dlpi streams message from GLDv3 mac layer */ @@ -701,7 +695,6 @@ oce_set_priv_prop(struct oce_dev *dev, const char *name, * * dev - software handle to the device * name - string containing the property name - * flags - flags sent by the OS to get_prop * size - length of the string contained name * val - [OUT] pointer to the location where the result is returned * @@ -709,46 +702,22 @@ oce_set_priv_prop(struct oce_dev *dev, const char *name, */ static int oce_get_priv_prop(struct oce_dev *dev, const char *name, - uint_t flags, uint_t size, void *val) + uint_t size, void *val) { - int ret = ENOTSUP; int value; - boolean_t is_default = (flags & MAC_PROP_DEFAULT); - - if (NULL == val) { - ret = EINVAL; - return (ret); - } if (strcmp(name, "_tx_ring_size") == 0) { - value = is_default ? 
OCE_DEFAULT_TX_RING_SIZE : - dev->tx_ring_size; - ret = 0; - goto done; - } - - if (strcmp(name, "_tx_bcopy_limit") == 0) { + value = dev->tx_ring_size; + } else if (strcmp(name, "_tx_bcopy_limit") == 0) { value = dev->tx_bcopy_limit; - ret = 0; - goto done; - } - - if (strcmp(name, "_rx_bcopy_limit") == 0) { + } else if (strcmp(name, "_rx_ring_size") == 0) { + value = dev->rx_ring_size; + } else if (strcmp(name, "_rx_bcopy_limit") == 0) { value = dev->rx_bcopy_limit; - ret = 0; - goto done; - } - - if (strcmp(name, "_rx_ring_size") == 0) { - value = is_default ? OCE_DEFAULT_RX_RING_SIZE : - dev->rx_ring_size; - ret = 0; - goto done; + } else { + return (ENOTSUP); } -done: - if (ret == 0) { - (void) snprintf(val, size, "%d", value); - } - return (ret); + (void) snprintf(val, size, "%d", value); + return (0); } /* oce_get_priv_prop */ diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c index f3346bb444..a4c0fdc6a5 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c @@ -116,7 +116,8 @@ static struct modlinkage oce_mod_linkage = { MODREV_1, &oce_drv, NULL }; -#define OCE_M_CB_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +#define OCE_M_CB_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | \ + MC_PROPINFO) static mac_callbacks_t oce_mac_cb = { OCE_M_CB_FLAGS, /* mc_callbacks */ oce_m_stat, /* mc_getstat */ @@ -126,16 +127,17 @@ static mac_callbacks_t oce_mac_cb = { oce_m_multicast, /* mc_multicast */ oce_m_unicast, /* mc_unicast */ oce_m_send, /* mc_tx */ + NULL, oce_m_ioctl, /* mc_ioctl */ oce_m_getcap, /* mc_getcapab */ NULL, /* open */ NULL, /* close */ oce_m_setprop, /* set properties */ - oce_m_getprop /* get properties */ + oce_m_getprop, /* get properties */ + oce_m_propinfo /* properties info */ }; -extern mac_priv_prop_t oce_priv_props[]; -extern uint32_t oce_num_props; +extern char 
*oce_priv_props[]; /* Module Init */ int @@ -293,7 +295,6 @@ oce_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) mac->m_max_sdu = dev->mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = oce_priv_props; - mac->m_priv_prop_count = oce_num_props; oce_log(dev, CE_NOTE, MOD_CONFIG, "Driver Private structure = 0x%p", (void *)dev); diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c index 99f210925f..cc1ddb33f9 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c @@ -25,7 +25,7 @@ */ /* - * Source file containing the Recieve Path handling + * Source file containing the Receive Path handling * functions */ #include <oce_impl.h> @@ -420,7 +420,7 @@ oce_set_rx_oflags(mblk_t *mp, struct oce_nic_rx_cqe *cqe) /* set flags */ if (cqe->u0.s.ip_cksum_pass) { - csum_flags |= HCK_IPV4_HDRCKSUM; + csum_flags |= HCK_IPV4_HDRCKSUM_OK; } if (cqe->u0.s.l4_cksum_pass) { @@ -428,8 +428,7 @@ oce_set_rx_oflags(mblk_t *mp, struct oce_nic_rx_cqe *cqe) } if (csum_flags) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - csum_flags, 0); + (void) mac_hcksum_set(mp, 0, 0, 0, 0, csum_flags); } } diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c index 5198cfc710..c1925b8074 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c @@ -712,11 +712,10 @@ oce_send_packet(struct oce_wq *wq, mblk_t *mp) } /* Retrieve LSO info */ - lso_info_get(mp, &mss, &flags); + mac_lso_get(mp, &mss, &flags); /* get the offload flags */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, - NULL, &csum_flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &csum_flags); /* Limit should be always less than Tx Buffer Size */ if (pkt_len < dev->tx_bcopy_limit) { diff --git a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c 
b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c index 7d2873e9fe..9290ecdde7 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c +++ b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c @@ -2203,13 +2203,13 @@ ql_set_rx_cksum(mblk_t *mp, struct ib_mac_iocb_rsp *net_rsp) /* TCP or UDP packet and checksum valid */ if (((net_rsp->flags2 & IB_MAC_IOCB_RSP_T) != 0) && ((net_rsp->flags1 & IB_MAC_IOCB_RSP_NU) == 0)) { - flags = HCK_FULLCKSUM | HCK_FULLCKSUM_OK; - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + flags = HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } if (((net_rsp->flags2 & IB_MAC_IOCB_RSP_U) != 0) && ((net_rsp->flags1 & IB_MAC_IOCB_RSP_NU) == 0)) { - flags = HCK_FULLCKSUM | HCK_FULLCKSUM_OK; - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + flags = HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } } @@ -4750,13 +4750,12 @@ ql_send_common(struct tx_ring *tx_ring, mblk_t *mp) tx_mode = USE_COPY; if (qlge->chksum_cap) { - hcksum_retrieve(mp, NULL, NULL, NULL, - NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); QL_PRINT(DBG_TX, ("checksum flag is :0x%x, card capability " "is 0x%x \n", pflags, qlge->chksum_cap)); if (qlge->lso_enable) { uint32_t lso_flags = 0; - lso_info_get(mp, &mss, &lso_flags); + mac_lso_get(mp, &mss, &lso_flags); use_lso = (lso_flags == HW_LSO); } QL_PRINT(DBG_TX, ("mss :%d, use_lso %x \n", diff --git a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c index 6ad591435c..83ef993a0c 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c +++ b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c @@ -23,6 +23,7 @@ * Copyright 2009 QLogic Corporation. All rights reserved. 
*/ +#include <sys/note.h> #include <qlge.h> #include <sys/strsubr.h> #include <netinet/in.h> @@ -46,9 +47,12 @@ static int ql_unicst_set(qlge_t *qlge, const uint8_t *macaddr, int slot); static int ql_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int ql_m_getprop(void *, const char *, mac_prop_id_t, uint_t, uint_t, - void *, uint_t *); -#define QL_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +static int ql_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void ql_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); + +#define QL_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t ql_m_callbacks = { QL_M_CALLBACK_FLAGS, ql_m_getstat, @@ -58,19 +62,20 @@ static mac_callbacks_t ql_m_callbacks = { ql_m_multicst, NULL, NULL, + NULL, ql_m_ioctl, ql_m_getcapab, NULL, NULL, ql_m_setprop, - ql_m_getprop -}; -mac_priv_prop_t qlge_priv_prop[] = { - {"_adv_pause_mode", MAC_PROP_PERM_RW} + ql_m_getprop, + ql_m_propinfo }; -#define QLGE_MAX_PRIV_PROPS \ - (sizeof (qlge_priv_prop) / sizeof (mac_priv_prop_t)) +char *qlge_priv_prop[] = { + "_adv_pause_mode", + NULL +}; /* * This function starts the driver @@ -689,27 +694,6 @@ qlge_set_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_valsize, return (ENOTSUP); } -static int -qlge_get_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) -{ - int err = ENOTSUP; - boolean_t is_default = (boolean_t)(pr_flags & MAC_PROP_DEFAULT); - uint32_t value; - - if (strcmp(pr_name, "_adv_pause_mode") == 0) { - value = (is_default? 
2 : qlge->pause); - err = 0; - goto done; - } - -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); - } - return (err); -} - /* * callback functions for set/get of properties */ @@ -778,10 +762,30 @@ ql_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +static int +qlge_get_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_valsize, + void *pr_val) +{ + int err = ENOTSUP; + uint32_t value; + + if (strcmp(pr_name, "_adv_pause_mode") == 0) { + value = qlge->pause; + err = 0; + goto done; + } + +done: + if (err == 0) { + (void) snprintf(pr_val, pr_valsize, "%d", value); + } + return (err); +} + /* ARGSUSED */ static int ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { qlge_t *qlge = barg; uint64_t speed; @@ -795,20 +799,9 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, goto out; } - if (pr_valsize == 0) { - err = EINVAL; - goto out; - } - bzero(pr_val, pr_valsize); - /* mostly read only */ - *perm = MAC_PROP_PERM_READ; - switch (pr_num) { case MAC_PROP_DUPLEX: - if (pr_valsize < sizeof (link_duplex_t)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (link_duplex_t)); if (qlge->duplex) link_duplex = LINK_DUPLEX_FULL; else @@ -818,18 +811,12 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - if (pr_valsize < sizeof (speed)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (speed)); speed = qlge->speed * 1000000ull; bcopy(&speed, pr_val, sizeof (speed)); break; case MAC_PROP_STATUS: - if (pr_valsize < sizeof (link_state_t)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (link_state_t)); if (qlge->port_link_state == LS_DOWN) link_state = LINK_STATE_DOWN; else @@ -839,8 +826,7 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 
break; case MAC_PROP_PRIVATE: - err = qlge_get_priv_prop(qlge, pr_name, pr_flags, - pr_valsize, pr_val); + err = qlge_get_priv_prop(qlge, pr_name, pr_valsize, pr_val); break; default: @@ -851,6 +837,35 @@ out: return (err); } +static void +ql_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(barg)); + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_PRIVATE: { + char val_str[64]; + int default_val; + + if (strcmp(pr_name, "_adv_pause_mode") == 0) + default_val = 2; + else + return; + + (void) snprintf(val_str, sizeof (val_str), "%d", default_val); + mac_prop_info_set_default_str(prh, val_str); + break; + } + } +} + /* ARGSUSED */ static boolean_t ql_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) @@ -911,7 +926,6 @@ ql_gld3_init(qlge_t *qlge, mac_register_t *macp) macp->m_max_sdu = qlge->mtu; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = qlge_priv_prop; - macp->m_priv_prop_count = QLGE_MAX_PRIV_PROPS; macp->m_v12n = 0; ql_m_callbacks.mc_unicst = ql_m_unicst; ql_m_callbacks.mc_tx = ql_m_tx; diff --git a/usr/src/uts/common/io/hme/hme.c b/usr/src/uts/common/io/hme/hme.c index 2d1d3995df..71017b5464 100644 --- a/usr/src/uts/common/io/hme/hme.c +++ b/usr/src/uts/common/io/hme/hme.c @@ -50,6 +50,7 @@ #include <sys/policy.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <sys/byteorder.h> #include "hme_phy.h" #include "hme_mac.h" #include "hme.h" @@ -113,11 +114,12 @@ static int hme_64bit_enable = 1; /* Use 64-bit sbus transfers */ static int hme_reject_own = 1; /* Reject packets with own SA */ static int hme_ngu_enable = 0; /* Never Give Up mode */ -mac_priv_prop_t hme_priv_prop[] = { - { "_ipg0", MAC_PROP_PERM_RW }, - { "_ipg1", MAC_PROP_PERM_RW }, - { "_ipg2", MAC_PROP_PERM_RW }, - { "_lance_mode", MAC_PROP_PERM_RW }, +char *hme_priv_prop[] = { + "_ipg0", + "_ipg1", 
+ "_ipg2", + "_lance_mode", + NULL }; static int hme_lance_mode = 1; /* to enable lance mode */ @@ -232,8 +234,9 @@ static int hme_m_multicst(void *, boolean_t, const uint8_t *); static int hme_m_unicst(void *, const uint8_t *); static mblk_t *hme_m_tx(void *, mblk_t *); static boolean_t hme_m_getcapab(void *, mac_capab_t, void *); -static int hme_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static int hme_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void hme_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int hme_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); @@ -246,7 +249,7 @@ static mii_ops_t hme_mii_ops = { }; static mac_callbacks_t hme_m_callbacks = { - MC_GETCAPAB | MC_SETPROP | MC_GETPROP, + MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO, hme_m_stat, hme_m_start, hme_m_stop, @@ -255,11 +258,13 @@ static mac_callbacks_t hme_m_callbacks = { hme_m_unicst, hme_m_tx, NULL, + NULL, hme_m_getcapab, NULL, NULL, hme_m_setprop, hme_m_getprop, + hme_m_propinfo }; DDI_DEFINE_STREAM_OPS(hme_dev_ops, nulldev, nulldev, hmeattach, hmedetach, @@ -1506,8 +1511,6 @@ hmeattach(dev_info_t *dip, ddi_attach_cmd_t cmd) macp->m_max_sdu = ETHERMTU; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = hme_priv_prop; - macp->m_priv_prop_count = - sizeof (hme_priv_prop) / sizeof (hme_priv_prop[0]); if (mac_register(macp, &hmep->hme_mh) != 0) { mac_free(macp); goto error_intr; @@ -1901,15 +1904,14 @@ hmestatinit(struct hme *hmep) } int -hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { struct hme *hmep = arg; int value; - boolean_t is_default; int rv; - rv = mii_m_getprop(hmep->hme_mii, name, num, flags, sz, val, perm); + rv = mii_m_getprop(hmep->hme_mii, name, num, sz, val); if (rv != ENOTSUP) return (rv); @@ 
-1920,18 +1922,14 @@ hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (ENOTSUP); } - *perm = MAC_PROP_PERM_RW; - - is_default = (flags & MAC_PROP_DEFAULT) ? B_TRUE : B_FALSE; if (strcmp(name, "_ipg0") == 0) { - value = is_default ? hme_ipg0 : hmep->hme_ipg0; - + value = hmep->hme_ipg0; } else if (strcmp(name, "_ipg1") == 0) { - value = is_default ? hme_ipg1 : hmep->hme_ipg1; + value = hmep->hme_ipg1; } else if (strcmp(name, "_ipg2") == 0) { - value = is_default ? hme_ipg2 : hmep->hme_ipg2; + value = hmep->hme_ipg2; } else if (strcmp(name, "_lance_mode") == 0) { - value = is_default ? hme_lance_mode : hmep->hme_lance_mode; + value = hmep->hme_lance_mode; } else { return (ENOTSUP); } @@ -1939,6 +1937,38 @@ hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (0); } +static void +hme_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + struct hme *hmep = arg; + + mii_m_propinfo(hmep->hme_mii, name, num, mph); + + switch (num) { + case MAC_PROP_PRIVATE: { + char valstr[64]; + int default_val; + + if (strcmp(name, "_ipg0") == 0) { + default_val = hme_ipg0; + } else if (strcmp(name, "_ipg1") == 0) { + default_val = hme_ipg1; + } else if (strcmp(name, "_ipg2") == 0) { + default_val = hme_ipg2; + } else if (strcmp(name, "_lance_mode") == 0) { + default_val = hme_lance_mode; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", default_val); + mac_prop_info_set_default_str(mph, valstr); + break; + } + } +} + int hme_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, const void *val) @@ -2267,8 +2297,7 @@ hmestart(struct hme *hmep, mblk_t *mp) uint32_t start_offset; uint32_t stuff_offset; - hcksum_retrieve(mp, NULL, NULL, &start_offset, &stuff_offset, - NULL, NULL, &flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, NULL, NULL, &flags); if (flags & HCK_PARTIALCKSUM) { if (get_ether_type(mp->b_rptr) == ETHERTYPE_VLAN) { @@ -3434,8 
+3463,7 @@ hmeread(struct hme *hmep, hmebuf_t *rbuf, uint32_t rflags) if (type == ETHERTYPE_IP || type == ETHERTYPE_IPV6) { uint16_t cksum = ~rflags & HMERMD_CKSUM; uint_t end = len - sizeof (struct ether_header); - (void) hcksum_assoc(bp, NULL, NULL, 0, - 0, end, htons(cksum), HCK_PARTIALCKSUM, 0); + mac_hcksum_set(bp, 0, 0, end, htons(cksum), HCK_PARTIALCKSUM); } return (bp); diff --git a/usr/src/uts/common/io/hxge/hxge_impl.h b/usr/src/uts/common/io/hxge/hxge_impl.h index 36b94382bf..0e1567e148 100644 --- a/usr/src/uts/common/io/hxge/hxge_impl.h +++ b/usr/src/uts/common/io/hxge/hxge_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ extern "C" { #include <sys/mac_provider.h> #include <sys/mac_ether.h> +#include <sys/note.h> /* * Handy macros (taken from bge driver) @@ -258,6 +259,7 @@ struct _hxge_ldg_t { p_hxge_ldv_t ldvp; hxge_sys_intr_t sys_intr_handler; p_hxge_t hxgep; + uint32_t htable_idx; }; struct _hxge_ldv_t { @@ -378,6 +380,8 @@ void hxge_destroy_kstats(p_hxge_t); int hxge_port_kstat_update(kstat_t *, int); int hxge_m_stat(void *arg, uint_t stat, uint64_t *val); +int hxge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int hxge_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* hxge_hw.c */ void diff --git a/usr/src/uts/common/io/hxge/hxge_kstats.c b/usr/src/uts/common/io/hxge/hxge_kstats.c index bd42641d5d..d9bfffeece 100644 --- a/usr/src/uts/common/io/hxge/hxge_kstats.c +++ b/usr/src/uts/common/io/hxge/hxge_kstats.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -881,6 +881,70 @@ hxge_port_kstat_update(kstat_t *ksp, int rw) return (0); } +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +hxge_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_hxge_ring_handle_t rhp = (p_hxge_ring_handle_t)rdriver; + p_hxge_t hxgep = rhp->hxgep; + + ASSERT(rhp != NULL); + ASSERT(hxgep != NULL); + ASSERT(hxgep->statsp != NULL); + ASSERT((rhp->index >= 0) && (rhp->index < HXGE_MAX_RDCS)); + + switch (stat) { + case MAC_STAT_IERRORS: + *val = hxgep->statsp->rdc_stats[rhp->index].ierrors; + break; + case MAC_STAT_RBYTES: + *val = hxgep->statsp->rdc_stats[rhp->index].ibytes; + break; + case MAC_STAT_IPACKETS: + *val = hxgep->statsp->rdc_stats[rhp->index].ipackets; + break; + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +hxge_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_hxge_ring_handle_t rhp = (p_hxge_ring_handle_t)rdriver; + p_hxge_t hxgep = rhp->hxgep; + + ASSERT(rhp != NULL); + ASSERT(hxgep != NULL); + ASSERT(hxgep->statsp != NULL); + ASSERT((rhp->index >= 0) && (rhp->index < HXGE_MAX_TDCS)); + + switch (stat) { + case MAC_STAT_OERRORS: + *val = hxgep->statsp->tdc_stats[rhp->index].oerrors; + break; + case MAC_STAT_OBYTES: + *val = hxgep->statsp->tdc_stats[rhp->index].obytes; + break; + case MAC_STAT_OPACKETS: + *val = hxgep->statsp->tdc_stats[rhp->index].opackets; + break; + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + int hxge_m_stat(void *arg, uint_t stat, uint64_t *value) { diff --git a/usr/src/uts/common/io/hxge/hxge_main.c b/usr/src/uts/common/io/hxge/hxge_main.c index 24d4bec784..ee2dfc365a 100644 --- a/usr/src/uts/common/io/hxge/hxge_main.c +++ b/usr/src/uts/common/io/hxge/hxge_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -146,28 +146,29 @@ static boolean_t hxge_param_locked(mac_prop_id_t pr_num); static int hxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val); static int hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *); -static int hxge_get_def_val(hxge_t *hxgep, mac_prop_id_t pr_num, uint_t pr_valsize, void *pr_val); +static void hxge_m_propinfo(void *barg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t mph); static int hxge_set_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, const void *pr_val); static int hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val); + uint_t pr_valsize, void *pr_val); static void hxge_link_poll(void *arg); static void hxge_link_update(p_hxge_t hxge, link_state_t state); static void hxge_msix_init(p_hxge_t hxgep); -mac_priv_prop_t hxge_priv_props[] = { - {"_rxdma_intr_time", MAC_PROP_PERM_RW}, - {"_rxdma_intr_pkts", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_sctp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_sctp", MAC_PROP_PERM_RW} +char *hxge_priv_props[] = { + "_rxdma_intr_time", + "_rxdma_intr_pkts", + "_class_opt_ipv4_tcp", + "_class_opt_ipv4_udp", + "_class_opt_ipv4_ah", + "_class_opt_ipv4_sctp", + "_class_opt_ipv6_tcp", + "_class_opt_ipv6_udp", + "_class_opt_ipv6_ah", + "_class_opt_ipv6_sctp", + NULL }; #define HXGE_MAX_PRIV_PROPS \ @@ -177,7 +178,7 @@ mac_priv_prop_t hxge_priv_props[] = { #define MAX_DUMP_SZ 256 #define HXGE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | 
MC_SETPROP | MC_GETPROP | MC_PROPINFO) extern hxge_status_t hxge_pfc_set_default_mac_addr(p_hxge_t hxgep); @@ -190,12 +191,14 @@ static mac_callbacks_t hxge_m_callbacks = { hxge_m_multicst, NULL, NULL, + NULL, hxge_m_ioctl, hxge_m_getcapab, NULL, NULL, hxge_m_setprop, - hxge_m_getprop + hxge_m_getprop, + hxge_m_propinfo }; /* PSARC/2007/453 MSI-X interrupt limit override. */ @@ -2935,6 +2938,41 @@ hxge_group_get(void *arg, mac_ring_type_t type, int groupid, } } +static int +hxge_ring_get_htable_idx(p_hxge_t hxgep, mac_ring_type_t type, uint32_t channel) +{ + int i; + + ASSERT(hxgep->ldgvp != NULL); + + switch (type) { + case MAC_RING_TYPE_RX: + for (i = 0; i < hxgep->ldgvp->maxldvs; i++) { + if ((hxgep->ldgvp->ldvp[i].is_rxdma) && + (hxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + hxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + case MAC_RING_TYPE_TX: + for (i = 0; i < hxgep->ldgvp->maxldvs; i++) { + if ((hxgep->ldgvp->ldvp[i].is_txdma) && + (hxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + hxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + default: + break; + } + + return (-1); +} + /* * Callback function for the GLDv3 layer to register all rings. 
*/ @@ -2945,9 +2983,15 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, { p_hxge_t hxgep = arg; + ASSERT(hxgep != NULL); + ASSERT(infop != NULL); + switch (type) { case MAC_RING_TYPE_TX: { p_hxge_ring_handle_t rhp; + mac_intr_t *mintr = &infop->mri_intr; + p_hxge_intr_t intrp; + int htable_idx; ASSERT((index >= 0) && (index < HXGE_MAX_TDCS)); rhp = &hxgep->tx_ring_handles[index]; @@ -2958,11 +3002,22 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, infop->mri_start = hxge_tx_ring_start; infop->mri_stop = hxge_tx_ring_stop; infop->mri_tx = hxge_tx_ring_send; + infop->mri_stat = hxge_tx_ring_stat; + + intrp = (p_hxge_intr_t)&hxgep->hxge_intr_type; + htable_idx = hxge_ring_get_htable_idx(hxgep, type, index); + if (htable_idx >= 0) + mintr->mi_ddi_handle = intrp->htable[htable_idx]; + else + mintr->mi_ddi_handle = NULL; break; } + case MAC_RING_TYPE_RX: { p_hxge_ring_handle_t rhp; mac_intr_t hxge_mac_intr; + p_hxge_intr_t intrp; + int htable_idx; ASSERT((index >= 0) && (index < HXGE_MAX_RDCS)); rhp = &hxgep->rx_ring_handles[index]; @@ -2975,17 +3030,25 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, * disable interrupt (enable poll). 
*/ hxge_mac_intr.mi_handle = (mac_intr_handle_t)rhp; - hxge_mac_intr.mi_enable = - (mac_intr_enable_t)hxge_disable_poll; - hxge_mac_intr.mi_disable = - (mac_intr_disable_t)hxge_enable_poll; + hxge_mac_intr.mi_enable = (mac_intr_enable_t)hxge_disable_poll; + hxge_mac_intr.mi_disable = (mac_intr_disable_t)hxge_enable_poll; + + intrp = (p_hxge_intr_t)&hxgep->hxge_intr_type; + htable_idx = hxge_ring_get_htable_idx(hxgep, type, index); + if (htable_idx >= 0) + hxge_mac_intr.mi_ddi_handle = intrp->htable[htable_idx]; + else + hxge_mac_intr.mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhp; infop->mri_start = hxge_rx_ring_start; infop->mri_stop = hxge_rx_ring_stop; infop->mri_intr = hxge_mac_intr; infop->mri_poll = hxge_rx_poll; + infop->mri_stat = hxge_rx_ring_stat; break; } + default: break; } @@ -3186,37 +3249,9 @@ hxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } -/* ARGSUSED */ -static int -hxge_get_def_val(hxge_t *hxgep, mac_prop_id_t pr_num, uint_t pr_valsize, - void *pr_val) -{ - int err = 0; - link_flowctrl_t fl; - - switch (pr_num) { - case MAC_PROP_DUPLEX: - *(uint8_t *)pr_val = 2; - break; - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = 0; - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_TX; - bcopy(&fl, pr_val, sizeof (fl)); - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - static int hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { hxge_t *hxgep = barg; p_hxge_stats_t statsp = hxgep->statsp; @@ -3228,20 +3263,8 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_m_getprop: pr_num %d", pr_num)); - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != 
MAC_PROP_PRIVATE)) { - err = hxge_get_def_val(hxgep, pr_num, pr_valsize, pr_val); - return (err); - } - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_m_getprop: duplex mode %d", @@ -3249,17 +3272,13 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (uint64_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (uint64_t)); tmp = statsp->mac_stats.link_speed * 1000000ull; bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_state_t)); if (!statsp->mac_stats.link_up) ls = LINK_STATE_DOWN; else @@ -3272,15 +3291,12 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, * Flow control is supported by the shared domain and * it is currently transmit only */ - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); fl = LINK_FLOWCTRL_TX; bcopy(&fl, pr_val, sizeof (fl)); break; case MAC_PROP_AUTONEG: /* 10G link only and it is not negotiable */ - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_ADV_1000FDX_CAP: @@ -3299,25 +3315,10 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, break; case MAC_PROP_PRIVATE: - err = hxge_get_priv_prop(hxgep, pr_name, pr_flags, - pr_valsize, pr_val); - break; - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = MIN_FRAME_SIZE - - MTU_TO_FRAME_SIZE; - range.range_uint32[0].mpur_max = 
MAX_FRAME_SIZE - - MTU_TO_FRAME_SIZE; - bcopy(&range, pr_val, sizeof (range)); + err = hxge_get_priv_prop(hxgep, pr_name, pr_valsize, + pr_val); break; - } + default: err = EINVAL; break; @@ -3328,6 +3329,60 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +static void +hxge_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_AUTONEG: + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + MIN_FRAME_SIZE - MTU_TO_FRAME_SIZE, + MAX_FRAME_SIZE - MTU_TO_FRAME_SIZE); + break; + + case MAC_PROP_PRIVATE: { + char valstr[MAXNAMELEN]; + + bzero(valstr, sizeof (valstr)); + + /* Receive Interrupt Blanking Parameters */ + if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + RXDMA_RCR_TO_DEFAULT); + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + RXDMA_RCR_PTHRES_DEFAULT); + + /* Classification and Load Distribution Configuration */ + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + HXGE_CLASS_TCAM_LOOKUP); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} + + /* ARGSUSED */ static int hxge_set_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, @@ -3387,8 +3442,8 @@ hxge_set_priv_prop(p_hxge_t hxgep, const char 
*pr_name, uint_t pr_valsize, } static int -hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { p_hxge_param_t param_arr = hxgep->param_arr; char valstr[MAXNAMELEN]; @@ -3399,77 +3454,55 @@ hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_flags, HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_get_priv_prop: property %s", pr_name)); - if (pr_flags & MAC_PROP_DEFAULT) { - /* Receive Interrupt Blanking Parameters */ - if (strcmp(pr_name, "_rxdma_intr_time") == 0) { - value = RXDMA_RCR_TO_DEFAULT; - } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { - value = RXDMA_RCR_PTHRES_DEFAULT; + /* Receive Interrupt Blanking Parameters */ + if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + value = hxgep->intr_timeout; + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + value = hxgep->intr_threshold; - /* Classification and Load Distribution Configuration */ - } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || - strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || - strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || - strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || - strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - value = HXGE_CLASS_TCAM_LOOKUP; - } else { - err = EINVAL; - } - } else { - /* Receive Interrupt Blanking Parameters */ - if (strcmp(pr_name, "_rxdma_intr_time") == 0) { - value = hxgep->intr_timeout; - } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { - value = hxgep->intr_threshold; + /* Classification and Load Distribution Configuration */ + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); - /* Classification and Load Distribution Configuration */ 
- } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); - - value = (int)param_arr[param_class_opt_ipv4_tcp].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_udp]); - - value = (int)param_arr[param_class_opt_ipv4_udp].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_ah") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_ah]); - - value = (int)param_arr[param_class_opt_ipv4_ah].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); - - value = (int)param_arr[param_class_opt_ipv4_sctp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); - - value = (int)param_arr[param_class_opt_ipv6_tcp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_udp]); - - value = (int)param_arr[param_class_opt_ipv6_udp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_ah]); - - value = (int)param_arr[param_class_opt_ipv6_ah].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_sctp]); - - value = (int)param_arr[param_class_opt_ipv6_sctp].value; - } else { - err = EINVAL; - } + value = (int)param_arr[param_class_opt_ipv4_tcp].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_udp]); + + value = 
(int)param_arr[param_class_opt_ipv4_udp].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_ah") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_ah]); + + value = (int)param_arr[param_class_opt_ipv4_ah].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); + + value = (int)param_arr[param_class_opt_ipv4_sctp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); + + value = (int)param_arr[param_class_opt_ipv6_tcp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_udp]); + + value = (int)param_arr[param_class_opt_ipv6_udp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_ah]); + + value = (int)param_arr[param_class_opt_ipv6_ah].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_sctp]); + + value = (int)param_arr[param_class_opt_ipv6_sctp].value; + } else { + err = EINVAL; } if (err == 0) { @@ -3916,6 +3949,7 @@ hxge_add_intrs_adv_type(p_hxge_t hxgep, uint32_t int_type) return (HXGE_ERROR | HXGE_DDI_FAILED); } + ldgp->htable_idx = x; intrp->intr_added++; } intrp->msi_intx_cnt = nactual; @@ -4219,7 +4253,6 @@ hxge_mac_register(p_hxge_t hxgep) macp->m_max_sdu = hxgep->vmac.maxframesize - MTU_TO_FRAME_SIZE; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = hxge_priv_props; - macp->m_priv_prop_count = HXGE_MAX_PRIV_PROPS; macp->m_v12n = MAC_VIRT_LEVEL1; HXGE_DEBUG_MSG((hxgep, DDI_CTL, diff --git a/usr/src/uts/common/io/hxge/hxge_rxdma.c b/usr/src/uts/common/io/hxge/hxge_rxdma.c index 
6700313f63..3ac170277d 100644 --- a/usr/src/uts/common/io/hxge/hxge_rxdma.c +++ b/usr/src/uts/common/io/hxge/hxge_rxdma.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2060,8 +2060,7 @@ hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p, pkt_type == RCR_PKT_IS_UDP) ? B_TRUE : B_FALSE); if (!no_port_bit && l4_cs_eq_bit && is_tcp_udp && !error_type) { - (void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0); + mac_hcksum_set(nmp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_receive_packet: Full tcp/udp cksum " diff --git a/usr/src/uts/common/io/hxge/hxge_send.c b/usr/src/uts/common/io/hxge/hxge_send.c index e453322486..647717b82c 100644 --- a/usr/src/uts/common/io/hxge/hxge_send.c +++ b/usr/src/uts/common/io/hxge/hxge_send.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -163,8 +163,8 @@ hxge_start(p_hxge_t hxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) } } - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, &value, + &cksum_flags); if (!HXGE_IS_VLAN_PACKET(mp->b_rptr)) { start_offset += sizeof (ether_header_t); stuff_offset += sizeof (ether_header_t); @@ -593,8 +593,8 @@ hxge_start_control_header_only: i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); if (ngathers > hxge_tx_max_gathers) { good_packet = B_FALSE; - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, + &end_offset, &value, &cksum_flags); HXGE_DEBUG_MSG((NULL, TX_CTL, "==> hxge_start(14): pull msg - " diff --git a/usr/src/uts/common/io/ib/clients/ibd/ibd.c b/usr/src/uts/common/io/ib/clients/ibd/ibd.c index b3a39a2efc..1ca10a43e4 100644 --- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c @@ -458,6 +458,7 @@ static mac_callbacks_t ibd_m_callbacks = { ibd_m_unicst, ibd_m_tx, NULL, + NULL, ibd_m_getcapab }; @@ -6256,7 +6257,7 @@ ibd_rc_large_copy: * ud destination, the opcode and the LSO header information to the * work request. 
*/ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); if ((lsoflags & HW_LSO) != HW_LSO) { node->w_swr.wr_opcode = IBT_WRC_SEND; lsohdr_sz = 0; @@ -6277,7 +6278,7 @@ ibd_rc_large_copy: lsohdr_sz = (node->w_swr.wr.ud_lso).lso_hdr_sz; } - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags); if ((hckflags & HCK_FULLCKSUM) == HCK_FULLCKSUM) node->w_swr.wr_flags |= IBT_WR_SEND_CKSUM; else @@ -6940,8 +6941,7 @@ ibd_process_rx(ibd_state_t *state, ibd_rwqe_t *rwqe, ibt_wc_t *wc) if (((wc->wc_flags & IBT_WC_CKSUM_OK) == IBT_WC_CKSUM_OK) && (wc->wc_cksum == 0xFFFF) && (iphap->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION)) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); } return (mp); diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c index 45fbfd7932..334c7dcd04 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,6 +29,7 @@ #include <inet/ip.h> #include <inet/ip_ire.h> #include <inet/ip_if.h> +#include <sys/ethernet.h> #include <sys/ib/mgt/ibcm/ibcm_arp.h> extern char cmlog[]; diff --git a/usr/src/uts/common/io/igb/igb_gld.c b/usr/src/uts/common/io/igb/igb_gld.c index becf960af5..3630bb5019 100644 --- a/usr/src/uts/common/io/igb/igb_gld.c +++ b/usr/src/uts/common/io/igb/igb_gld.c @@ -850,11 +850,15 @@ igb_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = igb_ring_start; infop->mri_stop = NULL; infop->mri_poll = (mac_ring_poll_t)igb_rx_ring_poll; + infop->mri_stat = igb_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = igb_rx_ring_intr_enable; mintr->mi_disable = igb_rx_ring_intr_disable; - + if (igb->intr_type & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + igb->htable[rx_ring->intr_vector]; + } break; } case MAC_RING_TYPE_TX: { @@ -867,7 +871,11 @@ igb_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = igb_tx_ring_send; - + infop->mri_stat = igb_tx_ring_stat; + if (igb->intr_type & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + igb->htable[tx_ring->intr_vector]; + } break; } default: @@ -1152,141 +1160,90 @@ setup_link: int igb_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { igb_t *igb = (igb_t *)arg; struct e1000_hw *hw = &igb->hw; int err = 0; uint32_t flow_control; uint64_t tmp = 0; - mac_propval_range_t range; - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) - return (igb_get_def_val(igb, pr_num, pr_valsize, pr_val)); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - 
bcopy(&igb->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&igb->link_duplex, pr_val, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - tmp = igb->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp = igb->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; + ASSERT(pr_valsize >= sizeof (uint8_t)); *(uint8_t *)pr_val = igb->param_adv_autoneg_cap; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (uint32_t)) { - switch (hw->fc.requested_mode) { - case e1000_fc_none: - flow_control = LINK_FLOWCTRL_NONE; - break; - case e1000_fc_rx_pause: - flow_control = LINK_FLOWCTRL_RX; - break; - case e1000_fc_tx_pause: - flow_control = LINK_FLOWCTRL_TX; - break; - case e1000_fc_full: - flow_control = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flow_control, pr_val, sizeof (flow_control)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint32_t)); + switch (hw->fc.requested_mode) { + case e1000_fc_none: + flow_control = LINK_FLOWCTRL_NONE; + break; + case e1000_fc_rx_pause: + flow_control = LINK_FLOWCTRL_RX; + break; + case e1000_fc_tx_pause: + flow_control = LINK_FLOWCTRL_TX; + break; + case e1000_fc_full: + flow_control = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flow_control, pr_val, sizeof (flow_control)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_1000fdx_cap; break; case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_1000fdx_cap; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_1000hdx_cap; break; 
case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_1000hdx_cap; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100t4_cap; break; case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100t4_cap; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100fdx_cap; break; case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100fdx_cap; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100hdx_cap; break; case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100hdx_cap; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_10fdx_cap; break; case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_10fdx_cap; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_10hdx_cap; break; case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_10hdx_cap; break; case MAC_PROP_PRIVATE: - err = igb_get_priv_prop(igb, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = MIN_MTU; - range.range_uint32[0].mpur_max = MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); + err = igb_get_priv_prop(igb, pr_name, pr_valsize, pr_val); break; default: err = EINVAL; @@ 
-1295,98 +1252,106 @@ igb_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, return (err); } -int -igb_get_def_val(igb_t *igb, mac_prop_id_t pr_num, - uint_t pr_valsize, void *pr_val) +void +igb_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) { - uint32_t flow_control; + igb_t *igb = (igb_t *)arg; struct e1000_hw *hw = &igb->hw; - uint16_t phy_status; - uint16_t phy_ext_status; - int err = 0; + uint16_t phy_status, phy_ext_status; - ASSERT(pr_valsize > 0); switch (pr_num) { - case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; - } else { - (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0; - } - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (uint32_t)) - return (EINVAL); - flow_control = LINK_FLOWCTRL_BI; - bcopy(&flow_control, pr_val, sizeof (flow_control)); - break; + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + case MAC_PROP_EN_1000FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 1; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { - (void) e1000_read_phy_reg(hw, - PHY_EXT_STATUS, &phy_ext_status); - *(uint8_t *)pr_val = + (void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, + &phy_ext_status); + mac_prop_info_set_default_uint8(prh, ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) || - (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0; + (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 
1 : 0); } break; - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)pr_val = 0; - break; + case MAC_PROP_ADV_100FDX_CAP: case MAC_PROP_EN_100FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = + mac_prop_info_set_default_uint8(prh, ((phy_status & MII_SR_100X_FD_CAPS) || - (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0; + (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0); } break; + case MAC_PROP_ADV_100HDX_CAP: case MAC_PROP_EN_100HDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = + mac_prop_info_set_default_uint8(prh, ((phy_status & MII_SR_100X_HD_CAPS) || - (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0; + (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0); } break; + case MAC_PROP_ADV_10FDX_CAP: case MAC_PROP_EN_10FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0; + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0); } break; + case MAC_PROP_ADV_10HDX_CAP: case MAC_PROP_EN_10HDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0; + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_10T_HD_CAPS) ? 
1 : 0); } break; - default: - err = ENOTSUP; + + case MAC_PROP_AUTONEG: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, MIN_MTU, MAX_MTU); + break; + + case MAC_PROP_PRIVATE: + igb_priv_prop_info(igb, pr_name, prh); break; } - return (err); + } boolean_t @@ -1533,72 +1498,65 @@ igb_set_priv_prop(igb_t *igb, const char *pr_name, } int -igb_get_priv_prop(igb_t *igb, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +igb_get_priv_prop(igb_t *igb, const char *pr_name, uint_t pr_valsize, + void *pr_val) { - int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - *perm = MAC_PROP_PERM_RW; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : igb->param_adv_pause_cap); - err = 0; - goto done; - } - if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : igb->param_adv_asym_pause_cap); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_TX_COPY_THRESHOLD : - igb->tx_copy_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RECYCLE_THRESHOLD : - igb->tx_recycle_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_overload_thresh") == 0) { - value = (is_default ? DEFAULT_TX_OVERLOAD_THRESHOLD : - igb->tx_overload_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_resched_thresh") == 0) { - value = (is_default ? 
DEFAULT_TX_RESCHED_THRESHOLD : - igb->tx_resched_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_rx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_RX_COPY_THRESHOLD : - igb->rx_copy_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_PER_INTR : - igb->rx_limit_per_intr); - err = 0; - goto done; - } - if (strcmp(pr_name, "_intr_throttling") == 0) { - value = (is_default ? igb->capab->def_intr_throttle : - igb->intr_throttling[0]); - err = 0; - goto done; + value = igb->param_adv_pause_cap; + } else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + value = igb->param_adv_asym_pause_cap; + } else if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = igb->tx_copy_thresh; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = igb->tx_recycle_thresh; + } else if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = igb->tx_overload_thresh; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = igb->tx_resched_thresh; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = igb->rx_copy_thresh; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = igb->rx_limit_per_intr; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = igb->intr_throttling[0]; + } else { + return (ENOTSUP); } -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); + + (void) snprintf(pr_val, pr_valsize, "%d", value); + return (0); +} + +void +igb_priv_prop_info(igb_t *igb, const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[64]; + int value; + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } else if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = DEFAULT_TX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = DEFAULT_TX_RECYCLE_THRESHOLD; + } else 
if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = DEFAULT_TX_OVERLOAD_THRESHOLD; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = DEFAULT_TX_RESCHED_THRESHOLD; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = DEFAULT_RX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = DEFAULT_RX_LIMIT_PER_INTR; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = igb->capab->def_intr_throttle; + } else { + return; } - return (err); + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); } diff --git a/usr/src/uts/common/io/igb/igb_main.c b/usr/src/uts/common/io/igb/igb_main.c index b4070b8389..3ac2c03e1d 100644 --- a/usr/src/uts/common/io/igb/igb_main.c +++ b/usr/src/uts/common/io/igb/igb_main.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -121,21 +120,19 @@ static void igb_fm_init(igb_t *); static void igb_fm_fini(igb_t *); static void igb_release_multicast(igb_t *); -mac_priv_prop_t igb_priv_props[] = { - {"_tx_copy_thresh", MAC_PROP_PERM_RW}, - {"_tx_recycle_thresh", MAC_PROP_PERM_RW}, - {"_tx_overload_thresh", MAC_PROP_PERM_RW}, - {"_tx_resched_thresh", MAC_PROP_PERM_RW}, - {"_rx_copy_thresh", MAC_PROP_PERM_RW}, - {"_rx_limit_per_intr", MAC_PROP_PERM_RW}, - {"_intr_throttling", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ} +char *igb_priv_props[] = { + "_tx_copy_thresh", + "_tx_recycle_thresh", + "_tx_overload_thresh", + "_tx_resched_thresh", + "_rx_copy_thresh", + "_rx_limit_per_intr", + "_intr_throttling", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; -#define IGB_MAX_PRIV_PROPS \ - (sizeof (igb_priv_props) / sizeof (mac_priv_prop_t)) - static struct cb_ops igb_cb_ops = { nulldev, /* cb_open */ nulldev, /* cb_close */ @@ -191,7 +188,7 @@ ddi_device_acc_attr_t igb_regs_acc_attr = { }; #define IGB_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t igb_m_callbacks = { IGB_M_CALLBACK_FLAGS, @@ -202,12 +199,14 @@ static mac_callbacks_t igb_m_callbacks = { igb_m_multicst, NULL, NULL, + NULL, igb_m_ioctl, igb_m_getcapab, NULL, NULL, igb_m_setprop, - igb_m_getprop + igb_m_getprop, + igb_m_propinfo }; /* @@ -783,7 +782,6 @@ igb_register_mac(igb_t *igb) sizeof (struct ether_vlan_header) - ETHERFCSL; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = igb_priv_props; - mac->m_priv_prop_count = IGB_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; status = mac_register(mac, &igb->mac_hdl); diff --git 
a/usr/src/uts/common/io/igb/igb_rx.c b/usr/src/uts/common/io/igb/igb_rx.c index 1eeaf9d325..3f7ac957a5 100644 --- a/usr/src/uts/common/io/igb/igb_rx.c +++ b/usr/src/uts/common/io/igb/igb_rx.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -23,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms of the CDDL. + * Use is subject to license terms. 
*/ #include "igb_sw.h" @@ -272,18 +271,17 @@ igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) if (((status_error & E1000_RXD_STAT_TCPCS) || (status_error & E1000_RXD_STAT_UDPCS)) && !(status_error & E1000_RXDEXT_STATERR_TCPE)) - hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + hcksum_flags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ if ((status_error & E1000_RXD_STAT_IPCS) && !(status_error & E1000_RXDEXT_STATERR_IPE)) - hcksum_flags |= HCK_IPV4_HDRCKSUM; + hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; if (hcksum_flags != 0) { - (void) hcksum_assoc(mp, - NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); } } @@ -413,6 +411,10 @@ igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes) mblk_tail = &mp->b_next; } + /* Update per-ring rx statistics */ + rx_ring->rx_pkts++; + rx_ring->rx_bytes += pkt_len; + rx_discard: /* * Reset rx descriptor read bits diff --git a/usr/src/uts/common/io/igb/igb_stat.c b/usr/src/uts/common/io/igb/igb_stat.c index 8edc4dbeed..3f5f4d69a2 100644 --- a/usr/src/uts/common/io/igb/igb_stat.c +++ b/usr/src/uts/common/io/igb/igb_stat.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,8 +21,8 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms of the CDDL. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #include "igb_sw.h" @@ -271,3 +270,53 @@ igb_init_stats(igb_t *igb) return (IGB_SUCCESS); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +igb_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +igb_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->tx_bytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->tx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/igb/igb_sw.h b/usr/src/uts/common/io/igb/igb_sw.h index e7e886f35c..080cd1bed6 100644 --- a/usr/src/uts/common/io/igb/igb_sw.h +++ b/usr/src/uts/common/io/igb/igb_sw.h @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -451,6 +450,12 @@ typedef struct igb_tx_ring { uint32_t recycle_fail; uint32_t stall_watchdog; + /* + * Per-ring statistics + */ + uint64_t tx_pkts; /* Packets Transmitted Count */ + uint64_t tx_bytes; /* Bytes Transmitted Count */ + #ifdef IGB_DEBUG /* * Debug statistics @@ -516,6 +521,12 @@ typedef struct igb_rx_ring { kmutex_t rx_lock; /* Rx access lock */ + /* + * Per-ring statistics + */ + uint64_t rx_pkts; /* Packets Received Count */ + uint64_t rx_bytes; /* Bytes Received Count */ + #ifdef IGB_DEBUG /* * Debug statistics @@ -810,11 +821,12 @@ boolean_t igb_m_getcapab(void *, mac_capab_t, void *); void igb_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); int igb_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int igb_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +int igb_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void igb_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); int igb_set_priv_prop(igb_t *, const char *, uint_t, const void *); -int igb_get_priv_prop(igb_t *, const char *, - uint_t, uint_t, void *, uint_t *); +int igb_get_priv_prop(igb_t *, const char *, uint_t, void *); +void igb_priv_prop_info(igb_t *, const char *, mac_prop_info_handle_t); boolean_t igb_param_locked(mac_prop_id_t); void igb_fill_group(void *arg, 
mac_ring_type_t, const int, mac_group_info_t *, mac_group_handle_t); @@ -850,6 +862,8 @@ int igb_init_stats(igb_t *); mblk_t *igb_rx_ring_poll(void *, int); mblk_t *igb_tx_ring_send(void *, mblk_t *); +int igb_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int igb_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/igb/igb_tx.c b/usr/src/uts/common/io/igb/igb_tx.c index b77afe1a5d..31e46609a5 100644 --- a/usr/src/uts/common/io/igb/igb_tx.c +++ b/usr/src/uts/common/io/igb/igb_tx.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -418,6 +417,10 @@ adjust_threshold: ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); + /* Update per-ring tx statistics */ + tx_ring->tx_pkts++; + tx_ring->tx_bytes += mbsize; + mutex_exit(&tx_ring->tx_lock); return (B_TRUE); @@ -599,7 +602,7 @@ igb_get_tx_context(mblk_t *mp, tx_context_t *ctx) ASSERT(mp != NULL); - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags); bzero(ctx, sizeof (tx_context_t)); ctx->hcksum_flags = flags; @@ -607,7 +610,7 @@ igb_get_tx_context(mblk_t *mp, tx_context_t *ctx) if (flags == 0) return (TX_CXT_SUCCESS); - lso_info_get(mp, &mss, &lso_flag); + mac_lso_get(mp, &mss, &lso_flag); ctx->mss = mss; ctx->lso_flag = (lso_flag == HW_LSO); diff --git a/usr/src/uts/common/io/ipw/ipw2100.c b/usr/src/uts/common/io/ipw/ipw2100.c index 8c6bdbbe6f..2559c64762 100644 --- a/usr/src/uts/common/io/ipw/ipw2100.c +++ b/usr/src/uts/common/io/ipw/ipw2100.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -140,9 +140,9 @@ static void ipw2100_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int ipw2100_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int ipw2100_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); - + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void ipw2100_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* * Interrupt and Data transferring operations @@ -181,7 +181,7 @@ static int ipw2100_cpr_resume(struct ipw2100_softc *sc); * Mac Call Back entries */ mac_callbacks_t ipw2100_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ipw2100_m_stat, ipw2100_m_start, ipw2100_m_stop, @@ -189,12 +189,14 @@ mac_callbacks_t ipw2100_m_callbacks = { ipw2100_m_multicst, ipw2100_m_unicst, ipw2100_m_tx, + NULL, ipw2100_m_ioctl, NULL, NULL, NULL, ipw2100_m_setprop, - ipw2100_m_getprop + ipw2100_m_getprop, + ipw2100_m_propinfo }; @@ -2470,7 +2472,7 @@ ipw2100_getset(struct ipw2100_softc *sc, mblk_t *m, uint32_t cmd, */ static int ipw2100_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ipw2100_softc *sc = (struct ipw2100_softc *)arg; struct ieee80211com *ic = &sc->sc_ic; @@ -2487,14 +2489,25 @@ ipw2100_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; default: /* go through net80211 */ - err = ieee80211_getprop(ic, pr_name, wldp_pr_num, pr_flags, - wldp_length, wldp_buf, perm); + err = ieee80211_getprop(ic, pr_name, wldp_pr_num, + wldp_length, wldp_buf); break; } return (err); } +static void +ipw2100_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct ipw2100_softc *sc = (struct 
ipw2100_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wldp_pr_num, prh); + +} + static int ipw2100_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwh/iwh.c b/usr/src/uts/common/io/iwh/iwh.c index 407b814066..39b6f27e7f 100644 --- a/usr/src/uts/common/io/iwh/iwh.c +++ b/usr/src/uts/common/io/iwh/iwh.c @@ -379,8 +379,10 @@ static void iwh_m_ioctl(void *, queue_t *, mblk_t *); static int iwh_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int iwh_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void iwh_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); /* * Supported rates for 802.11b/g modes (in 500Kbps unit). 
@@ -466,7 +468,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwh_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwh_m_stat, iwh_m_start, iwh_m_stop, @@ -474,12 +476,14 @@ mac_callbacks_t iwh_m_callbacks = { iwh_m_multicst, iwh_m_unicst, iwh_m_tx, + NULL, iwh_m_ioctl, NULL, NULL, NULL, iwh_m_setprop, - iwh_m_getprop + iwh_m_getprop, + iwh_m_propinfo }; #ifdef DEBUG @@ -3520,7 +3524,7 @@ iwh_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int iwh_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { iwh_sc_t *sc; int err = EINVAL; @@ -3531,11 +3535,20 @@ iwh_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, sc = (iwh_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +iwh_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + iwh_sc_t *sc = (iwh_sc_t *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int iwh_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwi/ipw2200.c b/usr/src/uts/common/io/iwi/ipw2200.c index d52e069496..0c9a729b43 100644 --- a/usr/src/uts/common/io/iwi/ipw2200.c +++ b/usr/src/uts/common/io/iwi/ipw2200.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -161,9 +161,9 @@ static mblk_t *ipw2200_m_tx(void *arg, mblk_t *mp); static int ipw2200_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int ipw2200_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); - + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void ipw2200_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); /* * Interrupt and Data transferring operations @@ -205,7 +205,7 @@ extern void ieee80211_notify_node_leave(ieee80211com_t *ic, * Mac Call Back entries */ mac_callbacks_t ipw2200_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ipw2200_m_stat, ipw2200_m_start, ipw2200_m_stop, @@ -213,12 +213,14 @@ mac_callbacks_t ipw2200_m_callbacks = { ipw2200_m_multicst, ipw2200_m_unicst, ipw2200_m_tx, + NULL, ipw2200_m_ioctl, NULL, NULL, NULL, ipw2200_m_setprop, - ipw2200_m_getprop + ipw2200_m_getprop, + ipw2200_m_propinfo }; /* @@ -2558,7 +2560,7 @@ ipw2200_getset(struct ipw2200_softc *sc, mblk_t *m, uint32_t cmd, */ static int ipw2200_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ipw2200_softc *sc = (struct ipw2200_softc *)arg; struct ieee80211com *ic = &sc->sc_ic; @@ -2575,14 +2577,24 @@ ipw2200_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; default: /* go through net80211 */ - err = ieee80211_getprop(ic, pr_name, wldp_pr_num, pr_flags, - wldp_length, wldp_buf, perm); + err = ieee80211_getprop(ic, pr_name, wldp_pr_num, + wldp_length, wldp_buf); break; } return (err); } +static void +ipw2200_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wlpd_pr_num, mac_prop_info_handle_t mph) +{ + struct 
ipw2200_softc *sc = (struct ipw2200_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wlpd_pr_num, mph); +} + static int ipw2200_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwk/iwk2.c b/usr/src/uts/common/io/iwk/iwk2.c index fbf600039c..6b5c64363c 100644 --- a/usr/src/uts/common/io/iwk/iwk2.c +++ b/usr/src/uts/common/io/iwk/iwk2.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -340,8 +340,9 @@ static void iwk_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, uint_t wldp_length, const void *wldp_buf); static int iwk_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_name, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_name, uint_t wldp_length, void *wldp_buf); +static void iwk_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static void iwk_destroy_locks(iwk_sc_t *sc); static int iwk_send(ieee80211com_t *ic, mblk_t *mp, uint8_t type); static void iwk_thread(iwk_sc_t *sc); @@ -432,7 +433,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwk_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwk_m_stat, iwk_m_start, iwk_m_stop, @@ -440,12 +441,14 @@ mac_callbacks_t iwk_m_callbacks = { iwk_m_multicst, iwk_m_unicst, iwk_m_tx, + NULL, iwk_m_ioctl, NULL, NULL, NULL, iwk_m_setprop, - iwk_m_getprop + iwk_m_getprop, + iwk_m_propinfo }; #ifdef DEBUG @@ -3090,19 +3093,20 @@ iwk_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) /* * callback functions for set/get properties */ -/* ARGSUSED */ + static int iwk_m_getprop(void *arg, const char 
*pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; iwk_sc_t *sc = (iwk_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } + static int iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) @@ -3129,6 +3133,16 @@ iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static void +iwk_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + iwk_sc_t *sc = (iwk_sc_t *)arg; + ieee80211com_t *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wldp_pr_num, mph); +} + /*ARGSUSED*/ static int iwk_m_stat(void *arg, uint_t stat, uint64_t *val) diff --git a/usr/src/uts/common/io/iwp/iwp.c b/usr/src/uts/common/io/iwp/iwp.c index 317c02bee0..eec2b84664 100644 --- a/usr/src/uts/common/io/iwp/iwp.c +++ b/usr/src/uts/common/io/iwp/iwp.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -347,8 +347,9 @@ static void iwp_m_ioctl(void *, queue_t *, mblk_t *); static int iwp_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int iwp_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void iwp_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* * Supported rates for 802.11b/g modes (in 500Kbps unit). 
@@ -429,7 +430,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwp_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwp_m_stat, iwp_m_start, iwp_m_stop, @@ -437,12 +438,14 @@ mac_callbacks_t iwp_m_callbacks = { iwp_m_multicst, iwp_m_unicst, iwp_m_tx, + NULL, iwp_m_ioctl, NULL, NULL, NULL, iwp_m_setprop, - iwp_m_getprop + iwp_m_getprop, + iwp_m_propinfo }; #ifdef DEBUG @@ -3355,7 +3358,7 @@ iwp_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int iwp_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { iwp_sc_t *sc; int err = EINVAL; @@ -3366,11 +3369,21 @@ iwp_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, sc = (iwp_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +iwp_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + iwp_sc_t *sc; + + sc = (iwp_sc_t *)arg; + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int iwp_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/ixgbe/ixgbe.conf b/usr/src/uts/common/io/ixgbe/ixgbe.conf index 8163fa8d9a..d5cee7501a 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe.conf +++ b/usr/src/uts/common/io/ixgbe/ixgbe.conf @@ -21,7 +21,7 @@ # # Copyright(c) 2007-2008 Intel Corporation. All rights reserved. # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # @@ -66,7 +66,8 @@ # # rx_group_number # The number of the receive groups -# Allowed values: 1 - 16 +# Allowed values: 1 - 16 (for Intel 82598 10Gb ethernet controller) +# Allowed values: 1 - 64 (for Intel 82599 10Gb ethernet controller) # Default value: 1 # # -------- How to set parameters for a particular interface --------- diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_debug.c b/usr/src/uts/common/io/ixgbe/ixgbe_debug.c index f4dc85aad6..1430817445 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_debug.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_debug.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -40,8 +39,8 @@ ixgbe_dump_interrupt(void *adapter, char *tag) { ixgbe_t *ixgbe = (ixgbe_t *)adapter; struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_intr_vector_t *vect; - uint32_t ivar, reg; + ixgbe_intr_vector_t *vect; + uint32_t ivar, reg, hw_index; int i, j; /* @@ -74,10 +73,11 @@ ixgbe_dump_interrupt(void *adapter, char *tag) /* for each rx ring bit set */ j = bt_getlowbit(vect->rx_map, 0, (ixgbe->num_rx_rings - 1)); while (j >= 0) { + hw_index = ixgbe->rx_rings[j].hw_index; ixgbe_log(ixgbe, "rx %d ivar %d rxdctl: 0x%x srrctl: 0x%x\n", - j, IXGBE_IVAR_RX_QUEUE(j), - IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)), - IXGBE_READ_REG(hw, IXGBE_SRRCTL(j))); + hw_index, IXGBE_IVAR_RX_QUEUE(hw_index), + IXGBE_READ_REG(hw, IXGBE_RXDCTL(hw_index)), + IXGBE_READ_REG(hw, IXGBE_SRRCTL(hw_index))); j = bt_getlowbit(vect->rx_map, (j + 1), (ixgbe->num_rx_rings - 1)); } @@ -427,7 +427,7 @@ void ixgbe_dump_regs(void *adapter) { ixgbe_t *ixgbe = (ixgbe_t *)adapter; - uint32_t reg_val; + uint32_t reg_val, hw_index; struct ixgbe_hw *hw = &ixgbe->hw; int i; DEBUGFUNC("ixgbe_dump_regs"); @@ -460,10 +460,11 @@ ixgbe_dump_regs(void *adapter) reg_val = IXGBE_READ_REG(hw, IXGBE_RXCTRL); ixgbe_log(ixgbe, "\tRXCTRL=%x\n", reg_val); for (i = 0; i < ixgbe->num_rx_rings; i++) { - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); - ixgbe_log(ixgbe, "\tRXDCTL(%d)=%x\n", i, reg_val); - reg_val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - ixgbe_log(ixgbe, "\tSRRCTL(%d)=%x\n", i, reg_val); + hw_index = ixgbe->rx_rings[i].hw_index; + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(hw_index)); + ixgbe_log(ixgbe, "\tRXDCTL(%d)=%x\n", hw_index, reg_val); + reg_val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(hw_index)); + ixgbe_log(ixgbe, "\tSRRCTL(%d)=%x\n", hw_index, reg_val); } reg_val = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_log(ixgbe, "\tRXCSUM=%x\n", reg_val); diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_gld.c b/usr/src/uts/common/io/ixgbe/ixgbe_gld.c index abc1f3647f..a7d580de1d 100644 --- 
a/usr/src/uts/common/io/ixgbe/ixgbe_gld.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_gld.c @@ -21,308 +21,13 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include "ixgbe_sw.h" /* - * Retrieve a value for one of the statistics. - */ -int -ixgbe_m_stat(void *arg, uint_t stat, uint64_t *val) -{ - ixgbe_t *ixgbe = (ixgbe_t *)arg; - struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_stat_t *ixgbe_ks; - int i; - - ixgbe_ks = (ixgbe_stat_t *)ixgbe->ixgbe_ks->ks_data; - - mutex_enter(&ixgbe->gen_lock); - - if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { - mutex_exit(&ixgbe->gen_lock); - return (ECANCELED); - } - - switch (stat) { - case MAC_STAT_IFSPEED: - *val = ixgbe->link_speed * 1000000ull; - break; - - case MAC_STAT_MULTIRCV: - ixgbe_ks->mprc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_MPRC); - *val = ixgbe_ks->mprc.value.ui64; - break; - - case MAC_STAT_BRDCSTRCV: - ixgbe_ks->bprc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_BPRC); - *val = ixgbe_ks->bprc.value.ui64; - break; - - case MAC_STAT_MULTIXMT: - ixgbe_ks->mptc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_MPTC); - *val = ixgbe_ks->mptc.value.ui64; - break; - - case MAC_STAT_BRDCSTXMT: - ixgbe_ks->bptc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_BPTC); - *val = ixgbe_ks->bptc.value.ui64; - break; - - case MAC_STAT_NORCVBUF: - for (i = 0; i < 8; i++) { - ixgbe_ks->rnbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RNBC(i)); - } - *val = ixgbe_ks->rnbc.value.ui64; - break; - - case MAC_STAT_IERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - ixgbe_ks->illerrc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - ixgbe_ks->errbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ERRBC); - ixgbe_ks->rlec.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RLEC); - *val = ixgbe_ks->crcerrs.value.ui64 + - ixgbe_ks->illerrc.value.ui64 + - ixgbe_ks->errbc.value.ui64 + - ixgbe_ks->rlec.value.ui64; - break; - - case 
MAC_STAT_RBYTES: - ixgbe_ks->tor.value.ui64 = 0; - for (i = 0; i < 16; i++) { - ixgbe_ks->qbrc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBRC(i)); - ixgbe_ks->tor.value.ui64 += - ixgbe_ks->qbrc[i].value.ui64; - } - *val = ixgbe_ks->tor.value.ui64; - break; - - case MAC_STAT_OBYTES: - ixgbe_ks->tot.value.ui64 = 0; - for (i = 0; i < 16; i++) { - if (hw->mac.type >= ixgbe_mac_82599EB) { - ixgbe_ks->qbtc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); - ixgbe_ks->qbtc[i].value.ui64 += ((uint64_t) - IXGBE_READ_REG(hw, IXGBE_QBTC_H(i))) << 32; - } else { - ixgbe_ks->qbtc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBTC(i)); - } - ixgbe_ks->tot.value.ui64 += - ixgbe_ks->qbtc[i].value.ui64; - } - *val = ixgbe_ks->tot.value.ui64; - break; - - case MAC_STAT_IPACKETS: - ixgbe_ks->tpr.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_TPR); - *val = ixgbe_ks->tpr.value.ui64; - break; - - case MAC_STAT_OPACKETS: - ixgbe_ks->tpt.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_TPT); - *val = ixgbe_ks->tpt.value.ui64; - break; - - /* RFC 1643 stats */ - case ETHER_STAT_FCS_ERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - *val = ixgbe_ks->crcerrs.value.ui64; - break; - - case ETHER_STAT_TOOLONG_ERRORS: - ixgbe_ks->roc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ROC); - *val = ixgbe_ks->roc.value.ui64; - break; - - case ETHER_STAT_MACRCV_ERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - ixgbe_ks->illerrc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - ixgbe_ks->errbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ERRBC); - ixgbe_ks->rlec.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RLEC); - *val = ixgbe_ks->crcerrs.value.ui64 + - ixgbe_ks->illerrc.value.ui64 + - ixgbe_ks->errbc.value.ui64 + - ixgbe_ks->rlec.value.ui64; - break; - - /* MII/GMII stats */ - case ETHER_STAT_XCVR_ADDR: - /* The Internal PHY's MDI address for each MAC is 1 */ - *val = 1; - break; - - case ETHER_STAT_XCVR_ID: - *val = hw->phy.id; - break; - - case 
ETHER_STAT_XCVR_INUSE: - switch (ixgbe->link_speed) { - case IXGBE_LINK_SPEED_1GB_FULL: - *val = - (hw->phy.media_type == ixgbe_media_type_copper) ? - XCVR_1000T : XCVR_1000X; - break; - case IXGBE_LINK_SPEED_100_FULL: - *val = (hw->phy.media_type == ixgbe_media_type_copper) ? - XCVR_100T2 : XCVR_100X; - break; - default: - *val = XCVR_NONE; - break; - } - break; - - case ETHER_STAT_CAP_10GFDX: - *val = 1; - break; - - case ETHER_STAT_CAP_1000FDX: - *val = 1; - break; - - case ETHER_STAT_CAP_100FDX: - *val = 1; - break; - - case ETHER_STAT_CAP_ASMPAUSE: - *val = ixgbe->param_asym_pause_cap; - break; - - case ETHER_STAT_CAP_PAUSE: - *val = ixgbe->param_pause_cap; - break; - - case ETHER_STAT_CAP_AUTONEG: - *val = 1; - break; - - case ETHER_STAT_ADV_CAP_10GFDX: - *val = ixgbe->param_adv_10000fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_1000FDX: - *val = ixgbe->param_adv_1000fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_100FDX: - *val = ixgbe->param_adv_100fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_ASMPAUSE: - *val = ixgbe->param_adv_asym_pause_cap; - break; - - case ETHER_STAT_ADV_CAP_PAUSE: - *val = ixgbe->param_adv_pause_cap; - break; - - case ETHER_STAT_ADV_CAP_AUTONEG: - *val = ixgbe->param_adv_autoneg_cap; - break; - - case ETHER_STAT_LP_CAP_10GFDX: - *val = ixgbe->param_lp_10000fdx_cap; - break; - - case ETHER_STAT_LP_CAP_1000FDX: - *val = ixgbe->param_lp_1000fdx_cap; - break; - - case ETHER_STAT_LP_CAP_100FDX: - *val = ixgbe->param_lp_100fdx_cap; - break; - - case ETHER_STAT_LP_CAP_ASMPAUSE: - *val = ixgbe->param_lp_asym_pause_cap; - break; - - case ETHER_STAT_LP_CAP_PAUSE: - *val = ixgbe->param_lp_pause_cap; - break; - - case ETHER_STAT_LP_CAP_AUTONEG: - *val = ixgbe->param_lp_autoneg_cap; - break; - - case ETHER_STAT_LINK_ASMPAUSE: - *val = ixgbe->param_asym_pause_cap; - break; - - case ETHER_STAT_LINK_PAUSE: - *val = ixgbe->param_pause_cap; - break; - - case ETHER_STAT_LINK_AUTONEG: - *val = ixgbe->param_adv_autoneg_cap; - break; - - case 
ETHER_STAT_LINK_DUPLEX: - *val = ixgbe->link_duplex; - break; - - case ETHER_STAT_TOOSHORT_ERRORS: - ixgbe_ks->ruc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RUC); - *val = ixgbe_ks->ruc.value.ui64; - break; - - case ETHER_STAT_CAP_REMFAULT: - *val = ixgbe->param_rem_fault; - break; - - case ETHER_STAT_ADV_REMFAULT: - *val = ixgbe->param_adv_rem_fault; - break; - - case ETHER_STAT_LP_REMFAULT: - *val = ixgbe->param_lp_rem_fault; - break; - - case ETHER_STAT_JABBER_ERRORS: - ixgbe_ks->rjc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RJC); - *val = ixgbe_ks->rjc.value.ui64; - break; - - default: - mutex_exit(&ixgbe->gen_lock); - return (ENOTSUP); - } - - mutex_exit(&ixgbe->gen_lock); - - if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { - ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); - return (EIO); - } - - return (0); -} - -/* * Bring the device out of the reset/quiesced state that it * was in when the interface was registered. */ @@ -732,115 +437,68 @@ setup_link: int ixgbe_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { ixgbe_t *ixgbe = (ixgbe_t *)arg; struct ixgbe_hw *hw = &ixgbe->hw; int err = 0; uint32_t flow_control; uint64_t tmp = 0; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - mac_propval_range_t range; - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_READ; - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&ixgbe->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&ixgbe->link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - if (pr_valsize >= sizeof (uint64_t)) { - tmp = ixgbe->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp 
= ixgbe->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_adv_autoneg_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_autoneg_cap; break; case MAC_PROP_FLOWCTRL: - *perm = MAC_PROP_PERM_RW; - if (pr_valsize >= sizeof (uint32_t)) { - if (is_default) { + ASSERT(pr_valsize >= sizeof (uint32_t)); + + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: flow_control = LINK_FLOWCTRL_NONE; - bcopy(&flow_control, pr_val, - sizeof (flow_control)); break; - } - switch (hw->fc.requested_mode) { - case ixgbe_fc_none: - flow_control = LINK_FLOWCTRL_NONE; - break; - case ixgbe_fc_rx_pause: - flow_control = LINK_FLOWCTRL_RX; - break; - case ixgbe_fc_tx_pause: - flow_control = LINK_FLOWCTRL_TX; - break; - case ixgbe_fc_full: - flow_control = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flow_control, pr_val, sizeof (flow_control)); - } else - err = EINVAL; + case ixgbe_fc_rx_pause: + flow_control = LINK_FLOWCTRL_RX; + break; + case ixgbe_fc_tx_pause: + flow_control = LINK_FLOWCTRL_TX; + break; + case ixgbe_fc_full: + flow_control = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flow_control, pr_val, sizeof (flow_control)); break; case MAC_PROP_ADV_10GFDX_CAP: - *(uint8_t *)pr_val = (is_default ? 1 : - ixgbe->param_adv_10000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_10000fdx_cap; break; case MAC_PROP_EN_10GFDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_10000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_10000fdx_cap; break; case MAC_PROP_ADV_1000FDX_CAP: - *(uint8_t *)pr_val = (is_default ? 
1 : - ixgbe->param_adv_1000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_1000fdx_cap; break; case MAC_PROP_EN_1000FDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_1000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_1000fdx_cap; break; case MAC_PROP_ADV_100FDX_CAP: - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_adv_100fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_100fdx_cap; break; case MAC_PROP_EN_100FDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_100fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_100fdx_cap; break; case MAC_PROP_PRIVATE: err = ixgbe_get_priv_prop(ixgbe, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = DEFAULT_MTU; - range.range_uint32[0].mpur_max = ixgbe->capab->max_mtu; - bcopy(&range, pr_val, sizeof (range)); + pr_valsize, pr_val); break; default: err = EINVAL; @@ -849,6 +507,78 @@ ixgbe_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +void +ixgbe_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg; + uint_t perm; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_10GFDX_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + perm = (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) ? 
+ MAC_PROP_PERM_RW : MAC_PROP_PERM_READ; + if (perm == MAC_PROP_PERM_RW) + mac_prop_info_set_default_uint8(prh, 1); + mac_prop_info_set_perm(prh, perm); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_NONE); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + DEFAULT_MTU, ixgbe->capab->max_mtu); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } + + if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = DEFAULT_TX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = DEFAULT_TX_RECYCLE_THRESHOLD; + } else if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = DEFAULT_TX_OVERLOAD_THRESHOLD; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = DEFAULT_TX_RESCHED_THRESHOLD; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = DEFAULT_RX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = DEFAULT_RX_LIMIT_PER_INTR; + } if (strcmp(pr_name, "_intr_throttling") == 0) { + value = ixgbe->capab->def_intr_throttle; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%x", value); + } + } +} + boolean_t ixgbe_param_locked(mac_prop_id_t pr_num) { @@ -999,65 +729,53 @@ ixgbe_set_priv_prop(ixgbe_t *ixgbe, const char *pr_name, int ixgbe_get_priv_prop(ixgbe_t *ixgbe, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - *perm = MAC_PROP_PERM_RW; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 
1 : ixgbe->param_adv_pause_cap); + value = ixgbe->param_adv_pause_cap; err = 0; goto done; } if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : ixgbe->param_adv_asym_pause_cap); + value = ixgbe->param_adv_asym_pause_cap; err = 0; goto done; } if (strcmp(pr_name, "_tx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_TX_COPY_THRESHOLD : - ixgbe->tx_copy_thresh); + value = ixgbe->tx_copy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RECYCLE_THRESHOLD : - ixgbe->tx_recycle_thresh); + value = ixgbe->tx_recycle_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_overload_thresh") == 0) { - value = (is_default ? DEFAULT_TX_OVERLOAD_THRESHOLD : - ixgbe->tx_overload_thresh); + value = ixgbe->tx_overload_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_resched_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RESCHED_THRESHOLD : - ixgbe->tx_resched_thresh); + value = ixgbe->tx_resched_thresh; err = 0; goto done; } if (strcmp(pr_name, "_rx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_RX_COPY_THRESHOLD : - ixgbe->rx_copy_thresh); + value = ixgbe->rx_copy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_PER_INTR : - ixgbe->rx_limit_per_intr); + value = ixgbe->rx_limit_per_intr; err = 0; goto done; } if (strcmp(pr_name, "_intr_throttling") == 0) { - value = (is_default ? 
ixgbe->capab->def_intr_throttle : - ixgbe->intr_throttling[0]); + value = ixgbe->intr_throttling[0]; err = 0; goto done; } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c index 4f9dd4f40f..3d97264a52 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c @@ -28,7 +28,7 @@ #include "ixgbe_sw.h" static char ixgbe_ident[] = "Intel 10Gb Ethernet"; -static char ixgbe_version[] = "driver version 1.1.4"; +static char ixgbe_version[] = "ixgbe 1.1.4"; /* * Local function protoypes @@ -57,11 +57,13 @@ static void ixgbe_setup_tx(ixgbe_t *); static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *); static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *); static void ixgbe_setup_rss(ixgbe_t *); +static void ixgbe_setup_vmdq(ixgbe_t *); +static void ixgbe_setup_vmdq_rss(ixgbe_t *); static void ixgbe_init_unicst(ixgbe_t *); -static int ixgbe_unicst_set(ixgbe_t *, const uint8_t *, int); static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *); static void ixgbe_setup_multicst(ixgbe_t *); static void ixgbe_get_hw_state(ixgbe_t *); +static void ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe); static void ixgbe_get_conf(ixgbe_t *); static void ixgbe_init_params(ixgbe_t *); static int ixgbe_get_prop(ixgbe_t *, char *, int, int, int); @@ -86,6 +88,7 @@ static void ixgbe_map_txring_to_vector(ixgbe_t *, int, int); static void ixgbe_setup_ivar(ixgbe_t *, uint16_t, uint8_t, int8_t); static void ixgbe_enable_ivar(ixgbe_t *, uint16_t, int8_t); static void ixgbe_disable_ivar(ixgbe_t *, uint16_t, int8_t); +static uint32_t ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index); static int ixgbe_map_intrs_to_vectors(ixgbe_t *); static void ixgbe_setup_adapter_vector(ixgbe_t *); static void ixgbe_rem_intr_handlers(ixgbe_t *); @@ -109,22 +112,26 @@ static int ixgbe_resume(dev_info_t *); static int ixgbe_suspend(dev_info_t *); static void ixgbe_unconfigure(dev_info_t *, ixgbe_t *); static uint8_t 
*ixgbe_mc_table_itr(struct ixgbe_hw *, uint8_t **, uint32_t *); +static int ixgbe_cbfunc(dev_info_t *, ddi_cb_action_t, void *, void *, void *); +static int ixgbe_intr_cb_register(ixgbe_t *); +static int ixgbe_intr_adjust(ixgbe_t *, ddi_cb_action_t, int); static int ixgbe_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data); static void ixgbe_fm_init(ixgbe_t *); static void ixgbe_fm_fini(ixgbe_t *); -mac_priv_prop_t ixgbe_priv_props[] = { - {"_tx_copy_thresh", MAC_PROP_PERM_RW}, - {"_tx_recycle_thresh", MAC_PROP_PERM_RW}, - {"_tx_overload_thresh", MAC_PROP_PERM_RW}, - {"_tx_resched_thresh", MAC_PROP_PERM_RW}, - {"_rx_copy_thresh", MAC_PROP_PERM_RW}, - {"_rx_limit_per_intr", MAC_PROP_PERM_RW}, - {"_intr_throttling", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ} +char *ixgbe_priv_props[] = { + "_tx_copy_thresh", + "_tx_recycle_thresh", + "_tx_overload_thresh", + "_tx_resched_thresh", + "_rx_copy_thresh", + "_rx_limit_per_intr", + "_intr_throttling", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; #define IXGBE_MAX_PRIV_PROPS \ @@ -202,7 +209,7 @@ static lb_property_t lb_external = { }; #define IXGBE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t ixgbe_m_callbacks = { IXGBE_M_CALLBACK_FLAGS, @@ -213,12 +220,14 @@ static mac_callbacks_t ixgbe_m_callbacks = { ixgbe_m_multicst, NULL, NULL, + NULL, ixgbe_m_ioctl, ixgbe_m_getcapab, NULL, NULL, ixgbe_m_setprop, - ixgbe_m_getprop + ixgbe_m_getprop, + ixgbe_m_propinfo }; /* @@ -227,7 +236,10 @@ static mac_callbacks_t ixgbe_m_callbacks = { static adapter_info_t ixgbe_82598eb_cap = { 64, /* maximum number of rx queues */ 1, /* minimum number of rx queues */ - 8, /* default number of rx queues */ + 64, /* default number of rx queues */ + 16, /* maximum number of rx groups */ + 1, /* minimum number of rx groups */ + 1, /* default 
number of rx groups */ 32, /* maximum number of tx queues */ 1, /* minimum number of tx queues */ 8, /* default number of tx queues */ @@ -247,7 +259,10 @@ static adapter_info_t ixgbe_82598eb_cap = { static adapter_info_t ixgbe_82599eb_cap = { 128, /* maximum number of rx queues */ 1, /* minimum number of rx queues */ - 8, /* default number of rx queues */ + 128, /* default number of rx queues */ + 64, /* maximum number of rx groups */ + 1, /* minimum number of rx groups */ + 1, /* default number of rx groups */ 128, /* maximum number of tx queues */ 1, /* minimum number of tx queues */ 8, /* default number of tx queues */ @@ -406,6 +421,14 @@ ixgbe_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) ixgbe->attach_progress |= ATTACH_PROGRESS_PROPS; /* + * Register interrupt callback + */ + if (ixgbe_intr_cb_register(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "Failed to register interrupt callback"); + goto attach_fail; + } + + /* * Allocate interrupts */ if (ixgbe_alloc_intrs(ixgbe) != IXGBE_SUCCESS) { @@ -662,6 +685,11 @@ ixgbe_unconfigure(dev_info_t *devinfo, ixgbe_t *ixgbe) } /* + * Unregister interrupt callback handler + */ + (void) ddi_cb_unregister(ixgbe->cb_hdl); + + /* * Remove driver properties */ if (ixgbe->attach_progress & ATTACH_PROGRESS_PROPS) { @@ -745,7 +773,6 @@ ixgbe_register_mac(ixgbe_t *ixgbe) mac->m_max_sdu = ixgbe->default_mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = ixgbe_priv_props; - mac->m_priv_prop_count = IXGBE_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; status = mac_register(mac, &ixgbe->mac_hdl); @@ -879,9 +906,11 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) struct ixgbe_hw *hw = &ixgbe->hw; dev_info_t *devinfo = ixgbe->dip; ixgbe_rx_ring_t *rx_ring; + ixgbe_rx_group_t *rx_group; ixgbe_tx_ring_t *tx_ring; uint32_t rx_size; uint32_t tx_size; + uint32_t ring_per_group; int i; /* @@ -915,12 +944,21 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 
1 : 0)) << 10; /* - * Initialize rx/tx rings parameters + * Initialize rx/tx rings/groups parameters */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; for (i = 0; i < ixgbe->num_rx_rings; i++) { rx_ring = &ixgbe->rx_rings[i]; rx_ring->index = i; rx_ring->ixgbe = ixgbe; + rx_ring->group_index = i / ring_per_group; + rx_ring->hw_index = ixgbe_get_hw_rx_index(ixgbe, i); + } + + for (i = 0; i < ixgbe->num_rx_groups; i++) { + rx_group = &ixgbe->rx_groups[i]; + rx_group->index = i; + rx_group->ixgbe = ixgbe; } for (i = 0; i < ixgbe->num_tx_rings; i++) { @@ -1605,6 +1643,218 @@ ixgbe_stop(ixgbe_t *ixgbe, boolean_t free_buffer) } /* + * ixgbe_cbfunc - Driver interface for generic DDI callbacks + */ +/* ARGSUSED */ +static int +ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg, + void *arg1, void *arg2) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg1; + + switch (cbaction) { + /* IRM callback */ + int count; + case DDI_CB_INTR_ADD: + case DDI_CB_INTR_REMOVE: + count = (int)(uintptr_t)cbarg; + ASSERT(ixgbe->intr_type == DDI_INTR_TYPE_MSIX); + DTRACE_PROBE2(ixgbe__irm__callback, int, count, + int, ixgbe->intr_cnt); + if (ixgbe_intr_adjust(ixgbe, cbaction, count) != + DDI_SUCCESS) { + ixgbe_error(ixgbe, + "IRM CB: Failed to adjust interrupts"); + goto cb_fail; + } + break; + default: + IXGBE_DEBUGLOG_1(ixgbe, "DDI CB: action 0x%x NOT supported", + cbaction); + return (DDI_ENOTSUP); + } + return (DDI_SUCCESS); +cb_fail: + return (DDI_FAILURE); +} + +/* + * ixgbe_intr_adjust - Adjust interrupt to respond to IRM request. 
+ */ +static int +ixgbe_intr_adjust(ixgbe_t *ixgbe, ddi_cb_action_t cbaction, int count) +{ + int i, rc, actual; + + if (count == 0) + return (DDI_SUCCESS); + + if ((cbaction == DDI_CB_INTR_ADD && + ixgbe->intr_cnt + count > ixgbe->intr_cnt_max) || + (cbaction == DDI_CB_INTR_REMOVE && + ixgbe->intr_cnt - count < ixgbe->intr_cnt_min)) + return (DDI_FAILURE); + + if (!(ixgbe->ixgbe_state & IXGBE_STARTED)) { + return (DDI_FAILURE); + } + + for (i = 0; i < ixgbe->num_rx_rings; i++) + mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, NULL); + for (i = 0; i < ixgbe->num_tx_rings; i++) + mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, NULL); + + mutex_enter(&ixgbe->gen_lock); + ixgbe->ixgbe_state &= ~IXGBE_STARTED; + ixgbe->ixgbe_state |= IXGBE_INTR_ADJUST; + ixgbe->ixgbe_state |= IXGBE_SUSPENDED; + mac_link_update(ixgbe->mac_hdl, LINK_STATE_UNKNOWN); + + ixgbe_stop(ixgbe, B_FALSE); + /* + * Disable interrupts + */ + if (ixgbe->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) { + rc = ixgbe_disable_intrs(ixgbe); + ASSERT(rc == IXGBE_SUCCESS); + } + ixgbe->attach_progress &= ~ATTACH_PROGRESS_ENABLE_INTR; + + /* + * Remove interrupt handlers + */ + if (ixgbe->attach_progress & ATTACH_PROGRESS_ADD_INTR) { + ixgbe_rem_intr_handlers(ixgbe); + } + ixgbe->attach_progress &= ~ATTACH_PROGRESS_ADD_INTR; + + /* + * Clear vect_map + */ + bzero(&ixgbe->vect_map, sizeof (ixgbe->vect_map)); + switch (cbaction) { + case DDI_CB_INTR_ADD: + rc = ddi_intr_alloc(ixgbe->dip, ixgbe->htable, + DDI_INTR_TYPE_MSIX, ixgbe->intr_cnt, count, &actual, + DDI_INTR_ALLOC_NORMAL); + if (rc != DDI_SUCCESS || actual != count) { + ixgbe_log(ixgbe, "Adjust interrupts failed." 
+ "return: %d, irm cb size: %d, actual: %d", + rc, count, actual); + goto intr_adjust_fail; + } + ixgbe->intr_cnt += count; + break; + + case DDI_CB_INTR_REMOVE: + for (i = ixgbe->intr_cnt - count; + i < ixgbe->intr_cnt; i ++) { + rc = ddi_intr_free(ixgbe->htable[i]); + ixgbe->htable[i] = NULL; + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, "Adjust interrupts failed." + "return: %d, irm cb size: %d, actual: %d", + rc, count, actual); + goto intr_adjust_fail; + } + } + ixgbe->intr_cnt -= count; + break; + } + + /* + * Get priority for first vector, assume remaining are all the same + */ + rc = ddi_intr_get_pri(ixgbe->htable[0], &ixgbe->intr_pri); + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, + "Get interrupt priority failed: %d", rc); + goto intr_adjust_fail; + } + rc = ddi_intr_get_cap(ixgbe->htable[0], &ixgbe->intr_cap); + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, "Get interrupt cap failed: %d", rc); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR; + + /* + * Map rings to interrupt vectors + */ + if (ixgbe_map_intrs_to_vectors(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, + "IRM CB: Failed to map interrupts to vectors"); + goto intr_adjust_fail; + } + + /* + * Add interrupt handlers + */ + if (ixgbe_add_intr_handlers(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to add interrupt handlers"); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ADD_INTR; + + /* + * Now that mutex locks are initialized, and the chip is also + * initialized, enable interrupts. 
+ */ + if (ixgbe_enable_intrs(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to enable DDI interrupts"); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR; + if (ixgbe_start(ixgbe, B_FALSE) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to start"); + goto intr_adjust_fail; + } + ixgbe->ixgbe_state &= ~IXGBE_INTR_ADJUST; + ixgbe->ixgbe_state &= ~IXGBE_SUSPENDED; + ixgbe->ixgbe_state |= IXGBE_STARTED; + mutex_exit(&ixgbe->gen_lock); + + for (i = 0; i < ixgbe->num_rx_rings; i++) { + mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, + ixgbe->htable[ixgbe->rx_rings[i].intr_vector]); + } + for (i = 0; i < ixgbe->num_tx_rings; i++) { + mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, + ixgbe->htable[ixgbe->tx_rings[i].intr_vector]); + } + + /* Wakeup all Tx rings */ + for (i = 0; i < ixgbe->num_tx_rings; i++) { + mac_tx_ring_update(ixgbe->mac_hdl, + ixgbe->tx_rings[i].ring_handle); + } + + IXGBE_DEBUGLOG_3(ixgbe, + "IRM CB: interrupts new value: 0x%x(0x%x:0x%x).", + ixgbe->intr_cnt, ixgbe->intr_cnt_min, ixgbe->intr_cnt_max); + return (DDI_SUCCESS); + +intr_adjust_fail: + ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST); + mutex_exit(&ixgbe->gen_lock); + return (DDI_FAILURE); +} + +/* + * ixgbe_intr_cb_register - Register interrupt callback function. + */ +static int +ixgbe_intr_cb_register(ixgbe_t *ixgbe) +{ + if (ddi_cb_register(ixgbe->dip, DDI_CB_FLAG_INTR, ixgbe_cbfunc, + ixgbe, NULL, &ixgbe->cb_hdl) != DDI_SUCCESS) { + return (IXGBE_FAILURE); + } + IXGBE_DEBUGLOG_0(ixgbe, "Interrupt callback function registered."); + return (IXGBE_SUCCESS); +} + +/* * ixgbe_alloc_rings - Allocate memory space for rx/tx rings. 
*/ static int @@ -1771,21 +2021,22 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) * Initialize the length register */ size = rx_data->ring_size * sizeof (union ixgbe_adv_rx_desc); - IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->index), size); + IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->hw_index), size); /* * Initialize the base address registers */ buf_low = (uint32_t)rx_data->rbd_area.dma_address; buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32); - IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->index), buf_high); - IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->index), buf_low); + IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->hw_index), buf_high); + IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->hw_index), buf_low); /* * Setup head & tail pointers */ - IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->index), rx_data->ring_size - 1); - IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->index), 0); + IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->hw_index), + rx_data->ring_size - 1); + IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->hw_index), 0); rx_data->rbd_next = 0; rx_data->lro_first = 0; @@ -1796,14 +2047,14 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) * HTHRESH=0 descriptors (to minimize latency on fetch) * WTHRESH defaults to 1 (writeback each descriptor) */ - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->index)); + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index)); reg_val |= IXGBE_RXDCTL_ENABLE; /* enable queue */ /* Not a valid value for 82599 */ if (hw->mac.type < ixgbe_mac_82599EB) { reg_val |= 0x0020; /* pthresh */ } - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->index), reg_val); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index), reg_val); if (hw->mac.type == ixgbe_mac_82599EB) { reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); @@ -1818,7 +2069,7 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) reg_val = (ixgbe->rx_buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; reg_val |= IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->index), reg_val); + 
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val); } static void @@ -1826,18 +2077,33 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) { ixgbe_rx_ring_t *rx_ring; struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_rx_group_t *rx_group; uint32_t reg_val; uint32_t ring_mapping; - int i; + uint32_t i, index; + uint32_t psrtype_rss_bit; /* PSRTYPE must be configured for 82599 */ - reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; -#define IXGBE_PSRTYPE_L2_PKT 0x00001000 - reg_val |= IXGBE_PSRTYPE_L2_PKT; - reg_val |= 0xE0000000; - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val); + if (ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ && + ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ_RSS) { + reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; + reg_val |= IXGBE_PSRTYPE_L2HDR; + reg_val |= 0x80000000; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val); + } else { + if (ixgbe->num_rx_groups > 32) { + psrtype_rss_bit = 0x20000000; + } else { + psrtype_rss_bit = 0x40000000; + } + for (i = 0; i < ixgbe->capab->max_rx_grp_num; i++) { + reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; + reg_val |= IXGBE_PSRTYPE_L2HDR; + reg_val |= psrtype_rss_bit; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), reg_val); + } + } /* * Set filter control in FCTRL to accept broadcast packets and do @@ -1850,6 +2116,46 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_val); /* + * Hardware checksum settings + */ + if (ixgbe->rx_hcksum_enable) { + reg_val = IXGBE_RXCSUM_IPPCSE; /* IP checksum */ + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val); + } + + /* + * Setup VMDq and RSS for multiple receive queues + */ + switch (ixgbe->classify_mode) { + case IXGBE_CLASSIFY_RSS: + /* + * One group, only RSS is needed when more than + * one ring enabled. 
+ */ + ixgbe_setup_rss(ixgbe); + break; + + case IXGBE_CLASSIFY_VMDQ: + /* + * Multiple groups, each group has one ring, + * only VMDq is needed. + */ + ixgbe_setup_vmdq(ixgbe); + break; + + case IXGBE_CLASSIFY_VMDQ_RSS: + /* + * Multiple groups and multiple rings, both + * VMDq and RSS are needed. + */ + ixgbe_setup_vmdq_rss(ixgbe); + break; + + default: + break; + } + + /* * Enable the receive unit. This must be done after filter * control is set in FCTRL. */ @@ -1866,27 +2172,15 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) } /* - * Setup rx groups. - */ - for (i = 0; i < ixgbe->num_rx_groups; i++) { - rx_group = &ixgbe->rx_groups[i]; - rx_group->index = i; - rx_group->ixgbe = ixgbe; - } - - /* * Setup the per-ring statistics mapping. */ ring_mapping = 0; for (i = 0; i < ixgbe->num_rx_rings; i++) { - ring_mapping |= (i & 0xF) << (8 * (i & 0x3)); - if ((i & 0x3) == 0x3) { - IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i >> 2), ring_mapping); - ring_mapping = 0; - } + index = ixgbe->rx_rings[i].hw_index; + ring_mapping = IXGBE_READ_REG(hw, IXGBE_RQSMR(index >> 2)); + ring_mapping |= (i & 0xF) << (8 * (index & 0x3)); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(index >> 2), ring_mapping); } - if ((i & 0x3) != 0x3) - IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i >> 2), ring_mapping); /* * The Max Frame Size in MHADD/MAXFRS will be internally increased @@ -1906,50 +2200,6 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) reg_val |= IXGBE_HLREG0_JUMBOEN; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_val); } - - /* - * Hardware checksum settings - */ - if (ixgbe->rx_hcksum_enable) { - reg_val = IXGBE_RXCSUM_IPPCSE; /* IP checksum */ - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val); - } - - /* - * Setup RSS for multiple receive queues - */ - if (ixgbe->num_rx_rings > 1) - ixgbe_setup_rss(ixgbe); - - /* - * Setup RSC for multiple receive queues. - */ - if (ixgbe->lro_enable) { - for (i = 0; i < ixgbe->num_rx_rings; i++) { - /* - * Make sure rx_buf_size * MAXDESC not greater - * than 65535. - * Intel recommends 4 for MAXDESC field value. 
- */ - reg_val = IXGBE_READ_REG(hw, IXGBE_RSCCTL(i)); - reg_val |= IXGBE_RSCCTL_RSCEN; - if (ixgbe->rx_buf_size == IXGBE_PKG_BUF_16k) - reg_val |= IXGBE_RSCCTL_MAXDESC_1; - else - reg_val |= IXGBE_RSCCTL_MAXDESC_4; - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(i), reg_val); - } - - reg_val = IXGBE_READ_REG(hw, IXGBE_RSCDBU); - reg_val |= IXGBE_RSCDBU_RSCACKDIS; - IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, reg_val); - - reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - reg_val |= IXGBE_RDRXCTL_RSCACKC; - reg_val &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val); - } } static void @@ -2114,13 +2364,17 @@ ixgbe_setup_rss(ixgbe_t *ixgbe) uint32_t i, mrqc, rxcsum; uint32_t random; uint32_t reta; + uint32_t ring_per_group; /* * Fill out redirection table */ reta = 0; + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + for (i = 0; i < 128; i++) { - reta = (reta << 8) | (i % ixgbe->num_rx_rings); + reta = (reta << 8) | (i % ring_per_group) | + ((i % ring_per_group) << 4); if ((i & 3) == 3) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); } @@ -2161,6 +2415,185 @@ ixgbe_setup_rss(ixgbe_t *ixgbe) } /* + * ixgbe_setup_vmdq - Setup MAC classification feature + */ +static void +ixgbe_setup_vmdq(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t vmdctl, i, vtctl; + + /* + * Setup the VMDq Control register, enable VMDq based on + * packet destination MAC address: + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * VMDq Enable = 1; + * VMDq Filter = 0; MAC filtering + * Default VMDq output index = 0; + */ + vmdctl = IXGBE_VMD_CTL_VMDQ_EN; + IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl); + break; + + case ixgbe_mac_82599EB: + /* + * Enable VMDq-only. + */ + vmdctl = IXGBE_MRQC_VMDQEN; + IXGBE_WRITE_REG(hw, IXGBE_MRQC, vmdctl); + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0); + } + + /* + * Enable Virtualization and Replication. 
+ */ + vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + + /* + * Enable receiving packets to all VFs + */ + IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL); + + break; + + default: + break; + } +} + +/* + * ixgbe_setup_vmdq_rss - Setup both vmdq feature and rss feature. + */ +static void +ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t i, mrqc, rxcsum; + uint32_t random; + uint32_t reta; + uint32_t ring_per_group; + uint32_t vmdctl, vtctl; + + /* + * Fill out redirection table + */ + reta = 0; + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + for (i = 0; i < 128; i++) { + reta = (reta << 8) | (i % ring_per_group) | + ((i % ring_per_group) << 4); + if ((i & 3) == 3) + IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); + } + + /* + * Fill out hash function seeds with a random constant + */ + for (i = 0; i < 10; i++) { + (void) random_get_pseudo_bytes((uint8_t *)&random, + sizeof (uint32_t)); + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random); + } + + /* + * Enable and setup RSS and VMDq + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * Enable RSS & Setup RSS Hash functions + */ + mrqc = IXGBE_MRQC_RSSEN | + IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + /* + * Enable and Setup VMDq + * VMDq Filter = 0; MAC filtering + * Default VMDq output index = 0; + */ + vmdctl = IXGBE_VMD_CTL_VMDQ_EN; + IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl); + break; + + case ixgbe_mac_82599EB: + /* + * Enable RSS & Setup RSS Hash functions + */ + mrqc = IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + 
IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; + + /* + * Enable VMDq+RSS. + */ + if (ixgbe->num_rx_groups > 32) { + mrqc = mrqc | IXGBE_MRQC_VMDQRSS64EN; + } else { + mrqc = mrqc | IXGBE_MRQC_VMDQRSS32EN; + } + + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0); + } + break; + + default: + break; + + } + + /* + * Disable Packet Checksum to enable RSS for multiple receive queues. + * It is an adapter hardware limitation that Packet Checksum is + * mutually exclusive with RSS. + */ + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + rxcsum |= IXGBE_RXCSUM_PCSD; + rxcsum &= ~IXGBE_RXCSUM_IPPCSE; + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); + + if (hw->mac.type == ixgbe_mac_82599EB) { + /* + * Enable Virtualization and Replication. + */ + vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + + /* + * Enable receiving packets to all VFs + */ + IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL); + } +} + +/* * ixgbe_init_unicst - Initialize the unicast addresses. 
*/ static void @@ -2183,7 +2616,7 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) /* * Initialize the multiple unicast addresses */ - ixgbe->unicst_total = MAX_NUM_UNICAST_ADDRESSES; + ixgbe->unicst_total = hw->mac.num_rar_entries; ixgbe->unicst_avail = ixgbe->unicst_total; for (slot = 0; slot < ixgbe->unicst_total; slot++) { mac_addr = ixgbe->unicst_addr[slot].mac.addr; @@ -2198,7 +2631,8 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) mac_addr = ixgbe->unicst_addr[slot].mac.addr; if (ixgbe->unicst_addr[slot].mac.set == 1) { (void) ixgbe_set_rar(hw, slot, mac_addr, - NULL, IXGBE_RAH_AV); + ixgbe->unicst_addr[slot].mac.group_index, + IXGBE_RAH_AV); } else { bzero(mac_addr, ETHERADDRL); (void) ixgbe_set_rar(hw, slot, mac_addr, @@ -2209,35 +2643,6 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) } /* - * ixgbe_unicst_set - Set the unicast address to the specified slot. - */ -int -ixgbe_unicst_set(ixgbe_t *ixgbe, const uint8_t *mac_addr, - int slot) -{ - struct ixgbe_hw *hw = &ixgbe->hw; - - ASSERT(mutex_owned(&ixgbe->gen_lock)); - - /* - * Save the unicast address in the software data structure - */ - bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); - - /* - * Set the unicast address to the RAR register - */ - (void) ixgbe_set_rar(hw, slot, (uint8_t *)mac_addr, NULL, IXGBE_RAH_AV); - - if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { - ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); - return (EIO); - } - - return (0); -} - -/* * ixgbe_unicst_find - Find the slot for the specified unicast address */ int @@ -2352,6 +2757,81 @@ ixgbe_setup_multicst(ixgbe_t *ixgbe) } /* + * ixgbe_setup_vmdq_rss_conf - Configure vmdq and rss (number and mode). + * + * Configure the rx classification mode (vmdq & rss) and vmdq & rss numbers. + * Different chipsets may have different allowed configuration of vmdq and rss. 
+ */ +static void +ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t ring_per_group; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * 82598 supports the following combination: + * vmdq no. x rss no. + * [5..16] x 1 + * [1..4] x [1..16] + * However 8 rss queue per pool (vmdq) is sufficient for + * most cases. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + if (ixgbe->num_rx_groups > 4) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups; + } else { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(8, ring_per_group); + } + + break; + + case ixgbe_mac_82599EB: + /* + * 82599 supports the following combination: + * vmdq no. x rss no. + * [33..64] x [1..2] + * [2..32] x [1..4] + * 1 x [1..16] + * However 8 rss queue per pool (vmdq) is sufficient for + * most cases. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + if (ixgbe->num_rx_groups == 1) { + ixgbe->num_rx_rings = min(8, ring_per_group); + } else if (ixgbe->num_rx_groups <= 32) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(4, ring_per_group); + } else if (ixgbe->num_rx_groups <= 64) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(2, ring_per_group); + } + + break; + + default: + break; + } + + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + + if (ixgbe->num_rx_groups == 1 && ring_per_group == 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; + } else if (ixgbe->num_rx_groups != 1 && ring_per_group == 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ; + } else if (ixgbe->num_rx_groups != 1 && ring_per_group != 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ_RSS; + } else { + ixgbe->classify_mode = IXGBE_CLASSIFY_RSS; + } + + ixgbe_log(ixgbe, "rx group number:%d, rx ring number:%d", + ixgbe->num_rx_groups, ixgbe->num_rx_rings); +} + +/* * ixgbe_get_conf - Get driver configurations set in driver.conf. 
* * This routine gets user-configured values out of the configuration @@ -2434,7 +2914,8 @@ ixgbe_get_conf(ixgbe_t *ixgbe) * Multiple groups configuration */ ixgbe->num_rx_groups = ixgbe_get_prop(ixgbe, PROP_RX_GROUP_NUM, - MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM); + ixgbe->capab->min_rx_grp_num, ixgbe->capab->max_rx_grp_num, + ixgbe->capab->def_rx_grp_num); ixgbe->mr_enable = ixgbe_get_prop(ixgbe, PROP_MR_ENABLE, 0, 1, DEFAULT_MR_ENABLE); @@ -2443,6 +2924,16 @@ ixgbe_get_conf(ixgbe_t *ixgbe) ixgbe->num_tx_rings = 1; ixgbe->num_rx_rings = 1; ixgbe->num_rx_groups = 1; + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; + } else { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + max(ixgbe->num_rx_rings / ixgbe->num_rx_groups, 1); + /* + * The combination of num_rx_rings and num_rx_groups + * may be not supported by h/w. We need to adjust + * them to appropriate values. + */ + ixgbe_setup_vmdq_rss_conf(ixgbe); } /* @@ -3699,6 +4190,7 @@ ixgbe_alloc_intrs(ixgbe_t *ixgbe) ixgbe->num_rx_rings = 1; ixgbe->num_rx_groups = 1; ixgbe->num_tx_rings = 1; + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; ixgbe_log(ixgbe, "MSI-X not used, force rings and groups number to 1"); @@ -3745,9 +4237,10 @@ static int ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) { dev_info_t *devinfo; - int request, count, avail, actual; + int request, count, actual; int minimum; int rc; + uint32_t ring_per_group; devinfo = ixgbe->dip; @@ -3767,12 +4260,13 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) case DDI_INTR_TYPE_MSIX: /* * Best number of vectors for the adapter is - * # rx rings + # tx rings. + * (# rx rings + # tx rings), however we will + * limit the request number. 
*/ - request = ixgbe->num_rx_rings + ixgbe->num_tx_rings; + request = min(16, ixgbe->num_rx_rings + ixgbe->num_tx_rings); if (request > ixgbe->capab->max_ring_vect) request = ixgbe->capab->max_ring_vect; - minimum = 2; + minimum = 1; IXGBE_DEBUGLOG_0(ixgbe, "interrupt type: MSI-X"); break; @@ -3797,26 +4291,10 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) } IXGBE_DEBUGLOG_1(ixgbe, "interrupts supported: %d", count); - /* - * Get number of available interrupts - */ - rc = ddi_intr_get_navail(devinfo, intr_type, &avail); - if ((rc != DDI_SUCCESS) || (avail < minimum)) { - ixgbe_log(ixgbe, - "Get interrupt available number failed. " - "Return: %d, available: %d", rc, avail); - return (IXGBE_FAILURE); - } - IXGBE_DEBUGLOG_1(ixgbe, "interrupts available: %d", avail); - - if (avail < request) { - ixgbe_log(ixgbe, "Request %d handles, %d available", - request, avail); - request = avail; - } - actual = 0; ixgbe->intr_cnt = 0; + ixgbe->intr_cnt_max = 0; + ixgbe->intr_cnt_min = 0; /* * Allocate an array of interrupt handles @@ -3834,7 +4312,24 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) } IXGBE_DEBUGLOG_1(ixgbe, "interrupts actually allocated: %d", actual); + /* + * upper/lower limit of interrupts + */ ixgbe->intr_cnt = actual; + ixgbe->intr_cnt_max = request; + ixgbe->intr_cnt_min = minimum; + + /* + * rss number per group should not exceed the rx interrupt number, + * else need to adjust rx ring number. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + ASSERT((ixgbe->num_rx_rings % ixgbe->num_rx_groups) == 0); + if (min(actual, ixgbe->num_rx_rings) < ring_per_group) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(actual, ixgbe->num_rx_rings); + ixgbe_setup_vmdq_rss_conf(ixgbe); + } /* * Now we know the actual number of vectors. 
Here we map the vector @@ -4147,6 +4642,53 @@ ixgbe_disable_ivar(ixgbe_t *ixgbe, uint16_t intr_alloc_entry, int8_t cause) } /* + * Convert the rx ring index driver maintained to the rx ring index + * in h/w. + */ +static uint32_t +ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index) +{ + + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t rx_ring_per_group, hw_rx_index; + + if (ixgbe->classify_mode == IXGBE_CLASSIFY_RSS || + ixgbe->classify_mode == IXGBE_CLASSIFY_NONE) { + return (sw_rx_index); + } else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ) { + if (hw->mac.type == ixgbe_mac_82598EB) { + return (sw_rx_index); + } else if (hw->mac.type == ixgbe_mac_82599EB) { + return (sw_rx_index * 2); + } + } else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) { + rx_ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + + if (hw->mac.type == ixgbe_mac_82598EB) { + hw_rx_index = (sw_rx_index / rx_ring_per_group) * + 16 + (sw_rx_index % rx_ring_per_group); + return (hw_rx_index); + } else if (hw->mac.type == ixgbe_mac_82599EB) { + if (ixgbe->num_rx_groups > 32) { + hw_rx_index = (sw_rx_index / + rx_ring_per_group) * 2 + + (sw_rx_index % rx_ring_per_group); + } else { + hw_rx_index = (sw_rx_index / + rx_ring_per_group) * 4 + + (sw_rx_index % rx_ring_per_group); + } + return (hw_rx_index); + } + } + + /* + * Should never reach. Just to make compiler happy. + */ + return (sw_rx_index); +} + +/* * ixgbe_map_intrs_to_vectors - Map different interrupts to MSI-X vectors. 
* * For MSI-X, here will map rx interrupt, tx interrupt and other interrupt @@ -4183,7 +4725,6 @@ ixgbe_map_intrs_to_vectors(ixgbe_t *ixgbe) */ BT_SET(ixgbe->vect_map[vector].other_map, 0); ixgbe->vect_map[vector].other_cnt++; - vector++; /* * Map rx ring interrupts to vectors @@ -4217,6 +4758,7 @@ ixgbe_setup_adapter_vector(ixgbe_t *ixgbe) ixgbe_intr_vector_t *vect; /* vector bitmap */ int r_idx; /* ring index */ int v_idx; /* vector index */ + uint32_t hw_index; /* * Clear any previous entries @@ -4265,7 +4807,8 @@ ixgbe_setup_adapter_vector(ixgbe_t *ixgbe) (ixgbe->num_rx_rings - 1)); while (r_idx >= 0) { - ixgbe_setup_ivar(ixgbe, r_idx, v_idx, 0); + hw_index = ixgbe->rx_rings[r_idx].hw_index; + ixgbe_setup_ivar(ixgbe, hw_index, v_idx, 0); r_idx = bt_getlowbit(vect->rx_map, (r_idx + 1), (ixgbe->num_rx_rings - 1)); } @@ -4637,11 +5180,31 @@ ixgbe_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) } /* + * Get the global ring index by a ring index within a group. + */ +static int +ixgbe_get_rx_ring_index(ixgbe_t *ixgbe, int gindex, int rindex) +{ + ixgbe_rx_ring_t *rx_ring; + int i; + + for (i = 0; i < ixgbe->num_rx_rings; i++) { + rx_ring = &ixgbe->rx_rings[i]; + if (rx_ring->group_index == gindex) + rindex--; + if (rindex < 0) + return (i); + } + + return (-1); +} + +/* * Callback funtion for MAC layer to register all rings. */ /* ARGSUSED */ void -ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, +ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int group_index, const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) { ixgbe_t *ixgbe = (ixgbe_t *)arg; @@ -4649,25 +5212,37 @@ ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, switch (rtype) { case MAC_RING_TYPE_RX: { - ASSERT(rg_index == 0); - ASSERT(ring_index < ixgbe->num_rx_rings); + /* + * 'index' is the ring index within the group. + * Need to get the global ring index by searching in groups. 
+ */ + int global_ring_index = ixgbe_get_rx_ring_index( + ixgbe, group_index, ring_index); - ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[ring_index]; + ASSERT(global_ring_index >= 0); + + ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[global_ring_index]; rx_ring->ring_handle = rh; infop->mri_driver = (mac_ring_driver_t)rx_ring; infop->mri_start = ixgbe_ring_start; infop->mri_stop = NULL; infop->mri_poll = ixgbe_ring_rx_poll; + infop->mri_stat = ixgbe_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = ixgbe_rx_ring_intr_enable; mintr->mi_disable = ixgbe_rx_ring_intr_disable; + if (ixgbe->intr_type & + (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + ixgbe->htable[rx_ring->intr_vector]; + } break; } case MAC_RING_TYPE_TX: { - ASSERT(rg_index == -1); + ASSERT(group_index == -1); ASSERT(ring_index < ixgbe->num_tx_rings); ixgbe_tx_ring_t *tx_ring = &ixgbe->tx_rings[ring_index]; @@ -4677,7 +5252,12 @@ ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = ixgbe_ring_tx; - + infop->mri_stat = ixgbe_tx_ring_stat; + if (ixgbe->intr_type & + (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + ixgbe->htable[tx_ring->intr_vector]; + } break; } default: @@ -4726,16 +5306,26 @@ ixgbe_rx_ring_intr_enable(mac_intr_handle_t intrh) ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh; ixgbe_t *ixgbe = rx_ring->ixgbe; int r_idx = rx_ring->index; + int hw_r_idx = rx_ring->hw_index; int v_idx = rx_ring->intr_vector; mutex_enter(&ixgbe->gen_lock); - ASSERT(BT_TEST(ixgbe->vect_map[v_idx].rx_map, r_idx) == 0); + if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) { + mutex_exit(&ixgbe->gen_lock); + /* + * Simply return 0. + * Interrupts are being adjusted. ixgbe_intr_adjust() + * will eventually re-enable the interrupt when it's + * done with the adjustment. 
+ */ + return (0); + } /* * To enable interrupt by setting the VAL bit of given interrupt * vector allocation register (IVAR). */ - ixgbe_enable_ivar(ixgbe, r_idx, 0); + ixgbe_enable_ivar(ixgbe, hw_r_idx, 0); BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx); @@ -4759,16 +5349,34 @@ ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh) ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh; ixgbe_t *ixgbe = rx_ring->ixgbe; int r_idx = rx_ring->index; + int hw_r_idx = rx_ring->hw_index; int v_idx = rx_ring->intr_vector; mutex_enter(&ixgbe->gen_lock); - ASSERT(BT_TEST(ixgbe->vect_map[v_idx].rx_map, r_idx) == 1); + if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) { + mutex_exit(&ixgbe->gen_lock); + /* + * Simply return 0. + * In the rare case where an interrupt is being + * disabled while interrupts are being adjusted, + * we don't fail the operation. No interrupts will + * be generated while they are adjusted, and + * ixgbe_intr_adjust() will cause the interrupts + * to be re-enabled once it completes. Note that + * in this case, packets may be delivered to the + * stack via interrupts before ixgbe_rx_ring_intr_enable() + * is called again. This is acceptable since interrupt + * adjustment is infrequent, and the stack will be + * able to handle these packets. + */ + return (0); + } /* * To disable interrupt by clearing the VAL bit of given interrupt * vector allocation register (IVAR).
*/ - ixgbe_disable_ivar(ixgbe, r_idx, 0); + ixgbe_disable_ivar(ixgbe, hw_r_idx, 0); BT_CLEAR(ixgbe->vect_map[v_idx].rx_map, r_idx); @@ -4785,8 +5393,8 @@ ixgbe_addmac(void *arg, const uint8_t *mac_addr) { ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg; ixgbe_t *ixgbe = rx_group->ixgbe; - int slot; - int err; + struct ixgbe_hw *hw = &ixgbe->hw; + int slot, i; mutex_enter(&ixgbe->gen_lock); @@ -4801,21 +5409,40 @@ ixgbe_addmac(void *arg, const uint8_t *mac_addr) return (ENOSPC); } - for (slot = 0; slot < ixgbe->unicst_total; slot++) { - if (ixgbe->unicst_addr[slot].mac.set == 0) - break; + /* + * The first ixgbe->num_rx_groups slots are reserved for each respective + * group. The rest slots are shared by all groups. While adding a + * MAC address, reserved slots are firstly checked then the shared + * slots are searched. + */ + slot = -1; + if (ixgbe->unicst_addr[rx_group->index].mac.set == 1) { + for (i = ixgbe->num_rx_groups; i < ixgbe->unicst_total; i++) { + if (ixgbe->unicst_addr[i].mac.set == 0) { + slot = i; + break; + } + } + } else { + slot = rx_group->index; } - ASSERT((slot >= 0) && (slot < ixgbe->unicst_total)); - - if ((err = ixgbe_unicst_set(ixgbe, mac_addr, slot)) == 0) { - ixgbe->unicst_addr[slot].mac.set = 1; - ixgbe->unicst_avail--; + if (slot == -1) { + /* no slots available */ + mutex_exit(&ixgbe->gen_lock); + return (ENOSPC); } + bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); + (void) ixgbe_set_rar(hw, slot, ixgbe->unicst_addr[slot].mac.addr, + rx_group->index, IXGBE_RAH_AV); + ixgbe->unicst_addr[slot].mac.set = 1; + ixgbe->unicst_addr[slot].mac.group_index = rx_group->index; + ixgbe->unicst_avail--; + mutex_exit(&ixgbe->gen_lock); - return (err); + return (0); } /* @@ -4826,8 +5453,8 @@ ixgbe_remmac(void *arg, const uint8_t *mac_addr) { ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg; ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; int slot; - int err; mutex_enter(&ixgbe->gen_lock); @@ -4848,13 
+5475,11 @@ ixgbe_remmac(void *arg, const uint8_t *mac_addr) } bzero(ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); - if ((err = ixgbe_unicst_set(ixgbe, - ixgbe->unicst_addr[slot].mac.addr, slot)) == 0) { - ixgbe->unicst_addr[slot].mac.set = 0; - ixgbe->unicst_avail++; - } + (void) ixgbe_clear_rar(hw, slot); + ixgbe->unicst_addr[slot].mac.set = 0; + ixgbe->unicst_avail++; mutex_exit(&ixgbe->gen_lock); - return (err); + return (0); } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h index d9747b4f8f..53690b3a46 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -71,8 +70,6 @@ void ixgbe_write_pci_cfg(struct ixgbe_hw *, uint32_t, uint32_t); #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER 0x04 #define PCI_EX_CONF_CAP 0xE0 -#define MAX_NUM_UNICAST_ADDRESSES 0x10 -#define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define SPEED_10GB 10000 #define SPEED_1GB 1000 #define SPEED_100 100 diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_rx.c b/usr/src/uts/common/io/ixgbe/ixgbe_rx.c index 2ed6a09405..c9efa55a87 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_rx.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_rx.c @@ -514,18 +514,17 @@ ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) */ if ((status_error & IXGBE_RXD_STAT_L4CS) && !(status_error & IXGBE_RXDADV_ERR_TCPE)) - hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + hcksum_flags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ if ((status_error & IXGBE_RXD_STAT_IPCS) && !(status_error & IXGBE_RXDADV_ERR_IPE)) - hcksum_flags |= HCK_IPV4_HDRCKSUM; + hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; if (hcksum_flags != 0) { - (void) hcksum_assoc(mp, - NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); } } @@ -722,6 +721,9 @@ rx_discard: status_error = current_rbd->wb.upper.status_error; } + rx_ring->stat_rbytes += received_bytes; + rx_ring->stat_ipackets += pkt_num; + DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV); rx_data->rbd_next = rx_next; @@ -735,7 +737,7 @@ rx_discard: } else rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size); - IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail); + IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->hw_index), rx_tail); if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_stat.c b/usr/src/uts/common/io/ixgbe/ixgbe_stat.c index 54dfdbff09..4d95a00d9f 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_stat.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_stat.c @@ -438,3 
+438,358 @@ ixgbe_init_stats(ixgbe_t *ixgbe) return (IXGBE_SUCCESS); } + +/* + * Retrieve a value for one of the statistics. + */ +int +ixgbe_m_stat(void *arg, uint_t stat, uint64_t *val) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg; + struct ixgbe_hw *hw = &ixgbe->hw; + ixgbe_stat_t *ixgbe_ks; + int i; + + ixgbe_ks = (ixgbe_stat_t *)ixgbe->ixgbe_ks->ks_data; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_IFSPEED: + *val = ixgbe->link_speed * 1000000ull; + break; + + case MAC_STAT_MULTIRCV: + ixgbe_ks->mprc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_MPRC); + *val = ixgbe_ks->mprc.value.ui64; + break; + + case MAC_STAT_BRDCSTRCV: + ixgbe_ks->bprc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_BPRC); + *val = ixgbe_ks->bprc.value.ui64; + break; + + case MAC_STAT_MULTIXMT: + ixgbe_ks->mptc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_MPTC); + *val = ixgbe_ks->mptc.value.ui64; + break; + + case MAC_STAT_BRDCSTXMT: + ixgbe_ks->bptc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_BPTC); + *val = ixgbe_ks->bptc.value.ui64; + break; + + case MAC_STAT_NORCVBUF: + for (i = 0; i < 8; i++) { + ixgbe_ks->rnbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + } + *val = ixgbe_ks->rnbc.value.ui64; + break; + + case MAC_STAT_IERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + ixgbe_ks->illerrc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + ixgbe_ks->errbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ERRBC); + ixgbe_ks->rlec.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RLEC); + *val = ixgbe_ks->crcerrs.value.ui64 + + ixgbe_ks->illerrc.value.ui64 + + ixgbe_ks->errbc.value.ui64 + + ixgbe_ks->rlec.value.ui64; + break; + + case MAC_STAT_RBYTES: + ixgbe_ks->tor.value.ui64 = 0; + for (i = 0; i < 16; i++) { + ixgbe_ks->qbrc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + ixgbe_ks->tor.value.ui64 += + ixgbe_ks->qbrc[i].value.ui64; + } + *val = 
ixgbe_ks->tor.value.ui64; + break; + + case MAC_STAT_OBYTES: + ixgbe_ks->tot.value.ui64 = 0; + for (i = 0; i < 16; i++) { + if (hw->mac.type >= ixgbe_mac_82599EB) { + ixgbe_ks->qbtc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + ixgbe_ks->qbtc[i].value.ui64 += ((uint64_t) + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i))) << 32; + } else { + ixgbe_ks->qbtc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + } + ixgbe_ks->tot.value.ui64 += + ixgbe_ks->qbtc[i].value.ui64; + } + *val = ixgbe_ks->tot.value.ui64; + break; + + case MAC_STAT_IPACKETS: + ixgbe_ks->tpr.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_TPR); + *val = ixgbe_ks->tpr.value.ui64; + break; + + case MAC_STAT_OPACKETS: + ixgbe_ks->tpt.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_TPT); + *val = ixgbe_ks->tpt.value.ui64; + break; + + /* RFC 1643 stats */ + case ETHER_STAT_FCS_ERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + *val = ixgbe_ks->crcerrs.value.ui64; + break; + + case ETHER_STAT_TOOLONG_ERRORS: + ixgbe_ks->roc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ROC); + *val = ixgbe_ks->roc.value.ui64; + break; + + case ETHER_STAT_MACRCV_ERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + ixgbe_ks->illerrc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + ixgbe_ks->errbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ERRBC); + ixgbe_ks->rlec.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RLEC); + *val = ixgbe_ks->crcerrs.value.ui64 + + ixgbe_ks->illerrc.value.ui64 + + ixgbe_ks->errbc.value.ui64 + + ixgbe_ks->rlec.value.ui64; + break; + + /* MII/GMII stats */ + case ETHER_STAT_XCVR_ADDR: + /* The Internal PHY's MDI address for each MAC is 1 */ + *val = 1; + break; + + case ETHER_STAT_XCVR_ID: + *val = hw->phy.id; + break; + + case ETHER_STAT_XCVR_INUSE: + switch (ixgbe->link_speed) { + case IXGBE_LINK_SPEED_1GB_FULL: + *val = + (hw->phy.media_type == ixgbe_media_type_copper) ? 
+ XCVR_1000T : XCVR_1000X; + break; + case IXGBE_LINK_SPEED_100_FULL: + *val = (hw->phy.media_type == ixgbe_media_type_copper) ? + XCVR_100T2 : XCVR_100X; + break; + default: + *val = XCVR_NONE; + break; + } + break; + + case ETHER_STAT_CAP_10GFDX: + *val = 1; + break; + + case ETHER_STAT_CAP_1000FDX: + *val = 1; + break; + + case ETHER_STAT_CAP_100FDX: + *val = 1; + break; + + case ETHER_STAT_CAP_ASMPAUSE: + *val = ixgbe->param_asym_pause_cap; + break; + + case ETHER_STAT_CAP_PAUSE: + *val = ixgbe->param_pause_cap; + break; + + case ETHER_STAT_CAP_AUTONEG: + *val = 1; + break; + + case ETHER_STAT_ADV_CAP_10GFDX: + *val = ixgbe->param_adv_10000fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_1000FDX: + *val = ixgbe->param_adv_1000fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_100FDX: + *val = ixgbe->param_adv_100fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_ASMPAUSE: + *val = ixgbe->param_adv_asym_pause_cap; + break; + + case ETHER_STAT_ADV_CAP_PAUSE: + *val = ixgbe->param_adv_pause_cap; + break; + + case ETHER_STAT_ADV_CAP_AUTONEG: + *val = ixgbe->param_adv_autoneg_cap; + break; + + case ETHER_STAT_LP_CAP_10GFDX: + *val = ixgbe->param_lp_10000fdx_cap; + break; + + case ETHER_STAT_LP_CAP_1000FDX: + *val = ixgbe->param_lp_1000fdx_cap; + break; + + case ETHER_STAT_LP_CAP_100FDX: + *val = ixgbe->param_lp_100fdx_cap; + break; + + case ETHER_STAT_LP_CAP_ASMPAUSE: + *val = ixgbe->param_lp_asym_pause_cap; + break; + + case ETHER_STAT_LP_CAP_PAUSE: + *val = ixgbe->param_lp_pause_cap; + break; + + case ETHER_STAT_LP_CAP_AUTONEG: + *val = ixgbe->param_lp_autoneg_cap; + break; + + case ETHER_STAT_LINK_ASMPAUSE: + *val = ixgbe->param_asym_pause_cap; + break; + + case ETHER_STAT_LINK_PAUSE: + *val = ixgbe->param_pause_cap; + break; + + case ETHER_STAT_LINK_AUTONEG: + *val = ixgbe->param_adv_autoneg_cap; + break; + + case ETHER_STAT_LINK_DUPLEX: + *val = ixgbe->link_duplex; + break; + + case ETHER_STAT_TOOSHORT_ERRORS: + ixgbe_ks->ruc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RUC); + 
*val = ixgbe_ks->ruc.value.ui64; + break; + + case ETHER_STAT_CAP_REMFAULT: + *val = ixgbe->param_rem_fault; + break; + + case ETHER_STAT_ADV_REMFAULT: + *val = ixgbe->param_adv_rem_fault; + break; + + case ETHER_STAT_LP_REMFAULT: + *val = ixgbe->param_lp_rem_fault; + break; + + case ETHER_STAT_JABBER_ERRORS: + ixgbe_ks->rjc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RJC); + *val = ixgbe_ks->rjc.value.ui64; + break; + + default: + mutex_exit(&ixgbe->gen_lock); + return (ENOTSUP); + } + + mutex_exit(&ixgbe->gen_lock); + + if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { + ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); + return (EIO); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +ixgbe_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)rh; + ixgbe_t *ixgbe = rx_ring->ixgbe; + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->stat_rbytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->stat_ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +ixgbe_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)rh; + ixgbe_t *ixgbe = tx_ring->ixgbe; + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->stat_obytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->stat_opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h index 30dd825e0f..f5e68fcd87 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h 
@@ -80,9 +80,10 @@ extern "C" { #define IXGBE_STARTED 0x02 #define IXGBE_SUSPENDED 0x04 #define IXGBE_STALL 0x08 +#define IXGBE_INTR_ADJUST 0x40 #define IXGBE_ERROR 0x80 -#define MAX_NUM_UNICAST_ADDRESSES 0x10 +#define MAX_NUM_UNICAST_ADDRESSES 0x80 #define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define IXGBE_INTR_NONE 0 #define IXGBE_INTR_MSIX 1 @@ -109,11 +110,11 @@ extern "C" { #define MAX_TX_QUEUE_NUM 128 #define MAX_RX_QUEUE_NUM 128 #define MAX_INTR_VECTOR 64 +#define MAX_RX_GROUP_NUM 64 /* * Maximum values for user configurable parameters */ -#define MAX_RX_GROUP_NUM 1 #define MAX_TX_RING_SIZE 4096 #define MAX_RX_RING_SIZE 4096 @@ -128,7 +129,6 @@ extern "C" { /* * Minimum values for user configurable parameters */ -#define MIN_RX_GROUP_NUM 1 #define MIN_TX_RING_SIZE 64 #define MIN_RX_RING_SIZE 64 @@ -143,7 +143,6 @@ extern "C" { /* * Default values for user configurable parameters */ -#define DEFAULT_RX_GROUP_NUM 1 #define DEFAULT_TX_RING_SIZE 1024 #define DEFAULT_RX_RING_SIZE 1024 @@ -251,11 +250,22 @@ extern "C" { #define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 8) #define IXGBE_FLAG_RSC_CAPABLE (u32)(1 << 9) +/* + * Classification mode + */ +#define IXGBE_CLASSIFY_NONE 0 +#define IXGBE_CLASSIFY_RSS 1 +#define IXGBE_CLASSIFY_VMDQ 2 +#define IXGBE_CLASSIFY_VMDQ_RSS 3 + /* adapter-specific info for each supported device type */ typedef struct adapter_info { - uint32_t max_rx_que_num; /* maximum number of rx queues */ - uint32_t min_rx_que_num; /* minimum number of rx queues */ - uint32_t def_rx_que_num; /* default number of rx queues */ + uint32_t max_rx_que_num; /* maximum number of rx queues */ + uint32_t min_rx_que_num; /* minimum number of rx queues */ + uint32_t def_rx_que_num; /* default number of rx queues */ + uint32_t max_rx_grp_num; /* maximum number of rx groups */ + uint32_t min_rx_grp_num; /* minimum number of rx groups */ + uint32_t def_rx_grp_num; /* default number of rx groups */ uint32_t max_tx_que_num; /* maximum number of tx queues */ uint32_t 
min_tx_que_num; /* minimum number of tx queues */ uint32_t def_tx_que_num; /* default number of tx queues */ @@ -358,7 +368,7 @@ typedef union ixgbe_ether_addr { } reg; struct { uint8_t set; - uint8_t redundant; + uint8_t group_index; uint8_t addr[ETHERADDRL]; } mac; } ixgbe_ether_addr_t; @@ -494,6 +504,8 @@ typedef struct ixgbe_tx_ring { uint32_t stat_break_tbd_limit; uint32_t stat_lso_header_fail; #endif + uint64_t stat_obytes; + uint64_t stat_opackets; mac_ring_handle_t ring_handle; @@ -546,6 +558,8 @@ typedef struct ixgbe_rx_data { */ typedef struct ixgbe_rx_ring { uint32_t index; /* Ring index */ + uint32_t group_index; /* Group index */ + uint32_t hw_index; /* h/w ring index */ uint32_t intr_vector; /* Interrupt vector index */ uint32_t vect_bit; /* vector's bit in register */ @@ -561,6 +575,8 @@ typedef struct ixgbe_rx_ring { uint32_t stat_cksum_error; uint32_t stat_exceed_pkt; #endif + uint64_t stat_rbytes; + uint64_t stat_ipackets; mac_ring_handle_t ring_handle; uint64_t ring_gen_num; @@ -651,6 +667,7 @@ typedef struct ixgbe { boolean_t tx_hcksum_enable; /* Tx h/w cksum offload */ boolean_t lso_enable; /* Large Segment Offload */ boolean_t mr_enable; /* Multiple Tx and Rx Ring */ + uint32_t classify_mode; /* Classification mode */ uint32_t tx_copy_thresh; /* Tx copy threshold */ uint32_t tx_recycle_thresh; /* Tx recycle threshold */ uint32_t tx_overload_thresh; /* Tx overload threshold */ @@ -664,11 +681,14 @@ typedef struct ixgbe { int intr_type; int intr_cnt; + uint32_t intr_cnt_max; + uint32_t intr_cnt_min; int intr_cap; size_t intr_size; uint_t intr_pri; ddi_intr_handle_t *htable; uint32_t eims_mask; + ddi_cb_handle_t cb_hdl; /* Interrupt callback handle */ kmutex_t gen_lock; /* General lock for device access */ kmutex_t watchdog_lock; @@ -825,16 +845,15 @@ int ixgbe_m_start(void *); void ixgbe_m_stop(void *); int ixgbe_m_promisc(void *, boolean_t); int ixgbe_m_multicst(void *, boolean_t, const uint8_t *); -int ixgbe_m_stat(void *, uint_t, uint64_t *); 
void ixgbe_m_resources(void *); void ixgbe_m_ioctl(void *, queue_t *, mblk_t *); boolean_t ixgbe_m_getcapab(void *, mac_capab_t, void *); int ixgbe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int ixgbe_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +int ixgbe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void ixgbe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); int ixgbe_set_priv_prop(ixgbe_t *, const char *, uint_t, const void *); -int ixgbe_get_priv_prop(ixgbe_t *, const char *, - uint_t, uint_t, void *, uint_t *); +int ixgbe_get_priv_prop(ixgbe_t *, const char *, uint_t, void *); boolean_t ixgbe_param_locked(mac_prop_id_t); /* @@ -864,6 +883,9 @@ void ixgbe_error(void *, const char *, ...); * Function prototypes in ixgbe_stat.c */ int ixgbe_init_stats(ixgbe_t *); +int ixgbe_m_stat(void *, uint_t, uint64_t *); +int ixgbe_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int ixgbe_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c index 310b6226fd..484b9c11e3 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c @@ -21,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -501,6 +501,9 @@ adjust_threshold: ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); + tx_ring->stat_obytes += mbsize; + tx_ring->stat_opackets ++; + mutex_exit(&tx_ring->tx_lock); /* @@ -696,7 +699,7 @@ ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) ASSERT(mp != NULL); - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags); bzero(ctx, sizeof (ixgbe_tx_context_t)); if (hckflags == 0) { @@ -705,7 +708,7 @@ ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) ctx->hcksum_flags = hckflags; - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); ctx->mss = mss; ctx->lso_flag = (lsoflags == HW_LSO); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 92d1542efd..e9e1a62e1b 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -280,6 +280,7 @@ #include <sys/mac_provider.h> #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/mac_impl.h> #include <sys/mac.h> #include <sys/dls.h> @@ -306,6 +307,11 @@ #include <sys/exacct_impl.h> #include <inet/nd.h> #include <sys/ethernet.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> +#include <sys/cpupart.h> +#include <inet/wifi_ioctl.h> +#include <net/wpa.h> #define IMPL_HASHSZ 67 /* prime */ @@ -316,6 +322,7 @@ uint_t i_mac_impl_count; static kmem_cache_t *mac_ring_cache; static id_space_t *minor_ids; static uint32_t minor_count; +static pool_event_cb_t mac_pool_event_reg; /* * Logging stuff. 
Perhaps mac_logging_interval could be broken into @@ -370,6 +377,7 @@ void mac_tx_client_block(mac_client_impl_t *); static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); static int mac_start_group_and_rings(mac_group_t *); static void mac_stop_group_and_rings(mac_group_t *); +static void mac_pool_event_cb(pool_event_t, int, void *); /* * Module initialization functions. @@ -440,14 +448,22 @@ mac_init(void) mac_flow_log_enable = B_FALSE; mac_link_log_enable = B_FALSE; mac_logging_timer = 0; + + /* Register to be notified of noteworthy pools events */ + mac_pool_event_reg.pec_func = mac_pool_event_cb; + mac_pool_event_reg.pec_arg = NULL; + pool_event_cb_register(&mac_pool_event_reg); } int mac_fini(void) { + if (i_mac_impl_count > 0 || minor_count > 0) return (EBUSY); + pool_event_cb_unregister(&mac_pool_event_reg); + id_space_destroy(minor_ids); mac_flow_fini(); @@ -459,6 +475,8 @@ mac_fini(void) mod_hash_destroy_hash(i_mactype_hash); mac_soft_ring_finish(); + + return (0); } @@ -501,7 +519,6 @@ i_mac_constructor(void *buf, void *arg, int kmflag) mip->mi_linkstate = LINK_STATE_UNKNOWN; - mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); @@ -554,7 +571,6 @@ i_mac_destructor(void *buf, void *arg) ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); - mutex_destroy(&mip->mi_lock); rw_destroy(&mip->mi_rw_lock); mutex_destroy(&mip->mi_promisc_lock); @@ -1049,6 +1065,7 @@ mac_start(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; int err = 0; + mac_group_t *defgrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); ASSERT(mip->mi_start != NULL); @@ -1074,33 +1091,31 @@ mac_start(mac_handle_t mh) if (mip->mi_default_tx_ring != NULL) { ring = (mac_ring_t *)mip->mi_default_tx_ring; - err = mac_start_ring(ring); - if (err != 0) { - 
mip->mi_active--; - return (err); + if (ring->mr_state != MR_INUSE) { + err = mac_start_ring(ring); + if (err != 0) { + mip->mi_active--; + return (err); + } } - ring->mr_state = MR_INUSE; } - if (mip->mi_rx_groups != NULL) { + if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* * Start the default ring, since it will be needed * to receive broadcast and multicast traffic for * both primary and non-primary MAC clients. */ - mac_group_t *grp = &mip->mi_rx_groups[0]; - - ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); - err = mac_start_group_and_rings(grp); + ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED); + err = mac_start_group_and_rings(defgrp); if (err != 0) { mip->mi_active--; - if (ring != NULL) { + if ((ring != NULL) && + (ring->mr_state == MR_INUSE)) mac_stop_ring(ring); - ring->mr_state = MR_FREE; - } return (err); } - mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED); + mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED); } } @@ -1114,6 +1129,7 @@ void mac_stop(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; + mac_group_t *grp; ASSERT(mip->mi_stop != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -1123,15 +1139,12 @@ mac_stop(mac_handle_t mh) */ ASSERT(mip->mi_active != 0); if (--mip->mi_active == 0) { - if (mip->mi_rx_groups != NULL) { + if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* * There should be no more active clients since the * MAC is being stopped. Stop the default RX group * and transition it back to registered state. - */ - mac_group_t *grp = &mip->mi_rx_groups[0]; - - /* + * * When clients are torn down, the groups * are released via mac_release_rx_group which * knows that the default group is always in @@ -1141,18 +1154,20 @@ mac_stop(mac_handle_t mh) * as a client) and group is in SHARED state.
*/ ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); - ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) && + ASSERT(MAC_GROUP_NO_CLIENT(grp) && mip->mi_nactiveclients == 0); mac_stop_group_and_rings(grp); - mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED); } if (mip->mi_default_tx_ring != NULL) { mac_ring_t *ring; ring = (mac_ring_t *)mip->mi_default_tx_ring; - mac_stop_ring(ring); - ring->mr_state = MR_FREE; + if (ring->mr_state == MR_INUSE) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } } /* @@ -1460,74 +1475,111 @@ mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, mac_ring_handle_t *hwrh, mac_ring_type_t rtype) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *grp; + mac_ring_t *ring; int cnt = 0; - switch (rtype) { - case MAC_RING_TYPE_RX: { - flow_entry_t *flent = mcip->mci_flent; - mac_group_t *grp; - mac_ring_t *ring; - + if (rtype == MAC_RING_TYPE_RX) { grp = flent->fe_rx_ring_group; - /* - * The mac client did not reserve any RX group, return directly. - * This is probably because the underlying MAC does not support - * any groups. - */ - *hwgh = NULL; - if (grp == NULL) - return (0); - /* - * This group must be reserved by this mac client. 
- */ - ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && - (mch == (mac_client_handle_t) - (MAC_RX_GROUP_ONLY_CLIENT(grp)))); - for (ring = grp->mrg_rings; - ring != NULL; ring = ring->mr_next, cnt++) { - ASSERT(cnt < MAX_RINGS_PER_GROUP); - hwrh[cnt] = (mac_ring_handle_t)ring; - } - *hwgh = (mac_group_handle_t)grp; - return (cnt); - } - case MAC_RING_TYPE_TX: { - mac_soft_ring_set_t *tx_srs; - mac_srs_tx_t *tx; - - tx_srs = MCIP_TX_SRS(mcip); - tx = &tx_srs->srs_tx; - for (; cnt < tx->st_ring_count; cnt++) - hwrh[cnt] = tx->st_rings[cnt]; - return (cnt); - } - default: + } else if (rtype == MAC_RING_TYPE_TX) { + grp = flent->fe_tx_ring_group; + } else { ASSERT(B_FALSE); return (-1); } + /* + * The mac client did not reserve any RX group, return directly. + * This is probably because the underlying MAC does not support + * any groups. + */ + if (hwgh != NULL) + *hwgh = NULL; + if (grp == NULL) + return (0); + /* + * This group must be reserved by this mac client. + */ + ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && + (mcip == MAC_GROUP_ONLY_CLIENT(grp))); + + for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) { + ASSERT(cnt < MAX_RINGS_PER_GROUP); + hwrh[cnt] = (mac_ring_handle_t)ring; + } + if (hwgh != NULL) + *hwgh = (mac_group_handle_t)grp; + + return (cnt); } /* - * Setup the RX callback of the mac client which exclusively controls HW ring. + * This function is called to get info about Tx/Rx rings. + * + * Return value: returns uint_t which will have various bits set + * that indicates different properties of the ring. + */ +uint_t +mac_hwring_getinfo(mac_ring_handle_t rh) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + return (info->mri_flags); +} + +/* + * Export ddi interrupt handles from the HW ring to the pseudo ring and + * setup the RX callback of the mac client which exclusively controls + * HW ring. 
*/ void -mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh) +mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh, + mac_ring_handle_t pseudo_rh) { mac_ring_t *hw_ring = (mac_ring_t *)hwrh; + mac_ring_t *pseudo_ring; mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; - mac_srs->srs_mrh = prh; - mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; + if (pseudo_rh != NULL) { + pseudo_ring = (mac_ring_t *)pseudo_rh; + /* Export the ddi handles to pseudo ring */ + pseudo_ring->mr_info.mri_intr.mi_ddi_handle = + hw_ring->mr_info.mri_intr.mi_ddi_handle; + pseudo_ring->mr_info.mri_intr.mi_ddi_shared = + hw_ring->mr_info.mri_intr.mi_ddi_shared; + /* + * Save a pointer to pseudo ring in the hw ring. If + * interrupt handle changes, the hw ring will be + * notified of the change (see mac_ring_intr_set()) + * and the appropriate change has to be made to + * the pseudo ring that has exported the ddi handle. + */ + hw_ring->mr_prh = pseudo_rh; + } + + if (hw_ring->mr_type == MAC_RING_TYPE_RX) { + ASSERT(!(mac_srs->srs_type & SRST_TX)); + mac_srs->srs_mrh = prh; + mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; + } } void mac_hwring_teardown(mac_ring_handle_t hwrh) { mac_ring_t *hw_ring = (mac_ring_t *)hwrh; - mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; + mac_soft_ring_set_t *mac_srs; - mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; - mac_srs->srs_mrh = NULL; + if (hw_ring == NULL) + return; + hw_ring->mr_prh = NULL; + if (hw_ring->mr_type == MAC_RING_TYPE_RX) { + mac_srs = hw_ring->mr_srs; + ASSERT(!(mac_srs->srs_type & SRST_TX)); + mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; + mac_srs->srs_mrh = NULL; + } } int @@ -1575,7 +1627,7 @@ mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) } /* - * Send packets through the selected tx ring. + * Send packets through a selected tx ring. 
*/ mblk_t * mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) @@ -1588,6 +1640,35 @@ mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) return (info->mri_tx(info->mri_driver, mp)); } +/* + * Query stats for a particular rx/tx ring + */ +int +mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + return (info->mri_stat(info->mri_driver, stat, val)); +} + +/* + * Private function that is only used by aggr to send packets through + * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports + * that do not expose Tx rings, aggr_ring_tx() entry point needs + * access to mac_impl_t to send packets through m_tx() entry point. + * It accomplishes this by calling mac_hwring_send_priv() function. + */ +mblk_t * +mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = mcip->mci_mip; + + MAC_TX(mip, rh, mp, mcip); + return (mp); +} + int mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) { @@ -1609,7 +1690,7 @@ mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) * started/stopped outside of this function. */ void -mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) +mac_set_group_state(mac_group_t *grp, mac_group_state_t state) { /* * If there is no change in the group state, just return.
@@ -1629,9 +1710,10 @@ mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) */ ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); - if (GROUP_INTR_DISABLE_FUNC(grp) != NULL) + if (grp->mrg_type == MAC_RING_TYPE_RX && + GROUP_INTR_DISABLE_FUNC(grp) != NULL) { GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); - + } break; case MAC_GROUP_STATE_SHARED: @@ -1641,9 +1723,10 @@ mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) */ ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); - if (GROUP_INTR_ENABLE_FUNC(grp) != NULL) + if (grp->mrg_type == MAC_RING_TYPE_RX && + GROUP_INTR_ENABLE_FUNC(grp) != NULL) { GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); - + } /* The ring is not available for reservations any more */ break; @@ -1921,7 +2004,8 @@ mac_rx_srs_restart(mac_soft_ring_set_t *srs) if (mr != NULL) { MAC_RING_UNMARK(mr, MR_QUIESCE); /* In case the ring was stopped, safely restart it */ - (void) mac_start_ring(mr); + if (mr->mr_state != MR_INUSE) + (void) mac_start_ring(mr); } else { FLOW_UNMARK(flent, FE_QUIESCE); } @@ -2088,9 +2172,11 @@ mac_tx_flow_restart(flow_entry_t *flent, void *arg) return (0); } -void -mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) +static void +i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) { + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); mac_tx_client_block(mcip); @@ -2102,8 +2188,22 @@ mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) } void -mac_tx_client_restart(mac_client_impl_t *mcip) +mac_tx_client_quiesce(mac_client_handle_t mch) +{ + i_mac_tx_client_quiesce(mch, SRS_QUIESCE); +} + +void +mac_tx_client_condemn(mac_client_handle_t mch) +{ + i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); +} + +void +mac_tx_client_restart(mac_client_handle_t mch) { + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); mac_tx_client_unblock(mcip); @@ -2119,22 
+2219,22 @@ mac_tx_client_flush(mac_client_impl_t *mcip) { ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); - mac_tx_client_quiesce(mcip, SRS_QUIESCE); - mac_tx_client_restart(mcip); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } void mac_client_quiesce(mac_client_impl_t *mcip) { mac_rx_client_quiesce((mac_client_handle_t)mcip); - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce((mac_client_handle_t)mcip); } void mac_client_restart(mac_client_impl_t *mcip) { mac_rx_client_restart((mac_client_handle_t)mcip); - mac_tx_client_restart(mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } /* @@ -2386,8 +2486,21 @@ i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) rw_enter(&mip->mi_rw_lock, RW_READER); for (cclient = mip->mi_clients_list; cclient != NULL; cclient = cclient->mci_client_next) { - if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) + if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { mac_tx_srs_wakeup(mac_srs, ring); + } else { + /* + * Aggr opens underlying ports in exclusive mode + * and registers flow control callbacks using + * mac_tx_client_notify(). When opened in + * exclusive mode, Tx SRS won't be created + * during mac_unicast_add(). + */ + if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { + mac_tx_invoke_callbacks(cclient, + (mac_tx_cookie_t)ring); + } + } (void) mac_flow_walk(cclient->mci_subflow_tab, mac_tx_flow_srs_wakeup, ring); } @@ -2724,43 +2837,196 @@ done: } /* - * mac_set_prop() sets mac or hardware driver properties: - * MAC resource properties include maxbw, priority, and cpu binding list. - * Driver properties are private properties to the hardware, such as mtu - * and speed. There's one other MAC property -- the PVID. - * If the property is a driver property, mac_set_prop() calls driver's callback - * function to set it. 
- * If the property is a mac resource property, mac_set_prop() invokes - * mac_set_resources() which will cache the property value in mac_impl_t and - * may call mac_client_set_resource() to update property value of the primary - * mac client, if it exists. + * Checks the size of the value size specified for a property as + * part of a property operation. Returns B_TRUE if the size is + * correct, B_FALSE otherwise. + */ +boolean_t +mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) +{ + uint_t minsize = 0; + + if (is_range) + return (valsize >= sizeof (mac_propval_range_t)); + + switch (id) { + case MAC_PROP_ZONE: + minsize = sizeof (dld_ioc_zid_t); + break; + case MAC_PROP_AUTOPUSH: + if (valsize != 0) + minsize = sizeof (struct dlautopush); + break; + case MAC_PROP_TAGMODE: + minsize = sizeof (link_tagmode_t); + break; + case MAC_PROP_RESOURCE: + case MAC_PROP_RESOURCE_EFF: + minsize = sizeof (mac_resource_props_t); + break; + case MAC_PROP_DUPLEX: + minsize = sizeof (link_duplex_t); + break; + case MAC_PROP_SPEED: + minsize = sizeof (uint64_t); + break; + case MAC_PROP_STATUS: + minsize = sizeof (link_state_t); + break; + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + minsize = sizeof (uint8_t); + break; + case MAC_PROP_MTU: + case MAC_PROP_LLIMIT: + case MAC_PROP_LDECAY: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_FLOWCTRL: + minsize = sizeof (link_flowctrl_t); + break; + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + minsize = sizeof (uint8_t); + break; + case MAC_PROP_PVID: + minsize = sizeof (uint16_t); + break; + case 
MAC_PROP_IPTUN_HOPLIMIT: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_IPTUN_ENCAPLIMIT: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_MAX_TX_RINGS_AVAIL: + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + minsize = sizeof (uint_t); + break; + case MAC_PROP_WL_ESSID: + minsize = sizeof (wl_linkstatus_t); + break; + case MAC_PROP_WL_BSSID: + minsize = sizeof (wl_bssid_t); + break; + case MAC_PROP_WL_BSSTYPE: + minsize = sizeof (wl_bss_type_t); + break; + case MAC_PROP_WL_LINKSTATUS: + minsize = sizeof (wl_linkstatus_t); + break; + case MAC_PROP_WL_DESIRED_RATES: + minsize = sizeof (wl_rates_t); + break; + case MAC_PROP_WL_SUPPORTED_RATES: + minsize = sizeof (wl_rates_t); + break; + case MAC_PROP_WL_AUTH_MODE: + minsize = sizeof (wl_authmode_t); + break; + case MAC_PROP_WL_ENCRYPTION: + minsize = sizeof (wl_encryption_t); + break; + case MAC_PROP_WL_RSSI: + minsize = sizeof (wl_rssi_t); + break; + case MAC_PROP_WL_PHY_CONFIG: + minsize = sizeof (wl_phy_conf_t); + break; + case MAC_PROP_WL_CAPABILITY: + minsize = sizeof (wl_capability_t); + break; + case MAC_PROP_WL_WPA: + minsize = sizeof (wl_wpa_t); + break; + case MAC_PROP_WL_SCANRESULTS: + minsize = sizeof (wl_wpa_ess_t); + break; + case MAC_PROP_WL_POWER_MODE: + minsize = sizeof (wl_ps_mode_t); + break; + case MAC_PROP_WL_RADIO: + minsize = sizeof (wl_radio_t); + break; + case MAC_PROP_WL_ESS_LIST: + minsize = sizeof (wl_ess_list_t); + break; + case MAC_PROP_WL_KEY_TAB: + minsize = sizeof (wl_wep_key_tab_t); + break; + case MAC_PROP_WL_CREATE_IBSS: + minsize = sizeof (wl_create_ibss_t); + break; + case MAC_PROP_WL_SETOPTIE: + minsize = sizeof (wl_wpa_ie_t); + break; + case MAC_PROP_WL_DELKEY: + minsize = sizeof (wl_del_key_t); + break; + case MAC_PROP_WL_KEY: + minsize = sizeof (wl_key_t); + break; + case MAC_PROP_WL_MLME: + minsize = sizeof (wl_mlme_t); + break; + } + + return (valsize >= minsize); +} + +/* + * mac_set_prop() sets MAC 
or hardware driver properties: + * + * - MAC-managed properties such as resource properties include maxbw, + * priority, and cpu binding list, as well as the default port VID + * used by bridging. These properties are consumed by the MAC layer + * itself and not passed down to the driver. For resource control + * properties, this function invokes mac_set_resources() which will + * cache the property value in mac_impl_t and may call + * mac_client_set_resource() to update property value of the primary + * mac client, if it exists. + * + * - Properties which act on the hardware and must be passed to the + * driver, such as MTU, through the driver's mc_setprop() entry point. */ int -mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) +mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, + uint_t valsize) { int err = ENOTSUP; mac_impl_t *mip = (mac_impl_t *)mh; ASSERT(MAC_PERIM_HELD(mh)); - switch (macprop->mp_id) { - case MAC_PROP_MAXBW: - case MAC_PROP_PRIO: - case MAC_PROP_PROTECT: - case MAC_PROP_BIND_CPU: { - mac_resource_props_t mrp; + switch (id) { + case MAC_PROP_RESOURCE: { + mac_resource_props_t *mrp; - /* If it is mac property, call mac_set_resources() */ - if (valsize < sizeof (mac_resource_props_t)) - return (EINVAL); - bcopy(val, &mrp, sizeof (mrp)); - err = mac_set_resources(mh, &mrp); + /* call mac_set_resources() for MAC properties */ + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + bcopy(val, mrp, sizeof (*mrp)); + err = mac_set_resources(mh, mrp); + kmem_free(mrp, sizeof (*mrp)); break; } case MAC_PROP_PVID: - if (valsize < sizeof (uint16_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint16_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); err = mac_set_pvid(mh, *(uint16_t *)val); break; @@ -2768,8 +3034,7 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) case MAC_PROP_MTU: { uint32_t 
mtu; - if (valsize < sizeof (mtu)) - return (EINVAL); + ASSERT(valsize >= sizeof (uint32_t)); bcopy(val, &mtu, sizeof (mtu)); err = mac_set_mtu(mh, mtu, NULL); break; @@ -2783,9 +3048,9 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) (mip->mi_state_flags & MIS_IS_VNIC)) return (EINVAL); bcopy(val, &learnval, sizeof (learnval)); - if (learnval == 0 && macprop->mp_id == MAC_PROP_LDECAY) + if (learnval == 0 && id == MAC_PROP_LDECAY) return (EINVAL); - if (macprop->mp_id == MAC_PROP_LLIMIT) + if (id == MAC_PROP_LLIMIT) mip->mi_llimit = learnval; else mip->mi_ldecay = learnval; @@ -2797,60 +3062,68 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) /* For other driver properties, call driver's callback */ if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { err = mip->mi_callbacks->mc_setprop(mip->mi_driver, - macprop->mp_name, macprop->mp_id, valsize, val); + name, id, valsize, val); } } return (err); } /* - * mac_get_prop() gets mac or hardware driver properties. + * mac_get_prop() gets MAC or device driver properties. * * If the property is a driver property, mac_get_prop() calls driver's callback - * function to get it. - * If the property is a mac property, mac_get_prop() invokes mac_get_resources() + * entry point to get it. + * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() * which returns the cached value in mac_impl_t. 
*/ int -mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, - uint_t *perm) +mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, + uint_t valsize) { int err = ENOTSUP; mac_impl_t *mip = (mac_impl_t *)mh; - link_state_t link_state; - boolean_t is_getprop, is_setprop; + uint_t rings; + uint_t vlinks; - is_getprop = (mip->mi_callbacks->mc_callbacks & MC_GETPROP); - is_setprop = (mip->mi_callbacks->mc_callbacks & MC_SETPROP); + bzero(val, valsize); - switch (macprop->mp_id) { - case MAC_PROP_MAXBW: - case MAC_PROP_PRIO: - case MAC_PROP_PROTECT: - case MAC_PROP_BIND_CPU: { - mac_resource_props_t mrp; + switch (id) { + case MAC_PROP_RESOURCE: { + mac_resource_props_t *mrp; /* If mac property, read from cache */ - if (valsize < sizeof (mac_resource_props_t)) - return (EINVAL); - mac_get_resources(mh, &mrp); - bcopy(&mrp, val, sizeof (mac_resource_props_t)); + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + mac_get_resources(mh, mrp); + bcopy(mrp, val, sizeof (*mrp)); + kmem_free(mrp, sizeof (*mrp)); + return (0); + } + case MAC_PROP_RESOURCE_EFF: { + mac_resource_props_t *mrp; + + /* If mac effective property, read from client */ + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + mac_get_effective_resources(mh, mrp); + bcopy(mrp, val, sizeof (*mrp)); + kmem_free(mrp, sizeof (*mrp)); return (0); } case MAC_PROP_PVID: - if (valsize < sizeof (uint16_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint16_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); *(uint16_t *)val = mac_get_pvid(mh); return (0); case MAC_PROP_LLIMIT: case MAC_PROP_LDECAY: - if (valsize < sizeof (uint32_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint32_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); - if (macprop->mp_id == MAC_PROP_LLIMIT) + if (id == MAC_PROP_LLIMIT) 
bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); else bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); @@ -2858,78 +3131,261 @@ mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, case MAC_PROP_MTU: { uint32_t sdu; - mac_propval_range_t range; - - if ((macprop->mp_flags & MAC_PROP_POSSIBLE) != 0) { - if (valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - if (is_getprop) { - err = mip->mi_callbacks->mc_getprop(mip-> - mi_driver, macprop->mp_name, macprop->mp_id, - macprop->mp_flags, valsize, val, perm); - } - /* - * If the driver doesn't have *_m_getprop defined or - * if the driver doesn't support setting MTU then - * return the CURRENT value as POSSIBLE value. - */ - if (!is_getprop || err == ENOTSUP) { - mac_sdu_get(mh, NULL, &sdu); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = sdu; - bcopy(&range, val, sizeof (range)); - err = 0; - } - return (err); - } - if (valsize < sizeof (sdu)) - return (EINVAL); - if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) { - mac_sdu_get(mh, NULL, &sdu); - bcopy(&sdu, val, sizeof (sdu)); - if (is_setprop && (mip->mi_callbacks->mc_setprop(mip-> - mi_driver, macprop->mp_name, macprop->mp_id, - valsize, val) == 0)) { - *perm = MAC_PROP_PERM_RW; - } else { - *perm = MAC_PROP_PERM_READ; - } - return (0); - } else { - if (mip->mi_info.mi_media == DL_ETHER) { - sdu = ETHERMTU; - bcopy(&sdu, val, sizeof (sdu)); - return (0); - } - /* - * ask driver for its default. 
- */ - break; - } + ASSERT(valsize >= sizeof (uint32_t)); + mac_sdu_get(mh, NULL, &sdu); + bcopy(&sdu, val, sizeof (sdu)); + + return (0); } - case MAC_PROP_STATUS: + case MAC_PROP_STATUS: { + link_state_t link_state; + if (valsize < sizeof (link_state)) return (EINVAL); - *perm = MAC_PROP_PERM_READ; link_state = mac_link_get(mh); bcopy(&link_state, val, sizeof (link_state)); + + return (0); + } + + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_TX_RINGS_AVAIL: + ASSERT(valsize >= sizeof (uint_t)); + rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? + mac_rxavail_get(mh) : mac_txavail_get(mh); + bcopy(&rings, val, sizeof (uint_t)); + return (0); + + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + ASSERT(valsize >= sizeof (uint_t)); + vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? + mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); + bcopy(&vlinks, val, sizeof (uint_t)); return (0); + + case MAC_PROP_RXRINGSRANGE: + case MAC_PROP_TXRINGSRANGE: + /* + * The value for these properties are returned through + * the MAC_PROP_RESOURCE property. + */ + return (0); + default: break; } + /* If driver property, request from driver */ - if (is_getprop) { - err = mip->mi_callbacks->mc_getprop(mip->mi_driver, - macprop->mp_name, macprop->mp_id, macprop->mp_flags, - valsize, val, perm); + if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { + err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, + valsize, val); } + return (err); } +/* + * Helper function to initialize the range structure for use in + * mac_get_prop. If the type can be other than uint32, we can + * pass that as an arg. + */ +static void +_mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) +{ + range->mpr_count = 1; + range->mpr_type = MAC_PROPVAL_UINT32; + range->mpr_range_uint32[0].mpur_min = min; + range->mpr_range_uint32[0].mpur_max = max; +} + +/* + * Returns information about the specified property, such as default + * values or permissions. 
+ */ +int +mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, + void *default_val, uint_t default_size, mac_propval_range_t *range, + uint_t *perm) +{ + mac_prop_info_state_t state; + mac_impl_t *mip = (mac_impl_t *)mh; + uint_t max; + + /* + * A property is read/write by default unless the driver says + * otherwise. + */ + if (perm != NULL) + *perm = MAC_PROP_PERM_RW; + + if (default_val != NULL) + bzero(default_val, default_size); + + /* + * First, handle framework properties for which we don't need to + * involve the driver. + */ + switch (id) { + case MAC_PROP_RESOURCE: + case MAC_PROP_PVID: + case MAC_PROP_LLIMIT: + case MAC_PROP_LDECAY: + return (0); + + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_TX_RINGS_AVAIL: + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + return (0); + + case MAC_PROP_RXRINGSRANGE: + case MAC_PROP_TXRINGSRANGE: + /* + * Currently, we support range for RX and TX rings properties. + * When we extend this support to maxbw, cpus and priority, + * we should move this to mac_get_resources. + * There is no default value for RX or TX rings. + */ + if ((mip->mi_state_flags & MIS_IS_VNIC) && + mac_is_vnic_primary(mh)) { + /* + * We don't support setting rings for a VLAN + * data link because it shares its ring with the + * primary MAC client. 
+ */ + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + if (range != NULL) + range->mpr_count = 0; + } else if (range != NULL) { + if (mip->mi_state_flags & MIS_IS_VNIC) + mh = mac_get_lower_mac_handle(mh); + mip = (mac_impl_t *)mh; + if ((id == MAC_PROP_RXRINGSRANGE && + mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || + (id == MAC_PROP_TXRINGSRANGE && + mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { + if (id == MAC_PROP_RXRINGSRANGE) { + if ((mac_rxhwlnksavail_get(mh) + + mac_rxhwlnksrsvd_get(mh)) <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * supports specifying groups, + * but not rings + */ + _mac_set_range(range, 0, 0); + } + } else { + if ((mac_txhwlnksavail_get(mh) + + mac_txhwlnksrsvd_get(mh)) <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * supports specifying groups, + * but not rings + */ + _mac_set_range(range, 0, 0); + } + } + } else { + max = id == MAC_PROP_RXRINGSRANGE ? + mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : + mac_txavail_get(mh) + mac_txrsvd_get(mh); + if (max <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * -1 because we have to leave out the + * default ring. + */ + _mac_set_range(range, 1, max - 1); + } + } + } + return (0); + + case MAC_PROP_STATUS: + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + return (0); + } + + /* + * Get the property info from the driver if it implements the + * property info entry point. + */ + bzero(&state, sizeof (state)); + + if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { + state.pr_default = default_val; + state.pr_default_size = default_size; + state.pr_range = range; + + mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, + (mac_prop_info_handle_t)&state); + + /* + * The operation could fail if the buffer supplied by + * the user was too small for the range or default + * value of the property. 
+ */ + if (state.pr_default_status != 0) + return (state.pr_default_status); + + if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) + *perm = state.pr_perm; + } + + /* + * The MAC layer may want to provide default values or allowed + * ranges for properties if the driver does not provide a + * property info entry point, or that entry point exists, but + * it did not provide a default value or allowed ranges for + * that property. + */ + switch (id) { + case MAC_PROP_MTU: { + uint32_t sdu; + + mac_sdu_get(mh, NULL, &sdu); + + if (range != NULL && !(state.pr_flags & + MAC_PROP_INFO_RANGE)) { + /* MTU range */ + _mac_set_range(range, sdu, sdu); + } + + if (default_val != NULL && !(state.pr_flags & + MAC_PROP_INFO_DEFAULT)) { + if (mip->mi_info.mi_media == DL_ETHER) + sdu = ETHERMTU; + /* default MTU value */ + bcopy(&sdu, default_val, sizeof (sdu)); + } + } + } + + return (0); +} + int mac_fastpath_disable(mac_handle_t mh) { @@ -2953,29 +3409,47 @@ mac_fastpath_enable(mac_handle_t mh) } void -mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) +mac_register_priv_prop(mac_impl_t *mip, char **priv_props) { - mac_priv_prop_t *mpriv; + uint_t nprops, i; + + if (priv_props == NULL) + return; - if (mpp == NULL) + nprops = 0; + while (priv_props[nprops] != NULL) + nprops++; + if (nprops == 0) return; - mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP); - (void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv)); - mip->mi_priv_prop = mpriv; - mip->mi_priv_prop_count = nprop; + + mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); + + for (i = 0; i < nprops; i++) { + mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); + (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], + MAXLINKPROPNAME); + } + + mip->mi_priv_prop_count = nprops; } void mac_unregister_priv_prop(mac_impl_t *mip) { - mac_priv_prop_t *mpriv; + uint_t i; - mpriv = mip->mi_priv_prop; - if (mpriv != NULL) { - kmem_free(mpriv, mip->mi_priv_prop_count * sizeof 
(*mpriv)); - mip->mi_priv_prop = NULL; + if (mip->mi_priv_prop_count == 0) { + ASSERT(mip->mi_priv_prop == NULL); + return; } + + for (i = 0; i < mip->mi_priv_prop_count; i++) + kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); + kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * + sizeof (char *)); + + mip->mi_priv_prop = NULL; mip->mi_priv_prop_count = 0; } @@ -2990,22 +3464,19 @@ mac_unregister_priv_prop(mac_impl_t *mip) * count mechanism) will drop such packets. */ static mac_ring_t * -mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) +mac_ring_alloc(mac_impl_t *mip) { mac_ring_t *ring; - if (cap_rings->mr_type == MAC_RING_TYPE_RX) { - mutex_enter(&mip->mi_ring_lock); - if (mip->mi_ring_freelist != NULL) { - ring = mip->mi_ring_freelist; - mip->mi_ring_freelist = ring->mr_next; - bzero(ring, sizeof (mac_ring_t)); - } else { - ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); - } + mutex_enter(&mip->mi_ring_lock); + if (mip->mi_ring_freelist != NULL) { + ring = mip->mi_ring_freelist; + mip->mi_ring_freelist = ring->mr_next; + bzero(ring, sizeof (mac_ring_t)); mutex_exit(&mip->mi_ring_lock); } else { - ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP); + mutex_exit(&mip->mi_ring_lock); + ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); } ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); return (ring); @@ -3014,16 +3485,16 @@ mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) static void mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) { - if (ring->mr_type == MAC_RING_TYPE_RX) { - mutex_enter(&mip->mi_ring_lock); - ring->mr_state = MR_FREE; - ring->mr_flag = 0; - ring->mr_next = mip->mi_ring_freelist; - mip->mi_ring_freelist = ring; - mutex_exit(&mip->mi_ring_lock); - } else { - kmem_free(ring, sizeof (mac_ring_t)); - } + ASSERT(ring->mr_state == MR_FREE); + + mutex_enter(&mip->mi_ring_lock); + ring->mr_state = MR_FREE; + ring->mr_flag = 0; + ring->mr_next = mip->mi_ring_freelist; + ring->mr_mip = NULL; + mip->mi_ring_freelist = 
ring; + mac_ring_stat_delete(ring); + mutex_exit(&mip->mi_ring_lock); } static void @@ -3046,18 +3517,28 @@ mac_start_ring(mac_ring_t *ring) { int rv = 0; - if (ring->mr_start != NULL) + ASSERT(ring->mr_state == MR_FREE); + + if (ring->mr_start != NULL) { rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); + if (rv != 0) + return (rv); + } + ring->mr_state = MR_INUSE; return (rv); } void mac_stop_ring(mac_ring_t *ring) { + ASSERT(ring->mr_state == MR_INUSE); + if (ring->mr_stop != NULL) ring->mr_stop(ring->mr_driver); + ring->mr_state = MR_FREE; + /* * Increment the ring generation number for this ring. */ @@ -3104,7 +3585,6 @@ mac_start_group_and_rings(mac_group_t *group) ASSERT(ring->mr_state == MR_FREE); if ((rv = mac_start_ring(ring)) != 0) goto error; - ring->mr_state = MR_INUSE; ring->mr_classify_type = MAC_SW_CLASSIFIER; } return (0); @@ -3123,7 +3603,6 @@ mac_stop_group_and_rings(mac_group_t *group) for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { if (ring->mr_state != MR_FREE) { mac_stop_ring(ring); - ring->mr_state = MR_FREE; ring->mr_flag = 0; ring->mr_classify_type = MAC_NO_CLASSIFIER; } @@ -3136,13 +3615,24 @@ static mac_ring_t * mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, mac_capab_rings_t *cap_rings) { - mac_ring_t *ring; + mac_ring_t *ring, *rnext; mac_ring_info_t ring_info; + ddi_intr_handle_t ddi_handle; - ring = mac_ring_alloc(mip, cap_rings); + ring = mac_ring_alloc(mip); /* Prepare basic information of ring */ - ring->mr_index = index; + + /* + * Ring index is numbered to be unique across a particular device. + * Ring index computation makes following assumptions: + * - For drivers with static grouping (e.g. ixgbe, bge), + * ring index exchanged with the driver (e.g. during mr_rget) + * is unique only across the group the ring belongs to. + * - Drivers with dynamic grouping (e.g. nxge), start + * with single group (mrg_index = 0). 
+ */ + ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; ring->mr_type = group->mrg_type; ring->mr_gh = (mac_group_handle_t)group; @@ -3159,12 +3649,63 @@ mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, ring->mr_info = ring_info; + /* + * The interrupt handle could be shared among multiple rings. + * Thus if there is a bunch of rings that are sharing an + * interrupt, then only one ring among the bunch will be made + * available for interrupt re-targeting; the rest will have + * ddi_shared flag set to TRUE and would not be available for + * interrupt re-targeting. + */ + if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { + rnext = ring->mr_next; + while (rnext != NULL) { + if (rnext->mr_info.mri_intr.mi_ddi_handle == + ddi_handle) { + /* + * If default ring (mr_index == 0) is part + * of a group of rings sharing an + * interrupt, then set ddi_shared flag for + * the default ring and give another ring + * the chance to be re-targeted. + */ + if (rnext->mr_index == 0 && + !rnext->mr_info.mri_intr.mi_ddi_shared) { + rnext->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } else { + ring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } + break; + } + rnext = rnext->mr_next; + } + /* + * If rnext is NULL, then no matching ddi_handle was found. + * Rx rings get registered first. So if this is a Tx ring, + * then go through all the Rx rings and see if there is a + * matching ddi handle.
+ */ + if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { + mac_compare_ddi_handle(mip->mi_rx_groups, + mip->mi_rx_group_count, ring); + } + } + /* Update ring's status */ ring->mr_state = MR_FREE; ring->mr_flag = 0; /* Update the ring count of the group */ group->mrg_cur_count++; + + /* Create per ring kstats */ + if (ring->mr_stat != NULL) { + ring->mr_mip = mip; + mac_ring_stat_create(ring); + } + return (ring); } @@ -3188,13 +3729,17 @@ mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, int mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) { - mac_capab_rings_t *cap_rings; - mac_group_t *group, *groups; - mac_group_info_t group_info; - uint_t group_free = 0; - uint_t ring_left; - mac_ring_t *ring; - int g, err = 0; + mac_capab_rings_t *cap_rings; + mac_group_t *group; + mac_group_t *groups; + mac_group_info_t group_info; + uint_t group_free = 0; + uint_t ring_left; + mac_ring_t *ring; + int g; + int err = 0; + uint_t grpcnt; + boolean_t pseudo_txgrp = B_FALSE; switch (rtype) { case MAC_RING_TYPE_RX: @@ -3213,15 +3758,32 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) ASSERT(B_FALSE); } - if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, - cap_rings)) + if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) return (0); + grpcnt = cap_rings->mr_gnum; + + /* + * If we have multiple TX rings, but only one TX group, we can + * create pseudo TX groups (one per TX ring) in the MAC layer, + * except for an aggr. For an aggr currently we maintain only + * one group with all the rings (for all its ports), going + * forwards we might change this. + */ + if (rtype == MAC_RING_TYPE_TX && + cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && + (mip->mi_state_flags & MIS_IS_AGGR) == 0) { + /* + * The -1 here is because we create a default TX group + * with all the rings in it. + */ + grpcnt = cap_rings->mr_rnum - 1; + pseudo_txgrp = B_TRUE; + } /* * Allocate a contiguous buffer for all groups. 
*/ - groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1), - KM_SLEEP); + groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt+ 1), KM_SLEEP); ring_left = cap_rings->mr_rnum; @@ -3229,7 +3791,7 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) * Get all ring groups if any, and get their ring members * if any. */ - for (g = 0; g < cap_rings->mr_gnum; g++) { + for (g = 0; g < grpcnt; g++) { group = groups + g; /* Prepare basic information of the group */ @@ -3242,6 +3804,16 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* Zero to reuse the info data structure */ bzero(&group_info, sizeof (group_info)); + if (pseudo_txgrp) { + /* + * This is a pseudo group that we created, apart + * from setting the state there is nothing to be + * done. + */ + group->mrg_state = MAC_GROUP_STATE_REGISTERED; + group_free++; + continue; + } /* Query group information from driver */ cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, (mac_group_handle_t)group); @@ -3321,15 +3893,16 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) */ if (rtype == MAC_RING_TYPE_RX) { if ((group_info.mgi_addmac == NULL) || - (group_info.mgi_addmac == NULL)) + (group_info.mgi_addmac == NULL)) { goto bail; + } } /* Cache driver-supplied information */ group->mrg_info = group_info; /* Update the group's status and group count. 
*/ - mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); group_free++; group->mrg_rings = NULL; @@ -3342,7 +3915,7 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) } /* Build up a dummy group for free resources as a pool */ - group = groups + cap_rings->mr_gnum; + group = groups + grpcnt; /* Prepare basic information of the group */ group->mrg_index = -1; @@ -3366,36 +3939,88 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) ring_left = 0; /* Update this group's status */ - mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); } else group->mrg_rings = NULL; ASSERT(ring_left == 0); bail: + /* Cache other important information to finalize the initialization */ switch (rtype) { case MAC_RING_TYPE_RX: mip->mi_rx_group_type = cap_rings->mr_group_type; mip->mi_rx_group_count = cap_rings->mr_gnum; mip->mi_rx_groups = groups; + mip->mi_rx_donor_grp = groups; + if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + /* + * The default ring is reserved since it is + * used for sending the broadcast etc. packets. + */ + mip->mi_rxrings_avail = + mip->mi_rx_groups->mrg_cur_count - 1; + mip->mi_rxrings_rsvd = 1; + } + /* + * The default group cannot be reserved. It is used by + * all the clients that do not have an exclusive group. + */ + mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1; + mip->mi_rxhwclnt_used = 1; break; case MAC_RING_TYPE_TX: - mip->mi_tx_group_type = cap_rings->mr_group_type; - mip->mi_tx_group_count = cap_rings->mr_gnum; + mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC : + cap_rings->mr_group_type; + mip->mi_tx_group_count = grpcnt; mip->mi_tx_group_free = group_free; mip->mi_tx_groups = groups; + group = groups + grpcnt; + ring = group->mrg_rings; /* - * Ring 0 is used as the default one and it could be assigned - * to a client as well. + * The ring can be NULL in the case of aggr. 
Aggr will + * have an empty Tx group which will get populated + * later when pseudo Tx rings are added after + * mac_register() is done. */ - group = groups + cap_rings->mr_gnum; - ring = group->mrg_rings; - while ((ring->mr_index != 0) && (ring->mr_next != NULL)) - ring = ring->mr_next; - ASSERT(ring->mr_index == 0); - mip->mi_default_tx_ring = (mac_ring_handle_t)ring; + if (ring == NULL) { + ASSERT(mip->mi_state_flags & MIS_IS_AGGR); + /* + * pass the group to aggr so it can add Tx + * rings to the group later. + */ + cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL, + (mac_group_handle_t)group); + /* + * Even though there are no rings at this time + * (rings will come later), set the group + * state to registered. + */ + group->mrg_state = MAC_GROUP_STATE_REGISTERED; + } else { + /* + * Ring 0 is used as the default one and it could be + * assigned to a client as well. + */ + while ((ring->mr_index != 0) && (ring->mr_next != NULL)) + ring = ring->mr_next; + ASSERT(ring->mr_index == 0); + mip->mi_default_tx_ring = (mac_ring_handle_t)ring; + } + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) + mip->mi_txrings_avail = group->mrg_cur_count - 1; + /* + * The default ring cannot be reserved. + */ + mip->mi_txrings_rsvd = 1; + /* + * The default group cannot be reserved. It will be shared + * by clients that do not have an exclusive group. + */ + mip->mi_txhwclnt_avail = mip->mi_tx_group_count; + mip->mi_txhwclnt_used = 1; break; default: ASSERT(B_FALSE); @@ -3408,8 +4033,45 @@ bail: } /* - * Called to free all ring groups with particular type. It's supposed all groups - * have been released by clinet. + * The ddi interrupt handle could be shared amoung rings. If so, compare + * the new ring's ddi handle with the existing ones and set ddi_shared + * flag. 
+ */ +void +mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring) +{ + mac_group_t *group; + mac_ring_t *ring; + ddi_intr_handle_t ddi_handle; + int g; + + ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle; + for (g = 0; g < grpcnt; g++) { + group = groups + g; + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + if (ring == cring) + continue; + if (ring->mr_info.mri_intr.mi_ddi_handle == + ddi_handle) { + if (cring->mr_type == MAC_RING_TYPE_RX && + ring->mr_index == 0 && + !ring->mr_info.mri_intr.mi_ddi_shared) { + ring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } else { + cring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } + return; + } + } + } +} + +/* + * Called to free all groups of particular type (RX or TX). It's assumed that + * no clients are using these groups. */ void mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) @@ -3426,6 +4088,7 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) group_count = mip->mi_rx_group_count; mip->mi_rx_groups = NULL; + mip->mi_rx_donor_grp = NULL; mip->mi_rx_group_count = 0; break; case MAC_RING_TYPE_TX: @@ -3501,32 +4164,6 @@ mac_group_remmac(mac_group_t *group, const uint8_t *addr) } /* - * Release a ring in use by marking it MR_FREE. - * Any other client may reserve it for its use. - */ -void -mac_release_tx_ring(mac_ring_handle_t rh) -{ - mac_ring_t *ring = (mac_ring_t *)rh; - mac_group_t *group = (mac_group_t *)ring->mr_gh; - mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; - - ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(ring->mr_state != MR_FREE); - - /* - * Default tx ring will be released by mac_stop(). - */ - if (rh == mip->mi_default_tx_ring) - return; - - mac_stop_ring(ring); - - ring->mr_state = MR_FREE; - ring->mr_flag = 0; -} - -/* * This is the entry point for packets transmitted through the bridging code. * If no bridge is in place, MAC_RING_TX transmits using tx ring. The 'rh' * pointer may be NULL to select the default ring. 
@@ -3558,16 +4195,17 @@ mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp) /* * Find a ring from its index. */ -mac_ring_t * -mac_find_ring(mac_group_t *group, int index) +mac_ring_handle_t +mac_find_ring(mac_group_handle_t gh, int index) { + mac_group_t *group = (mac_group_t *)gh; mac_ring_t *ring = group->mrg_rings; for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) if (ring->mr_index == index) break; - return (ring); + return ((mac_ring_handle_t)ring); } /* * Add a ring to an existing group. @@ -3586,6 +4224,7 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) boolean_t driver_call = (ring == NULL); mac_group_type_t group_type; int ret = 0; + flow_entry_t *flent; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -3606,8 +4245,8 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * There should be no ring with the same ring index in the target * group. */ - ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) == - NULL); + ASSERT(mac_find_ring((mac_group_handle_t)group, + driver_call ? index : ring->mr_index) == NULL); if (driver_call) { /* @@ -3627,7 +4266,8 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * and the mac_ring_t already exists. */ ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(cap_rings->mr_gaddring != NULL); + ASSERT(group->mrg_driver == NULL || + cap_rings->mr_gaddring != NULL); ASSERT(ring->mr_gh == NULL); } @@ -3667,6 +4307,27 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) return (0); /* + * Start the ring if needed. Failure causes to undo the grouping action. 
+ */ + if (ring->mr_state != MR_INUSE) { + if ((ret = mac_start_ring(ring)) != 0) { + if (!driver_call) { + cap_rings->mr_gremring(group->mrg_driver, + ring->mr_driver, ring->mr_type); + } + group->mrg_cur_count--; + group->mrg_rings = ring->mr_next; + + ring->mr_gh = NULL; + + if (driver_call) + mac_ring_free(mip, ring); + + return (ret); + } + } + + /* * Set up SRS/SR according to the ring type. */ switch (ring->mr_type) { @@ -3676,58 +4337,98 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * reserved for someones exclusive use. */ if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { - flow_entry_t *flent; mac_client_impl_t *mcip; - mcip = MAC_RX_GROUP_ONLY_CLIENT(group); - ASSERT(mcip != NULL); - flent = mcip->mci_flent; - ASSERT(flent->fe_rx_srs_cnt > 0); - mac_srs_group_setup(mcip, flent, group, SRST_LINK); + mcip = MAC_GROUP_ONLY_CLIENT(group); + /* + * Even though this group is reserved we migth still + * have multiple clients, i.e a VLAN shares the + * group with the primary mac client. + */ + if (mcip != NULL) { + flent = mcip->mci_flent; + ASSERT(flent->fe_rx_srs_cnt > 0); + mac_rx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, + MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, + mcip, NULL, NULL); + } else { + ring->mr_classify_type = MAC_SW_CLASSIFIER; + } } break; case MAC_RING_TYPE_TX: + { + mac_grp_client_t *mgcp = group->mrg_clients; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_srs_tx_t *tx; + + if (MAC_GROUP_NO_CLIENT(group)) { + if (ring->mr_state == MR_INUSE) + mac_stop_ring(ring); + ring->mr_flag = 0; + break; + } /* - * For TX this function is only invoked during the - * initial creation of a group when a share is - * associated with a MAC client. So the datapath is not - * yet setup, and will be setup later after the - * group has been reserved and populated. + * If the rings are being moved to a group that has + * clients using it, then add the new rings to the + * clients SRS. 
*/ + while (mgcp != NULL) { + boolean_t is_aggr; + + mcip = mgcp->mgc_client; + flent = mcip->mci_flent; + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR); + mac_srs = MCIP_TX_SRS(mcip); + tx = &mac_srs->srs_tx; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + /* + * If we are growing from 1 to multiple rings. + */ + if (tx->st_mode == SRS_TX_BW || + tx->st_mode == SRS_TX_SERIALIZE || + tx->st_mode == SRS_TX_DEFAULT) { + mac_ring_t *tx_ring = tx->st_arg2; + + tx->st_arg2 = NULL; + mac_tx_srs_stat_recreate(mac_srs, B_TRUE); + mac_tx_srs_add_ring(mac_srs, tx_ring); + if (mac_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = is_aggr ? SRS_TX_BW_AGGR : + SRS_TX_BW_FANOUT; + } else { + tx->st_mode = is_aggr ? SRS_TX_AGGR : + SRS_TX_FANOUT; + } + tx->st_func = mac_tx_get_func(tx->st_mode); + } + mac_tx_srs_add_ring(mac_srs, ring); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); + mac_tx_client_restart((mac_client_handle_t)mcip); + mgcp = mgcp->mgc_next; + } break; + } default: ASSERT(B_FALSE); } - /* - * Start the ring if needed. Failure causes to undo the grouping action. + * For aggr, the default ring will be NULL to begin with. If it + * is NULL, then pick the first ring that gets added as the + * default ring. Any ring in an aggregation can be removed at + * any time (by the user action of removing a link) and if the + * current default ring gets removed, then a new one gets + * picked (see i_mac_group_rem_ring()). 
*/ - if ((ret = mac_start_ring(ring)) != 0) { - if (ring->mr_type == MAC_RING_TYPE_RX) { - if (ring->mr_srs != NULL) { - mac_rx_srs_remove(ring->mr_srs); - ring->mr_srs = NULL; - } - } - if (!driver_call) { - cap_rings->mr_gremring(group->mrg_driver, - ring->mr_driver, ring->mr_type); - } - group->mrg_cur_count--; - group->mrg_rings = ring->mr_next; - - ring->mr_gh = NULL; - - if (driver_call) - mac_ring_free(mip, ring); - - return (ret); + if (mip->mi_state_flags & MIS_IS_AGGR && + mip->mi_default_tx_ring == NULL && + ring->mr_type == MAC_RING_TYPE_TX) { + mip->mi_default_tx_ring = (mac_ring_handle_t)ring; } - /* - * Update the ring's state. - */ - ring->mr_state = MR_INUSE; MAC_RING_UNMARK(ring, MR_INCIPIENT); return (0); } @@ -3748,18 +4449,18 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(mac_find_ring(group, ring->mr_index) == ring); + ASSERT(mac_find_ring((mac_group_handle_t)group, + ring->mr_index) == (mac_ring_handle_t)ring); ASSERT((mac_group_t *)ring->mr_gh == group); ASSERT(ring->mr_type == group->mrg_type); + if (ring->mr_state == MR_INUSE) + mac_stop_ring(ring); switch (ring->mr_type) { case MAC_RING_TYPE_RX: group_type = mip->mi_rx_group_type; cap_rings = &mip->mi_rx_rings_cap; - if (group->mrg_state >= MAC_GROUP_STATE_RESERVED) - mac_stop_ring(ring); - /* * Only hardware classified packets hold a reference to the * ring all the way up the Rx path. 
mac_rx_srs_remove() @@ -3771,13 +4472,20 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, mac_rx_srs_remove(ring->mr_srs); ring->mr_srs = NULL; } - ring->mr_state = MR_FREE; - ring->mr_flag = 0; break; case MAC_RING_TYPE_TX: + { + mac_grp_client_t *mgcp; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_srs_tx_t *tx; + mac_ring_t *rem_ring; + mac_group_t *defgrp; + uint_t ring_info = 0; + /* - * For TX this function is only invoked in two + * For TX this function is invoked in three * cases: * * 1) In the case of a failure during the @@ -3789,13 +4497,120 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, * 2) From mac_release_tx_group() when freeing * a TX SRS. * - * In both cases the SRS and its soft rings are - * already quiesced. + * 3) In the case of aggr, when a port gets removed, + * the pseudo Tx rings that it exposed gets removed. + * + * In the first two cases the SRS and its soft + * rings are already quiesced. */ - ASSERT(!driver_call); + if (driver_call) { + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_soft_ring_t *sringp; + mac_srs_tx_t *srs_tx; + + if (mip->mi_state_flags & MIS_IS_AGGR && + mip->mi_default_tx_ring == + (mac_ring_handle_t)ring) { + /* pick a new default Tx ring */ + mip->mi_default_tx_ring = + (group->mrg_rings != ring) ? + (mac_ring_handle_t)group->mrg_rings : + (mac_ring_handle_t)(ring->mr_next); + } + /* Presently only aggr case comes here */ + if (group->mrg_state != MAC_GROUP_STATE_RESERVED) + break; + + mcip = MAC_GROUP_ONLY_CLIENT(group); + ASSERT(mcip != NULL); + ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); + mac_srs = MCIP_TX_SRS(mcip); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); + srs_tx = &mac_srs->srs_tx; + /* + * Wakeup any callers blocked on this + * Tx ring due to flow control. 
+ */ + sringp = srs_tx->st_soft_rings[ring->mr_index]; + ASSERT(sringp != NULL); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_srs_del_ring(mac_srs, ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + break; + } + ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring); group_type = mip->mi_tx_group_type; cap_rings = &mip->mi_tx_rings_cap; + /* + * See if we need to take it out of the MAC clients using + * this group + */ + if (MAC_GROUP_NO_CLIENT(group)) + break; + mgcp = group->mrg_clients; + defgrp = MAC_DEFAULT_TX_GROUP(mip); + while (mgcp != NULL) { + mcip = mgcp->mgc_client; + mac_srs = MCIP_TX_SRS(mcip); + tx = &mac_srs->srs_tx; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + /* + * If we are here when removing rings from the + * defgroup, mac_reserve_tx_ring would have + * already deleted the ring from the MAC + * clients in the group. + */ + if (group != defgrp) { + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(mac_srs, ring)); + mac_tx_srs_del_ring(mac_srs, ring); + } + /* + * Additionally, if we are left with only + * one ring in the group after this, we need + * to modify the mode etc. to. (We haven't + * yet taken the ring out, so we check with 2). + */ + if (group->mrg_cur_count == 2) { + if (ring->mr_next == NULL) + rem_ring = group->mrg_rings; + else + rem_ring = ring->mr_next; + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(mac_srs, + rem_ring)); + mac_tx_srs_del_ring(mac_srs, rem_ring); + if (rem_ring->mr_state != MR_INUSE) { + (void) mac_start_ring(rem_ring); + } + tx->st_arg2 = (void *)rem_ring; + mac_tx_srs_stat_recreate(mac_srs, B_FALSE); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)rem_ring); + /* + * We are shrinking from multiple + * to 1 ring. 
+ */ + if (mac_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = SRS_TX_BW; + } else if (mac_tx_serialize || + (ring_info & MAC_RING_TX_SERIALIZE)) { + tx->st_mode = SRS_TX_SERIALIZE; + } else { + tx->st_mode = SRS_TX_DEFAULT; + } + tx->st_func = mac_tx_get_func(tx->st_mode); + } + mac_tx_client_restart((mac_client_handle_t)mcip); + mgcp = mgcp->mgc_next; + } break; + } default: ASSERT(B_FALSE); } @@ -3817,7 +4632,8 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, if (!driver_call) { ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(cap_rings->mr_gremring != NULL); + ASSERT(group->mrg_driver == NULL || + cap_rings->mr_gremring != NULL); /* * Remove the driver level hardware ring. @@ -3829,12 +4645,10 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, } ring->mr_gh = NULL; - if (driver_call) { + if (driver_call) mac_ring_free(mip, ring); - } else { - ring->mr_state = MR_FREE; + else ring->mr_flag = 0; - } } /* @@ -3982,7 +4796,9 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, allocated_map = B_TRUE; } - ASSERT(map->ma_group == group); + ASSERT(map->ma_group == NULL || map->ma_group == group); + if (map->ma_group == NULL) + map->ma_group = group; /* * If the MAC address is already in use, simply account for the @@ -4082,6 +4898,8 @@ mac_remove_macaddr(mac_address_t *map) return (0); err = mac_group_remmac(map->ma_group, map->ma_addr); + if (err == 0) + map->ma_group = NULL; break; case MAC_ADDRESS_TYPE_UNICAST_PROMISC: err = i_mac_promisc_set(mip, B_FALSE); @@ -4122,7 +4940,7 @@ mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) * Update the primary address for drivers that are not * RINGS capable. 
*/ - if (map->ma_group == NULL) { + if (mip->mi_rx_groups == NULL) { err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) mac_addr); if (err != 0) @@ -4223,11 +5041,6 @@ mac_init_macaddr(mac_impl_t *mip) if (mip->mi_rx_groups == NULL) map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; - /* - * The primary MAC address is reserved for default group according - * to current design. - */ - map->ma_group = mip->mi_rx_groups; map->ma_mip = mip; mip->mi_addresses = map; @@ -4258,6 +5071,11 @@ mac_fini_macaddr(mac_impl_t *mip) /* * Logging related functions. + * + * Note that Kernel statistics have been extended to maintain fine + * granularity of statistics viz. hardware lane, software lane, fanout + * stats etc. However, extended accounting continues to support only + * aggregate statistics like before. */ /* Write the Flow description to the log file */ @@ -4304,18 +5122,33 @@ mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) int mac_write_flow_stats(flow_entry_t *flent) { - flow_stats_t *fl_stats; - net_stat_t nstat; + net_stat_t nstat; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; - fl_stats = &flent->fe_flowstats; + bzero(&nstat, sizeof (net_stat_t)); nstat.ns_name = flent->fe_flow_name; - nstat.ns_ibytes = fl_stats->fs_rbytes; - nstat.ns_obytes = fl_stats->fs_obytes; - nstat.ns_ipackets = fl_stats->fs_ipackets; - nstat.ns_opackets = fl_stats->fs_opackets; - nstat.ns_ierrors = fl_stats->fs_ierrors; - nstat.ns_oerrors = fl_stats->fs_oerrors; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; + nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + nstat.ns_oerrors += mac_rx_stat->mrs_ierrors; + } + mac_srs = (mac_soft_ring_set_t 
*)(flent->fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + nstat.ns_obytes = mac_tx_stat->mts_obytes; + nstat.ns_opackets = mac_tx_stat->mts_opackets; + nstat.ns_oerrors = mac_tx_stat->mts_oerrors; + } return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC)); } @@ -4347,16 +5180,38 @@ mac_write_link_desc(mac_client_impl_t *mcip) int mac_write_link_stats(mac_client_impl_t *mcip) { - net_stat_t nstat; + net_stat_t nstat; + flow_entry_t *flent; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + bzero(&nstat, sizeof (net_stat_t)); nstat.ns_name = mcip->mci_name; - nstat.ns_ibytes = mcip->mci_stat_ibytes; - nstat.ns_obytes = mcip->mci_stat_obytes; - nstat.ns_ipackets = mcip->mci_stat_ipackets; - nstat.ns_opackets = mcip->mci_stat_opackets; - nstat.ns_ierrors = mcip->mci_stat_ierrors; - nstat.ns_oerrors = mcip->mci_stat_oerrors; + flent = mcip->mci_flent; + if (flent != NULL) { + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + nstat.ns_oerrors += mac_rx_stat->mrs_ierrors; + } + } + mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + nstat.ns_obytes = mac_tx_stat->mts_obytes; + nstat.ns_opackets = mac_tx_stat->mts_opackets; + nstat.ns_oerrors = mac_tx_stat->mts_oerrors; + } return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC)); } @@ -4706,181 +5561,255 @@ mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) mac_ring_t * mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) { - mac_group_t *group; - mac_ring_t *ring; + mac_group_t *group; + 
mac_grp_client_t *mgcp; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *srs; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - if (mip->mi_tx_groups == NULL) - return (NULL); - /* * Find an available ring and start it before changing its status. * The unassigned rings are at the end of the mi_tx_groups * array. */ - group = mip->mi_tx_groups + mip->mi_tx_group_count; + group = MAC_DEFAULT_TX_GROUP(mip); - for (ring = group->mrg_rings; ring != NULL; - ring = ring->mr_next) { - if (desired_ring == NULL) { - if (ring->mr_state == MR_FREE) - /* wanted any free ring and found one */ - break; - } else { - mac_ring_t *sring; - mac_client_impl_t *client; - mac_soft_ring_set_t *srs; + /* Can't take the default ring out of the default group */ + ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring); - if (ring != desired_ring) - /* wants a desired ring but this one ain't it */ - continue; + if (desired_ring->mr_state == MR_FREE) { + ASSERT(MAC_GROUP_NO_CLIENT(group)); + if (mac_start_ring(desired_ring) != 0) + return (NULL); + return (desired_ring); + } + /* + * There are clients using this ring, so let's move the clients + * away from using this ring. + */ + for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { + mcip = mgcp->mgc_client; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + srs = MCIP_TX_SRS(mcip); + ASSERT(mac_tx_srs_ring_present(srs, desired_ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs, + desired_ring)); + mac_tx_srs_del_ring(srs, desired_ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + } + return (desired_ring); +} - if (ring->mr_state == MR_FREE) - break; +/* + * For a reserved group with multiple clients, return the primary client. 
+ */ +static mac_client_impl_t * +mac_get_grp_primary(mac_group_t *grp) +{ + mac_grp_client_t *mgcp = grp->mrg_clients; + mac_client_impl_t *mcip; + + while (mgcp != NULL) { + mcip = mgcp->mgc_client; + if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) + return (mcip); + mgcp = mgcp->mgc_next; + } + return (NULL); +} + +/* + * Hybrid I/O specifies the ring that should be given to a share. + * If the ring is already used by clients, then we need to release + * the ring back to the default group so that we can give it to + * the share. This means the clients using this ring now get a + * replacement ring. If there aren't any replacement rings, this + * function returns a failure. + */ +static int +mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type, + mac_ring_t *ring, mac_ring_t **rings, int nrings) +{ + mac_group_t *group = (mac_group_t *)ring->mr_gh; + mac_resource_props_t *mrp; + mac_client_impl_t *mcip; + mac_group_t *defgrp; + mac_ring_t *tring; + mac_group_t *tgrp; + int i; + int j; + mcip = MAC_GROUP_ONLY_CLIENT(group); + if (mcip == NULL) + mcip = mac_get_grp_primary(group); + ASSERT(mcip != NULL); + ASSERT(mcip->mci_share == NULL); + + mrp = MCIP_RESOURCE_PROPS(mcip); + if (ring_type == MAC_RING_TYPE_RX) { + defgrp = mip->mi_rx_donor_grp; + if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) { + /* Need to put this mac client in the default group */ + if (mac_rx_switch_group(mcip, group, defgrp) != 0) + return (ENOSPC); + } else { /* - * Found the desired ring but it's already in use. - * Swap it with a new ring. + * Switch this ring with some other ring from + * the default group. 
*/ - - /* find the client which owns that ring */ - for (client = mip->mi_clients_list; client != NULL; - client = client->mci_client_next) { - srs = MCIP_TX_SRS(client); - if (srs != NULL && mac_tx_srs_ring_present(srs, - desired_ring)) { - /* found our ring */ - break; + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + if (tring->mr_index == 0) + continue; + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; } + if (j >= nrings) + break; } - if (client == NULL) { - /* - * The TX ring is in use, but it's not - * associated with any clients, so it - * has to be the default ring. In that - * case we can simply assign a new ring - * as the default ring, and we're done. - */ - ASSERT(mip->mi_default_tx_ring == - (mac_ring_handle_t)desired_ring); - - /* - * Quiesce all clients on top of - * the NIC to make sure there are no - * pending threads still relying on - * that default ring, for example - * the multicast path. - */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) { - mac_tx_client_quiesce(client, - SRS_QUIESCE); - } - - mip->mi_default_tx_ring = (mac_ring_handle_t) - mac_reserve_tx_ring(mip, NULL); - - /* resume the clients */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) - mac_tx_client_restart(client); - - break; + if (tring == NULL) + return (ENOSPC); + if (mac_group_mov_ring(mip, group, tring) != 0) + return (ENOSPC); + if (mac_group_mov_ring(mip, defgrp, ring) != 0) { + (void) mac_group_mov_ring(mip, defgrp, tring); + return (ENOSPC); } + } + ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); + return (0); + } + defgrp = MAC_DEFAULT_TX_GROUP(mip); + if (ring == (mac_ring_t *)mip->mi_default_tx_ring) { + /* + * See if we can get a spare ring to replace the default + * ring. 
+ */ + if (defgrp->mrg_cur_count == 1) { /* - * Note that we cannot simply invoke the group - * add/rem routines since the client doesn't have a - * TX group. So we need to instead add/remove - * the rings from the SRS. + * Need to get a ring from another client, see if + * there are any clients that can be moved to + * the default group, thereby freeing some rings. */ - ASSERT(client->mci_share == NULL); - - /* first quiece the client */ - mac_tx_client_quiesce(client, SRS_QUIESCE); - - /* give a new ring to the client... */ - sring = mac_reserve_tx_ring(mip, NULL); - if (sring != NULL) { - /* - * There are no other available ring - * on that MAC instance. The client - * will fallback to the shared TX - * ring. - */ - mac_tx_srs_add_ring(srs, sring); - } - - /* ... in exchange for our desired ring */ - mac_tx_srs_del_ring(srs, desired_ring); - - /* restart the client */ - mac_tx_client_restart(client); - - if (mip->mi_default_tx_ring == - (mac_ring_handle_t)desired_ring) { - /* - * The desired ring is the default ring, - * and there are one or more clients - * using that default ring directly. - */ - mip->mi_default_tx_ring = - (mac_ring_handle_t)sring; - /* - * Find clients using default ring and - * swap it with the new default ring. - */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) { - srs = MCIP_TX_SRS(client); - if (srs != NULL && - mac_tx_srs_ring_present(srs, - desired_ring)) { - /* first quiece the client */ - mac_tx_client_quiesce(client, - SRS_QUIESCE); - - /* - * Give it the new default - * ring, and remove the old - * one. 
- */ - if (sring != NULL) { - mac_tx_srs_add_ring(srs, - sring); - } - mac_tx_srs_del_ring(srs, - desired_ring); - - /* restart the client */ - mac_tx_client_restart(client); + for (i = 0; i < mip->mi_tx_group_count; i++) { + tgrp = &mip->mi_tx_groups[i]; + if (tgrp->mrg_state == + MAC_GROUP_STATE_REGISTERED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { + ASSERT(tgrp->mrg_cur_count == 1); + /* + * If this ring is part of the + * rings asked by the share we cannot + * use it as the default ring. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tgrp->mrg_rings) + break; } + if (j < nrings) + continue; + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, tgrp, + defgrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + break; } } - break; + /* + * All the rings are reserved, can't give up the + * default ring. + */ + if (defgrp->mrg_cur_count <= 1) + return (ENOSPC); + } + /* + * Swap the default ring with another. + */ + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + /* + * If this ring is part of the rings asked by the + * share we cannot use it as the default ring. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; + } + if (j >= nrings) + break; } + ASSERT(tring != NULL); + mip->mi_default_tx_ring = (mac_ring_handle_t)tring; + return (0); } - - if (ring != NULL) { - if (mac_start_ring(ring) != 0) - return (NULL); - ring->mr_state = MR_INUSE; + /* + * The Tx ring is with a group reserved by a MAC client. See if + * we can swap it. 
+ */ + ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); + mcip = MAC_GROUP_ONLY_CLIENT(group); + if (mcip == NULL) + mcip = mac_get_grp_primary(group); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { + ASSERT(group->mrg_cur_count == 1); + /* Put this mac client in the default group */ + mac_tx_switch_group(mcip, group, defgrp); + } else { + /* + * Switch this ring with some other ring from + * the default group. + */ + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + if (tring == (mac_ring_t *)mip->mi_default_tx_ring) + continue; + /* + * If this ring is part of the rings asked by the + * share we cannot use it for swapping. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; + } + if (j >= nrings) + break; + } + if (tring == NULL) { + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } + if (mac_group_mov_ring(mip, group, tring) != 0) { + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } + if (mac_group_mov_ring(mip, defgrp, ring) != 0) { + (void) mac_group_mov_ring(mip, defgrp, tring); + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } } - - return (ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); + return (0); } /* - * Minimum number of rings to leave in the default TX group when allocating - * rings to new clients. - */ -static uint_t mac_min_rx_default_rings = 1; - -/* * Populate a zero-ring group with rings. If the share is non-NULL, * the rings are chosen according to that share. 
* Invoked after allocating a new RX or TX group through @@ -4889,15 +5818,17 @@ static uint_t mac_min_rx_default_rings = 1; */ int i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, - mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share) + mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share, + uint32_t ringcnt) { - mac_ring_t **rings, *tmp_ring[1], *ring; + mac_ring_t **rings, *ring; uint_t nrings; - int rv, i, j; + int rv = 0, i = 0, j; - ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC && - mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(new_group->mrg_cur_count == 0); + ASSERT((ring_type == MAC_RING_TYPE_RX && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) || + (ring_type == MAC_RING_TYPE_TX && + mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)); /* * First find the rings to allocate to the group. @@ -4910,9 +5841,23 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, KM_SLEEP); mip->mi_share_capab.ms_squery(share, ring_type, (mac_ring_handle_t *)rings, &nrings); + for (i = 0; i < nrings; i++) { + /* + * If we have given this ring to a non-default + * group, we need to check if we can get this + * ring. + */ + ring = rings[i]; + if (ring->mr_gh != (mac_group_handle_t)src_group || + ring == (mac_ring_t *)mip->mi_default_tx_ring) { + if (mac_reclaim_ring_from_grp(mip, ring_type, + ring, rings, nrings) != 0) { + rv = ENOSPC; + goto bail; + } + } + } } else { - /* this function is called for TX only with a share */ - ASSERT(ring_type == MAC_RING_TYPE_RX); /* * Pick one ring from default group. * @@ -4922,23 +5867,37 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, * We need a better way for a driver to indicate this, * for example a per-ring flag. 
*/ + rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), + KM_SLEEP); for (ring = src_group->mrg_rings; ring != NULL; ring = ring->mr_next) { - if (ring->mr_index != 0) + if (ring_type == MAC_RING_TYPE_RX && + ring->mr_index == 0) { + continue; + } + if (ring_type == MAC_RING_TYPE_TX && + ring == (mac_ring_t *)mip->mi_default_tx_ring) { + continue; + } + rings[i++] = ring; + if (i == ringcnt) break; } ASSERT(ring != NULL); - nrings = 1; - tmp_ring[0] = ring; - rings = tmp_ring; + nrings = i; + /* Not enough rings as required */ + if (nrings != ringcnt) { + rv = ENOSPC; + goto bail; + } } switch (ring_type) { case MAC_RING_TYPE_RX: - if (src_group->mrg_cur_count - nrings < - mac_min_rx_default_rings) { + if (src_group->mrg_cur_count - nrings < 1) { /* we ran out of rings */ - return (ENOSPC); + rv = ENOSPC; + goto bail; } /* move receive rings to new group */ @@ -4950,7 +5909,7 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, (void) mac_group_mov_ring(mip, src_group, rings[j]); } - return (rv); + goto bail; } } break; @@ -4959,37 +5918,42 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, mac_ring_t *tmp_ring; /* move the TX rings to the new group */ - ASSERT(src_group == NULL); for (i = 0; i < nrings; i++) { /* get the desired ring */ tmp_ring = mac_reserve_tx_ring(mip, rings[i]); + if (tmp_ring == NULL) { + rv = ENOSPC; + goto bail; + } ASSERT(tmp_ring == rings[i]); rv = mac_group_mov_ring(mip, new_group, rings[i]); if (rv != 0) { /* cleanup on failure */ for (j = 0; j < i; j++) { (void) mac_group_mov_ring(mip, - mip->mi_tx_groups + - mip->mi_tx_group_count, rings[j]); + MAC_DEFAULT_TX_GROUP(mip), + rings[j]); } + goto bail; } } break; } } - if (share != NULL) { - /* add group to share */ + /* add group to share */ + if (share != NULL) mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); - /* free temporary array of rings */ - kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); - } - return (0); +bail: 
+ /* free temporary array of rings */ + kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); + + return (rv); } void -mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) +mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) { mac_grp_client_t *mgcp; @@ -5008,7 +5972,7 @@ mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) } void -mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) +mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) { mac_grp_client_t *mgcp, **pprev; @@ -5034,65 +5998,149 @@ mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) * largest number of rings, otherwise the default ring when available. */ mac_group_t * -mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, - mac_rx_group_reserve_type_t rtype) +mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) { mac_share_handle_t share = mcip->mci_share; mac_impl_t *mip = mcip->mci_mip; mac_group_t *grp = NULL; - int i, start, loopcount; - int err; + int i; + int err = 0; mac_address_t *map; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + int nrings; + int donor_grp_rcnt; + boolean_t need_exclgrp = B_FALSE; + int need_rings = 0; + mac_group_t *candidate_grp = NULL; + mac_client_impl_t *gclient; + mac_resource_props_t *gmrp; + mac_group_t *donorgrp = NULL; + boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; + boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; + boolean_t isprimary; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - /* Check if a group already has this mac address (case of VLANs) */ - if ((map = mac_find_macaddr(mip, mac_addr)) != NULL) - return (map->ma_group); + isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + + /* + * Check if a group already has this mac address (case of VLANs) + * unless we are moving this MAC client from one group to another. 
+ */ + if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { + if (map->ma_group != NULL) + return (map->ma_group); + } + if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) + return (NULL); + /* + * If exclusive open, return NULL which will enable the + * caller to use the default group. + */ + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) + return (NULL); - if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 || - rtype == MAC_RX_NO_RESERVE) + /* For dynamic groups default unspecified to 1 */ + if (rxhw && unspec && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + mrp->mrp_nrxrings = 1; + } + /* + * For static grouping we allow only specifying rings=0 and + * unspecified + */ + if (rxhw && mrp->mrp_nrxrings > 0 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { return (NULL); + } + if (rxhw) { + /* + * We have explicitly asked for a group (with nrxrings, + * if unspec). + */ + if (unspec || mrp->mrp_nrxrings > 0) { + need_exclgrp = B_TRUE; + need_rings = mrp->mrp_nrxrings; + } else if (mrp->mrp_nrxrings == 0) { + /* + * We have asked for a software group. + */ + return (NULL); + } + } else if (isprimary && mip->mi_nactiveclients == 1 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + /* + * If the primary is the only active client on this + * mip and we have not asked for any rings, we give + * it the default group so that the primary gets to + * use all the rings. + */ + return (NULL); + } + + /* The group that can donate rings */ + donorgrp = mip->mi_rx_donor_grp; + + /* + * The number of rings that the default group can donate. + * We need to leave at least one ring. + */ + donor_grp_rcnt = donorgrp->mrg_cur_count - 1; /* * Try to exclusively reserve a RX group. 
* - * For flows requires SW_RING it always goes to the default group - * (Until we can explicitely call out default groups (CR 6695600), - * we assume that the default group is always at position zero); + * For flows requiring HW_DEFAULT_RING (unicast flow of the primary + * client), try to reserve a non-default RX group and give + * it all the rings from the donor group, except the default ring * - * For flows requires HW_DEFAULT_RING (unicast flow of the primary - * client), try to reserve the default RX group only. + * For flows requiring HW_RING (unicast flow of other clients), try + * to reserve non-default RX group with the specified number of + * rings, if available. * - * For flows requires HW_RING (unicast flow of other clients), try - * to reserve non-default RX group then the default group. + * For flows that have not asked for software or hardware ring, + * try to reserve a non-default group with 1 ring, if available. */ - switch (rtype) { - case MAC_RX_RESERVE_DEFAULT: - start = 0; - loopcount = 1; - break; - case MAC_RX_RESERVE_NONDEFAULT: - start = 1; - loopcount = mip->mi_rx_group_count; - } - - for (i = start; i < start + loopcount; i++) { - grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count]; + for (i = 1; i < mip->mi_rx_group_count; i++) { + grp = &mip->mi_rx_groups[i]; DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, int, grp->mrg_index, mac_group_state_t, grp->mrg_state); /* - * Check to see whether this mac client is the only client - * on this RX group. If not, we cannot exclusively reserve - * this RX group. + * Check if this group could be a candidate group for + * eviction if we need a group for this MAC client, + * but there aren't any.
A candidate group is one + * that didn't ask for an exclusive group, but got + * one and it has enough rings (combined with what + * the donor group can donate) for the new MAC + * client */ - if (!MAC_RX_GROUP_NO_CLIENT(grp) && - (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) { + if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { + /* + * If the primary/donor group is not the default + * group, don't bother looking for a candidate group. + * If we don't have enough rings we will check + * if the primary group can be vacated. + */ + if (candidate_grp == NULL && + donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { + ASSERT(!MAC_GROUP_NO_CLIENT(grp)); + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + ASSERT(gclient != NULL); + gmrp = MCIP_RESOURCE_PROPS(gclient); + if (gclient->mci_share == NULL && + (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && + (unspec || + (grp->mrg_cur_count + donor_grp_rcnt >= + need_rings))) { + candidate_grp = grp; + } + } continue; } - /* * This group could already be SHARED by other multicast * flows on this client. In that case, the group would @@ -5105,35 +6153,133 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, continue; } - if ((i % mip->mi_rx_group_count) == 0 || - mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) break; - } - ASSERT(grp->mrg_cur_count == 0); /* * Populate the group. Rings should be taken - * from the default group at position 0 for now. + * from the donor group. */ + nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; - err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, - &mip->mi_rx_groups[0], grp, share); - if (err == 0) - break; + /* + * If the donor group can't donate, let's just walk and + * see if someone can vacate a group, so that we have + * enough rings for this, unless we already have + * identified a candidate group.
+ */ + if (nrings <= donor_grp_rcnt) { + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, + donorgrp, grp, share, nrings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, + grp->mrg_cur_count, -1); + } + if (mac_is_primary_client(mcip) && !rxhw) + mip->mi_rx_donor_grp = grp; + break; + } + } DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, mip->mi_name, int, grp->mrg_index, int, err); /* - * It's a dynamic group but the grouping operation failed. + * It's a dynamic group but the grouping operation + * failed. */ mac_stop_group(grp); } + /* We didn't find an exclusive group for this MAC client */ + if (i >= mip->mi_rx_group_count) { - if (i == start + loopcount) - return (NULL); + if (!need_exclgrp) + return (NULL); + /* + * If we found a candidate group then we switch the + * MAC client from the candidate_group to the default + * group and give the group to this MAC client. If + * we didn't find a candidate_group, check if the + * primary is in its own group and if it can make way + * for this MAC client. + */ + if (candidate_grp == NULL && + donorgrp != MAC_DEFAULT_RX_GROUP(mip) && + donorgrp->mrg_cur_count >= need_rings) { + candidate_grp = donorgrp; + } + if (candidate_grp != NULL) { + boolean_t prim_grp = B_FALSE; + + /* + * Switch the MAC client from the candidate group + * to the default group.. If this group was the + * donor group, then after the switch we need + * to update the donor group too. 
+ */ + grp = candidate_grp; + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + if (grp == mip->mi_rx_donor_grp) + prim_grp = B_TRUE; + if (mac_rx_switch_group(gclient, grp, + MAC_DEFAULT_RX_GROUP(mip)) != 0) { + return (NULL); + } + if (prim_grp) { + mip->mi_rx_donor_grp = + MAC_DEFAULT_RX_GROUP(mip); + donorgrp = MAC_DEFAULT_RX_GROUP(mip); + } + + + /* + * Now give this group with the required rings + * to this MAC client. + */ + ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); + if (mac_start_group(grp) != 0) + return (NULL); + + if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) + return (grp); + + donor_grp_rcnt = donorgrp->mrg_cur_count - 1; + ASSERT(grp->mrg_cur_count == 0); + ASSERT(donor_grp_rcnt >= need_rings); + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, + donorgrp, grp, share, need_rings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, + grp->mrg_cur_count, -1); + } + DTRACE_PROBE2(rx__group__reserved, + char *, mip->mi_name, int, grp->mrg_index); + return (grp); + } + DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, + mip->mi_name, int, grp->mrg_index, int, err); + mac_stop_group(grp); + } + return (NULL); + } ASSERT(grp != NULL); DTRACE_PROBE2(rx__group__reserved, @@ -5152,10 +6298,13 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, void mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) { - mac_impl_t *mip = mcip->mci_mip; - mac_ring_t *ring; + mac_impl_t *mip = mcip->mci_mip; + mac_ring_t *ring; - ASSERT(group != &mip->mi_rx_groups[0]); + ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + + if (mip->mi_rx_donor_grp == group) + mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); /* * This is the case where there are no clients left. 
Any @@ -5170,10 +6319,12 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) */ ring->mr_srs = NULL; } - ASSERT(ring->mr_state == MR_INUSE); - mac_stop_ring(ring); - ring->mr_state = MR_FREE; - ring->mr_flag = 0; + ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || + ring->mr_state == MR_INUSE); + if (ring->mr_state == MR_INUSE) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } } /* remove group from share */ @@ -5190,8 +6341,8 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) * Move rings back to default group. */ while ((ring = group->mrg_rings) != NULL) { - (void) mac_group_mov_ring(mip, - &mip->mi_rx_groups[0], ring); + (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, + ring); } } mac_stop_group(group); @@ -5202,86 +6353,637 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) } /* + * When we move the primary's mac address between groups, we need to also + * take all the clients sharing the same mac address along with it (VLANs) + * We remove the mac address for such clients from the group after quiescing + * them. When we add the mac address we restart the client. Note that + * the primary's mac address is removed from the group after all the + * other clients sharing the address are removed. Similarly, the primary's + * mac address is added before all the other client's mac address are + * added. While grp is the group where the clients reside, tgrp is + * the group where the addresses have to be added. + */ +static void +mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, + mac_group_t *tgrp, uint8_t *maddr, boolean_t add) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_grp_client_t *mgcp = grp->mrg_clients; + mac_client_impl_t *gmcip; + boolean_t prim; + + prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + + /* + * If the clients are in a non-default group, we just have to + * walk the group's client list. 
If it is in the default group + * (which will be shared by other clients as well), we need to + * check if the unicast address matches mcip's unicast. + */ + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + if (gmcip != mcip && + (grp != MAC_DEFAULT_RX_GROUP(mip) || + mcip->mci_unicast == gmcip->mci_unicast)) { + if (!add) { + mac_rx_client_quiesce( + (mac_client_handle_t)gmcip); + (void) mac_remove_macaddr(mcip->mci_unicast); + } else { + (void) mac_add_macaddr(mip, tgrp, maddr, prim); + mac_rx_client_restart( + (mac_client_handle_t)gmcip); + } + } + mgcp = mgcp->mgc_next; + } +} + + +/* + * Move the MAC address from fgrp to tgrp. If this is the primary client, + * we need to take any VLANs etc. together too. + */ +static int +mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + mac_impl_t *mip = mcip->mci_mip; + uint8_t maddr[MAXMACADDRLEN]; + int err = 0; + boolean_t prim; + boolean_t multiclnt = B_FALSE; + + mac_rx_client_quiesce((mac_client_handle_t)mcip); + ASSERT(mcip->mci_unicast != NULL); + bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); + + prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + if (mcip->mci_unicast->ma_nusers > 1) { + mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); + multiclnt = B_TRUE; + } + ASSERT(mcip->mci_unicast->ma_nusers == 1); + err = mac_remove_macaddr(mcip->mci_unicast); + if (err != 0) { + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) { + mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, + B_TRUE); + } + return (err); + } + /* + * Program the H/W Classifier first, if this fails we need + * not proceed with the other stuff. + */ + if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { + /* Revert back the H/W Classifier */ + if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { + /* + * This should not fail now since it worked earlier, + * should we panic?
+ */ + cmn_err(CE_WARN, + "mac_rx_switch_group: switching %p back" + " to group %p failed!!", (void *)mcip, + (void *)fgrp); + } + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) { + mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, + B_TRUE); + } + return (err); + } + mcip->mci_unicast = mac_find_macaddr(mip, maddr); + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) + mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); + return (err); +} + +/* + * Switch the MAC client from one group to another. This means we need + * to remove the MAC address from the group, remove the MAC client, + * teardown the SRSs and revert the group state. Then, we add the client + * to the destination group, set the SRSs, and add the MAC address to the + * group. + */ +int +mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + int err; + mac_group_state_t next_state; + mac_client_impl_t *group_only_mcip; + mac_client_impl_t *gmcip; + mac_impl_t *mip = mcip->mci_mip; + mac_grp_client_t *mgcp; + + ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); + + if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) + return (err); + + /* + * The group might be reserved, but SRSs may not be set up, e.g. + * primary and its vlans using a reserved group. 
+ */ + if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && + MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { + mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); + } + if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + mac_group_remove_client(fgrp, gmcip); + mac_group_add_client(tgrp, gmcip); + gmcip->mci_flent->fe_rx_ring_group = tgrp; + } + mac_release_rx_group(mcip, fgrp); + ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); + mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); + } else { + mac_group_remove_client(fgrp, mcip); + mac_group_add_client(tgrp, mcip); + mcip->mci_flent->fe_rx_ring_group = tgrp; + /* + * If there are other clients (VLANs) sharing this address + * we should be here only for the primary. + */ + if (mcip->mci_unicast->ma_nusers > 1) { + /* + * We need to move all the clients that are using + * this h/w address. + */ + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (mcip->mci_unicast == gmcip->mci_unicast) { + mac_group_remove_client(fgrp, gmcip); + mac_group_add_client(tgrp, gmcip); + gmcip->mci_flent->fe_rx_ring_group = + tgrp; + } + } + } + /* + * The default group will still take the multicast, + * broadcast traffic etc., so it won't go to + * MAC_GROUP_STATE_REGISTERED. + */ + if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) + mac_rx_group_unmark(fgrp, MR_CONDEMNED); + mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); + } + next_state = mac_group_next_state(tgrp, &group_only_mcip, + MAC_DEFAULT_RX_GROUP(mip), B_TRUE); + mac_set_group_state(tgrp, next_state); + /* + * If the destination group is reserved, setup the SRSs etc. 
+ */ + if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { + mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); + mac_fanout_setup(mcip, mcip->mci_flent, + MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, + NULL); + mac_rx_group_unmark(tgrp, MR_INCIPIENT); + } else { + mac_rx_switch_grp_to_sw(tgrp); + } + return (0); +} + +/* * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() * when a share was allocated to the client. */ mac_group_t * -mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share) +mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) { - mac_group_t *grp; - int rv, i; + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *grp = NULL; + int rv; + int i; + int err; + mac_group_t *defgrp; + mac_share_handle_t share = mcip->mci_share; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + int nrings; + int defnrings; + boolean_t need_exclgrp = B_FALSE; + int need_rings = 0; + mac_group_t *candidate_grp = NULL; + mac_client_impl_t *gclient; + mac_resource_props_t *gmrp; + boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; + boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; + boolean_t isprimary; + + isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + /* + * When we come here for a VLAN on the primary (dladm create-vlan), + * we need to pair it along with the primary (to keep it consistent + * with the RX side). So, we check if the primary is already assigned + * to a group and return the group if so. The other way is also + * true, i.e. the VLAN is already created and now we are plumbing + * the primary. 
+ */ + if (!move && isprimary) { + for (gclient = mip->mi_clients_list; gclient != NULL; + gclient = gclient->mci_client_next) { + if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && + gclient->mci_flent->fe_tx_ring_group != NULL) { + return (gclient->mci_flent->fe_tx_ring_group); + } + } + } + + if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) + return (NULL); + + /* For dynamic groups, default unspec to 1 */ + if (txhw && unspec && + mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + mrp->mrp_ntxrings = 1; + } + /* + * For static grouping we allow only specifying rings=0 and + * unspecified + */ + if (txhw && mrp->mrp_ntxrings > 0 && + mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { + return (NULL); + } + + if (txhw) { + /* + * We have explicitly asked for a group (with ntxrings, + * if unspec). + */ + if (unspec || mrp->mrp_ntxrings > 0) { + need_exclgrp = B_TRUE; + need_rings = mrp->mrp_ntxrings; + } else if (mrp->mrp_ntxrings == 0) { + /* + * We have asked for a software group. + */ + return (NULL); + } + } + defgrp = MAC_DEFAULT_TX_GROUP(mip); + /* + * The number of rings that the default group can donate. + * We need to leave at least one ring - the default ring - in + * this group. + */ + defnrings = defgrp->mrg_cur_count - 1; /* - * TX groups are currently allocated only to MAC clients - * which are associated with a share. Since we have a fixed - * number of share and groups, and we already successfully - * allocated a share, find an available TX group. + * Primary gets default group unless explicitly told not + * to (i.e. rings > 0). */ - ASSERT(share != NULL); - ASSERT(mip->mi_tx_group_free > 0); + if (isprimary && !need_exclgrp) + return (NULL); + nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? 
mrp->mrp_ntxrings : 1; for (i = 0; i < mip->mi_tx_group_count; i++) { grp = &mip->mi_tx_groups[i]; - if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || - (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) + (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { + /* + * Select a candidate for replacement if we don't + * get an exclusive group. A candidate group is one + * that didn't ask for an exclusive group, but got + * one and it has enough rings (combined with what + * the default group can donate) for the new MAC + * client. + */ + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && + candidate_grp == NULL) { + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + gmrp = MCIP_RESOURCE_PROPS(gclient); + if (gclient->mci_share == NULL && + (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && + (unspec || + (grp->mrg_cur_count + defnrings) >= + need_rings)) { + candidate_grp = grp; + } + } continue; + } + /* + * If the default can't donate let's just walk and + * see if someone can vacate a group, so that we have + * enough rings for this. + */ + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || + nrings <= defnrings) { + if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { + rv = mac_start_group(grp); + ASSERT(rv == 0); + } + break; + } + } - rv = mac_start_group(grp); - ASSERT(rv == 0); + /* The default group */ + if (i >= mip->mi_tx_group_count) { + /* + * If we need an exclusive group and have identified a + * candidate group we switch the MAC client from the + * candidate group to the default group and give the + * candidate group to this client. + */ + if (need_exclgrp && candidate_grp != NULL) { + /* + * Switch the MAC client from the candidate group + * to the default group. 
+ */ + grp = candidate_grp; + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + mac_tx_client_quiesce((mac_client_handle_t)gclient); + mac_tx_switch_group(gclient, grp, defgrp); + mac_tx_client_restart((mac_client_handle_t)gclient); - grp->mrg_state = MAC_GROUP_STATE_RESERVED; - break; - } + /* + * Give the candidate group with the specified number + * of rings to this MAC client. + */ + ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); + rv = mac_start_group(grp); + ASSERT(rv == 0); - ASSERT(grp != NULL); + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) + return (grp); + + ASSERT(grp->mrg_cur_count == 0); + ASSERT(defgrp->mrg_cur_count > need_rings); + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, + defgrp, grp, share, need_rings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, -1, + grp->mrg_cur_count); + } + mip->mi_tx_group_free--; + return (grp); + } + DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, + mip->mi_name, int, grp->mrg_index, int, err); + mac_stop_group(grp); + } + return (NULL); + } /* - * Populate the group. Rings should be taken from the group - * of unassigned rings, which is past the array of TX - * groups adversized by the driver. + * We got an exclusive group, but it is not dynamic. 
*/ - rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL, - grp, share); + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + mip->mi_tx_group_free--; + return (grp); + } + + rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, + share, nrings); if (rv != 0) { DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, mip->mi_name, int, grp->mrg_index, int, rv); - mac_stop_group(grp); - grp->mrg_state = MAC_GROUP_STATE_UNINIT; - return (NULL); } - + /* + * For a share i_mac_group_allocate_rings gets the rings from the + * driver, let's populate the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings((mac_client_handle_t)mcip, -1, + grp->mrg_cur_count); + } mip->mi_tx_group_free--; - return (grp); } void -mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp) +mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) { - mac_client_impl_t *mcip = grp->mrg_tx_client; - mac_share_handle_t share = mcip->mci_share; - mac_ring_t *ring; - - ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(share != NULL); - ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED); + mac_impl_t *mip = mcip->mci_mip; + mac_share_handle_t share = mcip->mci_share; + mac_ring_t *ring; + mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); + mac_group_t *defgrp; + + defgrp = MAC_DEFAULT_TX_GROUP(mip); + if (srs != NULL) { + if (srs->srs_soft_ring_count > 0) { + for (ring = grp->mrg_rings; ring != NULL; + ring = ring->mr_next) { + ASSERT(mac_tx_srs_ring_present(srs, ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(srs, ring)); + mac_tx_srs_del_ring(srs, ring); + } + } else { + ASSERT(srs->srs_tx.st_arg2 != NULL); + srs->srs_tx.st_arg2 = NULL; + mac_srs_stat_delete(srs); + } + } + if (share != NULL) + mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); - mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); - while ((ring = grp->mrg_rings) != NULL) { - /* move the ring back to the pool */ - 
(void) mac_group_mov_ring(mip, mip->mi_tx_groups + - mip->mi_tx_group_count, ring); + /* move the ring back to the pool */ + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + while ((ring = grp->mrg_rings) != NULL) + (void) mac_group_mov_ring(mip, defgrp, ring); } mac_stop_group(grp); - mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); - grp->mrg_tx_client = NULL; mip->mi_tx_group_free++; } /* + * Disassociate a MAC client from a group, i.e. go through the rings in the + * group and delete all the soft rings tied to them. + */ +static void +mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) +{ + mac_client_impl_t *mcip = flent->fe_mcip; + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + mac_ring_t *ring; + + tx_srs = flent->fe_tx_srs; + tx = &tx_srs->srs_tx; + + /* Single ring case we haven't created any soft rings */ + if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || + tx->st_mode == SRS_TX_DEFAULT) { + tx->st_arg2 = NULL; + mac_srs_stat_delete(tx_srs); + /* Fanout case, where we have to dismantle the soft rings */ + } else { + for (ring = fgrp->mrg_rings; ring != NULL; + ring = ring->mr_next) { + ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, + ring)); + mac_tx_srs_del_ring(tx_srs, ring); + } + ASSERT(tx->st_arg2 == NULL); + } +} + +/* + * Switch the MAC client from one group to another. This means we need + * to remove the MAC client, teardown the SRSs and revert the group state. + * Then, we add the client to the destination group, set the SRSs etc.
+ */ +void +mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + mac_client_impl_t *group_only_mcip; + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *defgrp; + mac_grp_client_t *mgcp; + mac_client_impl_t *gmcip; + flow_entry_t *gflent; + + defgrp = MAC_DEFAULT_TX_GROUP(mip); + ASSERT(fgrp == flent->fe_tx_ring_group); + + if (fgrp == defgrp) { + /* + * If this is the primary we need to find any VLANs on + * the primary and move them too. + */ + mac_group_remove_client(fgrp, mcip); + mac_tx_dismantle_soft_rings(fgrp, flent); + if (mcip->mci_unicast->ma_nusers > 1) { + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (mcip->mci_unicast != gmcip->mci_unicast) + continue; + mac_tx_client_quiesce( + (mac_client_handle_t)gmcip); + + gflent = gmcip->mci_flent; + mac_group_remove_client(fgrp, gmcip); + mac_tx_dismantle_soft_rings(fgrp, gflent); + + mac_group_add_client(tgrp, gmcip); + gflent->fe_tx_ring_group = tgrp; + /* We could directly set this to SHARED */ + tgrp->mrg_state = mac_group_next_state(tgrp, + &group_only_mcip, defgrp, B_FALSE); + + mac_tx_srs_group_setup(gmcip, gflent, + SRST_LINK); + mac_fanout_setup(gmcip, gflent, + MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, + gmcip, NULL, NULL); + + mac_tx_client_restart( + (mac_client_handle_t)gmcip); + } + } + if (MAC_GROUP_NO_CLIENT(fgrp)) { + mac_ring_t *ring; + int cnt; + int ringcnt; + + fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; + /* + * Additionally, we also need to stop all + * the rings in the default group, except + * the default ring. The reason being + * this group won't be released since it is + * the default group, so the rings won't + * be stopped otherwise. 
+ */ + ringcnt = fgrp->mrg_cur_count; + ring = fgrp->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state == MR_INUSE && + ring != + (mac_ring_t *)mip->mi_default_tx_ring) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } + ring = ring->mr_next; + } + } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { + fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; + } else { + ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); + } + } else { + /* + * We could have VLANs sharing the non-default group with + * the primary. + */ + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (gmcip == mcip) + continue; + mac_tx_client_quiesce((mac_client_handle_t)gmcip); + gflent = gmcip->mci_flent; + + mac_group_remove_client(fgrp, gmcip); + mac_tx_dismantle_soft_rings(fgrp, gflent); + + mac_group_add_client(tgrp, gmcip); + gflent->fe_tx_ring_group = tgrp; + /* We could directly set this to SHARED */ + tgrp->mrg_state = mac_group_next_state(tgrp, + &group_only_mcip, defgrp, B_FALSE); + mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); + mac_fanout_setup(gmcip, gflent, + MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, + gmcip, NULL, NULL); + + mac_tx_client_restart((mac_client_handle_t)gmcip); + } + mac_group_remove_client(fgrp, mcip); + mac_release_tx_group(mcip, fgrp); + fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; + } + + /* Add it to the tgroup */ + mac_group_add_client(tgrp, mcip); + flent->fe_tx_ring_group = tgrp; + tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, + defgrp, B_FALSE); + + mac_tx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); +} + +/* * This is a 1-time control path activity initiated by the client (IP). 
* The mac perimeter protects against other simultaneous control activities, * for example an ioctl that attempts to change the degree of fanout and @@ -5416,3 +7118,599 @@ mac_no_active(mac_handle_t mh) mip->mi_state_flags |= MIS_NO_ACTIVE; i_mac_perim_exit(mip); } + +/* + * Walk the primary VLAN clients whenever the primary's rings property + * changes and update the mac_resource_props_t for the VLAN's client. + * We need to do this since we don't support setting these properties + * on the primary's VLAN clients, but the VLAN clients have to + * follow the primary w.r.t the rings property; + */ +void +mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) +{ + mac_client_impl_t *vmcip; + mac_resource_props_t *vmrp; + + for (vmcip = mip->mi_clients_list; vmcip != NULL; + vmcip = vmcip->mci_client_next) { + if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || + mac_client_vid((mac_client_handle_t)vmcip) == + VLAN_ID_NONE) { + continue; + } + vmrp = MCIP_RESOURCE_PROPS(vmcip); + + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + if (mrp->mrp_mask & MRP_RX_RINGS) + vmrp->mrp_mask |= MRP_RX_RINGS; + else if (vmrp->mrp_mask & MRP_RX_RINGS) + vmrp->mrp_mask &= ~MRP_RX_RINGS; + + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + if (mrp->mrp_mask & MRP_TX_RINGS) + vmrp->mrp_mask |= MRP_TX_RINGS; + else if (vmrp->mrp_mask & MRP_TX_RINGS) + vmrp->mrp_mask &= ~MRP_TX_RINGS; + + if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else + vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else + vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + } +} + +/* + * We are adding or removing ring(s) from a group. The source for taking + * rings is the default group. The destination for giving rings back is + * the default group. 
+ */ +int +mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, + mac_group_t *defgrp) +{ + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint_t modify; + int count; + mac_ring_t *ring; + mac_ring_t *next; + mac_impl_t *mip = mcip->mci_mip; + mac_ring_t **rings; + uint_t ringcnt; + int i = 0; + boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; + int start; + int end; + mac_group_t *tgrp; + int j; + int rv = 0; + + /* + * If we are asked for just a group, we give 1 ring, else + * the specified number of rings. + */ + if (rx_group) { + ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: + mrp->mrp_nrxrings; + } else { + ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: + mrp->mrp_ntxrings; + } + + /* don't allow modifying rings for a share for now. */ + ASSERT(mcip->mci_share == NULL); + + if (ringcnt == group->mrg_cur_count) + return (0); + + if (group->mrg_cur_count > ringcnt) { + modify = group->mrg_cur_count - ringcnt; + if (rx_group) { + if (mip->mi_rx_donor_grp == group) { + ASSERT(mac_is_primary_client(mcip)); + mip->mi_rx_donor_grp = defgrp; + } else { + defgrp = mip->mi_rx_donor_grp; + } + } + ring = group->mrg_rings; + rings = kmem_alloc(modify * sizeof (mac_ring_handle_t), + KM_SLEEP); + j = 0; + for (count = 0; count < modify; count++) { + next = ring->mr_next; + rv = mac_group_mov_ring(mip, defgrp, ring); + if (rv != 0) { + /* cleanup on failure */ + for (j = 0; j < count; j++) { + (void) mac_group_mov_ring(mip, group, + rings[j]); + } + break; + } + rings[j++] = ring; + ring = next; + } + kmem_free(rings, modify * sizeof (mac_ring_handle_t)); + return (rv); + } + if (ringcnt >= MAX_RINGS_PER_GROUP) + return (EINVAL); + + modify = ringcnt - group->mrg_cur_count; + + if (rx_group) { + if (group != mip->mi_rx_donor_grp) + defgrp = mip->mi_rx_donor_grp; + else + /* + * This is the donor group with all the remaining + * rings. 
Default group now gets to be the donor + */ + mip->mi_rx_donor_grp = defgrp; + start = 1; + end = mip->mi_rx_group_count; + } else { + start = 0; + end = mip->mi_tx_group_count - 1; + } + /* + * If the default doesn't have any rings, lets see if we can + * take rings given to an h/w client that doesn't need it. + * For now, we just see if there is any one client that can donate + * all the required rings. + */ + if (defgrp->mrg_cur_count < (modify + 1)) { + for (i = start; i < end; i++) { + if (rx_group) { + tgrp = &mip->mi_rx_groups[i]; + if (tgrp == group || tgrp->mrg_state < + MAC_GROUP_STATE_RESERVED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) + continue; + if ((tgrp->mrg_cur_count + + defgrp->mrg_cur_count) < (modify + 1)) { + continue; + } + if (mac_rx_switch_group(mcip, tgrp, + defgrp) != 0) { + return (ENOSPC); + } + } else { + tgrp = &mip->mi_tx_groups[i]; + if (tgrp == group || tgrp->mrg_state < + MAC_GROUP_STATE_RESERVED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) + continue; + if ((tgrp->mrg_cur_count + + defgrp->mrg_cur_count) < (modify + 1)) { + continue; + } + /* OK, we can switch this to s/w */ + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, tgrp, defgrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + } + } + if (defgrp->mrg_cur_count < (modify + 1)) + return (ENOSPC); + } + if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp, + group, mcip->mci_share, modify)) != 0) { + return (rv); + } + return (0); +} + +/* + * Given the poolname in mac_resource_props, find the cpupart + * that is associated with this pool. 
The cpupart will be used + * later for finding the cpus to be bound to the networking threads. + * + * use_default is set B_TRUE if pools are enabled and pool_default + * is returned. This avoids a 2nd lookup to set the poolname + * for pool-effective. + * + * returns: + * + * NULL - pools are disabled or if the 'cpus' property is set. + * cpupart of pool_default - pools are enabled and the pool + * is not available or poolname is blank + * cpupart of named pool - pools are enabled and the pool + * is available. + */ +cpupart_t * +mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) +{ + pool_t *pool; + cpupart_t *cpupart; + + *use_default = B_FALSE; + + /* CPUs property is set */ + if (mrp->mrp_mask & MRP_CPUS) + return (NULL); + + ASSERT(pool_lock_held()); + + /* Pools are disabled, no pset */ + if (pool_state == POOL_DISABLED) + return (NULL); + + /* Pools property is set */ + if (mrp->mrp_mask & MRP_POOL) { + if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { + /* Pool not found */ + DTRACE_PROBE1(mac_pset_find_no_pool, char *, + mrp->mrp_pool); + *use_default = B_TRUE; + pool = pool_default; + } + /* Pools property is not set */ + } else { + *use_default = B_TRUE; + pool = pool_default; + } + + /* Find the CPU pset that corresponds to the pool */ + mutex_enter(&cpu_lock); + if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { + DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, + pool->pool_pset->pset_id); + } + mutex_exit(&cpu_lock); + + return (cpupart); +} + +void +mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, + mac_resource_props_t *mrp, mac_resource_props_t *emrp) +{ + ASSERT(pool_lock_held()); + + if (cpupart != NULL) { + emrp->mrp_mask |= MRP_POOL; + if (use_default) { + (void) strcpy(emrp->mrp_pool, + "pool_default"); + } else { + ASSERT(strlen(mrp->mrp_pool) != 0); + (void) strcpy(emrp->mrp_pool, + mrp->mrp_pool); + } + } else { + emrp->mrp_mask &= ~MRP_POOL; + bzero(emrp->mrp_pool, MAXPATHLEN); + 
} +} + +struct mac_pool_arg { + char mpa_poolname[MAXPATHLEN]; + pool_event_t mpa_what; +}; + +/*ARGSUSED*/ +static uint_t +mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + struct mac_pool_arg *mpa = arg; + mac_impl_t *mip = (mac_impl_t *)val; + mac_client_impl_t *mcip; + mac_resource_props_t *mrp, *emrp; + boolean_t pool_update = B_FALSE; + boolean_t pool_clear = B_FALSE; + boolean_t use_default = B_FALSE; + cpupart_t *cpupart = NULL; + + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + i_mac_perim_enter(mip); + for (mcip = mip->mi_clients_list; mcip != NULL; + mcip = mcip->mci_client_next) { + pool_update = B_FALSE; + pool_clear = B_FALSE; + use_default = B_FALSE; + mac_client_get_resources((mac_client_handle_t)mcip, mrp); + emrp = MCIP_EFFECTIVE_PROPS(mcip); + + /* + * When pools are enabled + */ + if ((mpa->mpa_what == POOL_E_ENABLE) && + ((mrp->mrp_mask & MRP_CPUS) == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + + /* + * When pools are disabled + */ + if ((mpa->mpa_what == POOL_E_DISABLE) && + ((mrp->mrp_mask & MRP_CPUS) == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_clear = B_TRUE; + } + + /* + * Look for links with the pool property set and the poolname + * matching the one which is changing. + */ + if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) { + /* + * The pool associated with the link has changed. + */ + if (mpa->mpa_what == POOL_E_CHANGE) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + } + + /* + * This link is associated with pool_default and + * pool_default has changed. + */ + if ((mpa->mpa_what == POOL_E_CHANGE) && + (strcmp(emrp->mrp_pool, "pool_default") == 0) && + (strcmp(mpa->mpa_poolname, "pool_default") == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + + /* + * Get new list of cpus for the pool, bind network + * threads to new list of cpus and update resources. 
+ */ + if (pool_update) { + if (MCIP_DATAPATH_SETUP(mcip)) { + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, mcip->mci_flent, mrp, + mac_rx_deliver, mcip, NULL, cpupart); + mac_set_pool_effective(use_default, cpupart, + mrp, emrp); + pool_unlock(); + } + mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), + B_FALSE); + } + + /* + * Clear the effective pool and bind network threads + * to any available CPU. + */ + if (pool_clear) { + if (MCIP_DATAPATH_SETUP(mcip)) { + emrp->mrp_mask &= ~MRP_POOL; + bzero(emrp->mrp_pool, MAXPATHLEN); + mac_fanout_setup(mcip, mcip->mci_flent, mrp, + mac_rx_deliver, mcip, NULL, NULL); + } + mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), + B_FALSE); + } + } + i_mac_perim_exit(mip); + kmem_free(mrp, sizeof (*mrp)); + return (MH_WALK_CONTINUE); +} + +static void +mac_pool_update(void *arg) +{ + mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg); + kmem_free(arg, sizeof (struct mac_pool_arg)); +} + +/* + * Callback function to be executed when a noteworthy pool event + * takes place. + */ +/* ARGSUSED */ +static void +mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) +{ + pool_t *pool; + char *poolname = NULL; + struct mac_pool_arg *mpa; + + pool_lock(); + mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP); + + switch (what) { + case POOL_E_ENABLE: + case POOL_E_DISABLE: + break; + + case POOL_E_CHANGE: + pool = pool_lookup_pool_by_id(id); + if (pool == NULL) { + kmem_free(mpa, sizeof (struct mac_pool_arg)); + pool_unlock(); + return; + } + pool_get_name(pool, &poolname); + (void) strlcpy(mpa->mpa_poolname, poolname, + sizeof (mpa->mpa_poolname)); + break; + + default: + kmem_free(mpa, sizeof (struct mac_pool_arg)); + pool_unlock(); + return; + } + pool_unlock(); + + mpa->mpa_what = what; + + mac_pool_update(mpa); +} + +/* + * Set effective rings property. This could be called from datapath_setup/ + * datapath_teardown or set-linkprop. 
+ * If the group is reserved we just go ahead and set the effective rings. + * Additionally, for TX this could mean the default group has lost/gained + * some rings, so if the default group is reserved, we need to adjust the + * effective rings for the default group clients. For RX, if we are working + * with the non-default group, we just need * to reset the effective props + * for the default group clients. + */ +void +mac_set_rings_effective(mac_client_impl_t *mcip) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *grp; + mac_group_t *defgrp; + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + mac_grp_client_t *mgcp; + mac_client_impl_t *gmcip; + + grp = flent->fe_rx_ring_group; + if (grp != NULL) { + defgrp = MAC_DEFAULT_RX_GROUP(mip); + /* + * If we have reserved a group, set the effective rings + * to the ring count in the group. + */ + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { + emrp->mrp_mask |= MRP_RX_RINGS; + emrp->mrp_nrxrings = grp->mrg_cur_count; + } + + /* + * We go through the clients in the shared group and + * reset the effective properties. It is possible this + * might have already been done for some client (i.e. + * if some client is being moved to a group that is + * already shared). The case where the default group is + * RESERVED is taken care of above (note in the RX side if + * there is a non-default group, the default group is always + * SHARED). 
+ */ + if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) { + if (grp->mrg_state == MAC_GROUP_STATE_SHARED) + mgcp = grp->mrg_clients; + else + mgcp = defgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + if (emrp->mrp_mask & MRP_RX_RINGS) { + emrp->mrp_mask &= ~MRP_RX_RINGS; + emrp->mrp_nrxrings = 0; + } + mgcp = mgcp->mgc_next; + } + } + } + + /* Now the TX side */ + grp = flent->fe_tx_ring_group; + if (grp != NULL) { + defgrp = MAC_DEFAULT_TX_GROUP(mip); + + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { + emrp->mrp_mask |= MRP_TX_RINGS; + emrp->mrp_ntxrings = grp->mrg_cur_count; + } else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) { + mgcp = grp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + if (emrp->mrp_mask & MRP_TX_RINGS) { + emrp->mrp_mask &= ~MRP_TX_RINGS; + emrp->mrp_ntxrings = 0; + } + mgcp = mgcp->mgc_next; + } + } + + /* + * If the group is not the default group and the default + * group is reserved, the ring count in the default group + * might have changed, update it. + */ + if (grp != defgrp && + defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { + gmcip = MAC_GROUP_ONLY_CLIENT(defgrp); + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + emrp->mrp_ntxrings = defgrp->mrg_cur_count; + } + } + emrp = MCIP_EFFECTIVE_PROPS(mcip); +} + +/* + * Check if the primary is in the default group. If so, see if we + * can give it a an exclusive group now that another client is + * being configured. We take the primary out of the default group + * because the multicast/broadcast packets for the all the clients + * will land in the default ring in the default group which means + * any client in the default group, even if it is the only on in + * the group, will lose exclusive access to the rings, hence + * polling. 
+ */ +mac_client_impl_t * +mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *defgrp = MAC_DEFAULT_RX_GROUP(mip); + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint8_t *mac_addr; + mac_group_t *ngrp; + + /* + * Check if the primary is in the default group, if not + * or if it is explicitly configured to be in the default + * group OR set the RX rings property, return. + */ + if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS) + return (NULL); + + /* + * If the new client needs an exclusive group and we + * don't have another for the primary, return. + */ + if (rxhw && mip->mi_rxhwclnt_avail < 2) + return (NULL); + + mac_addr = flent->fe_flow_desc.fd_dst_mac; + /* + * We call this when we are setting up the datapath for + * the first non-primary. + */ + ASSERT(mip->mi_nactiveclients == 2); + /* + * OK, now we have the primary that needs to be relocated. + */ + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); + if (ngrp == NULL) + return (NULL); + if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { + mac_stop_group(ngrp); + return (NULL); + } + return (mcip); +} diff --git a/usr/src/uts/common/io/mac/mac_bcast.c b/usr/src/uts/common/io/mac/mac_bcast.c index 2f17228e06..1aba37c822 100644 --- a/usr/src/uts/common/io/mac/mac_bcast.c +++ b/usr/src/uts/common/io/mac/mac_bcast.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -212,10 +212,15 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) rw_enter(&mip->mi_rw_lock, RW_READER); /* update stats */ - if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) - dst_mcip->mci_stat_multircv++; - else - dst_mcip->mci_stat_brdcstrcv++; + if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) { + MCIP_STAT_UPDATE(dst_mcip, multircv, 1); + MCIP_STAT_UPDATE(dst_mcip, multircvbytes, + msgdsize(mp_chain)); + } else { + MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1); + MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes, + msgdsize(mp_chain)); + } if (grp->mbg_clients_gen != gen) { /* @@ -236,10 +241,12 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) * so we need to send a copy of the packet to the * underlying NIC so that it can be sent on the wire. */ - src_mcip->mci_stat_multixmt++; - src_mcip->mci_stat_brdcstxmt++; + MCIP_STAT_UPDATE(src_mcip, multixmt, 1); + MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain)); + MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1); + MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain)); - MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, B_FALSE); + MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip); if (mp_chain != NULL) freemsgchain(mp_chain); } else { diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 78c7eae9f2..2f8962f67a 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -108,6 +108,7 @@ #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/dls.h> #include <sys/dld.h> #include <sys/modctl.h> @@ -144,6 +145,10 @@ static void mac_client_remove_flow_from_list(mac_client_impl_t *, static void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *); static void mac_rename_flow_names(mac_client_impl_t *, const char *); static void mac_virtual_link_update(mac_impl_t *); +static 
int mac_client_datapath_setup(mac_client_impl_t *, uint16_t, + uint8_t *, mac_resource_props_t *, boolean_t, mac_unicast_impl_t *); +static void mac_client_datapath_teardown(mac_client_handle_t, + mac_unicast_impl_t *, flow_entry_t *); /* ARGSUSED */ static int @@ -560,6 +565,14 @@ mac_client_link_state(mac_client_impl_t *mcip) } /* + * These statistics are consumed by dladm show-link -s <vnic>, + * dladm show-vnic -s and netstat. With the introduction of dlstat, + * dladm show-link -s and dladm show-vnic -s witll be EOL'ed while + * netstat will consume from kstats introduced for dlstat. This code + * will be removed at that time. + */ + +/* * Return the statistics of a MAC client. These statistics are different * then the statistics of the underlying MAC which are returned by * mac_stat_get(). @@ -567,9 +580,17 @@ mac_client_link_state(mac_client_impl_t *mcip) uint64_t mac_client_stat_get(mac_client_handle_t mch, uint_t stat) { - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_impl_t *mip = mcip->mci_mip; - uint64_t val; + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + uint64_t val = 0; + + mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs); + mac_tx_stat = &mac_srs->srs_tx.st_stat; switch (stat) { case MAC_STAT_LINK_STATE: @@ -588,37 +609,52 @@ mac_client_stat_get(mac_client_handle_t mch, uint_t stat) val = mac_client_ifspeed(mcip); break; case MAC_STAT_MULTIRCV: - val = mcip->mci_stat_multircv; + val = mcip->mci_misc_stat.mms_multircv; break; case MAC_STAT_BRDCSTRCV: - val = mcip->mci_stat_brdcstrcv; + val = mcip->mci_misc_stat.mms_brdcstrcv; break; case MAC_STAT_MULTIXMT: - val = mcip->mci_stat_multixmt; + val = mcip->mci_misc_stat.mms_multixmt; break; case MAC_STAT_BRDCSTXMT: - val = mcip->mci_stat_brdcstxmt; + val = mcip->mci_misc_stat.mms_brdcstxmt; break; case 
MAC_STAT_OBYTES: - val = mcip->mci_stat_obytes; + val = mac_tx_stat->mts_obytes; break; case MAC_STAT_OPACKETS: - val = mcip->mci_stat_opackets; + val = mac_tx_stat->mts_opackets; break; case MAC_STAT_OERRORS: - val = mcip->mci_stat_oerrors; + val = mac_tx_stat->mts_oerrors; break; case MAC_STAT_IPACKETS: - val = mcip->mci_stat_ipackets; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + } break; case MAC_STAT_RBYTES: - val = mcip->mci_stat_ibytes; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + } break; case MAC_STAT_IERRORS: - val = mcip->mci_stat_ierrors; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_ierrors; + } break; default: - val = mac_stat_default(mip, stat); + val = mac_driver_stat_default(mip, stat); break; } @@ -676,12 +712,30 @@ mac_stat_get(mac_handle_t mh, uint_t stat) * The driver doesn't support this statistic. Get the * statistic's default value. */ - val = mac_stat_default(mip, stat); + val = mac_driver_stat_default(mip, stat); } return (val); } /* + * Query hardware rx ring corresponding to the pseudo ring. + */ +uint64_t +mac_pseudo_rx_ring_stat_get(mac_ring_handle_t handle, uint_t stat) +{ + return (mac_rx_ring_stat_get(handle, stat)); +} + +/* + * Query hardware tx ring corresponding to the pseudo ring. + */ +uint64_t +mac_pseudo_tx_ring_stat_get(mac_ring_handle_t handle, uint_t stat) +{ + return (mac_tx_ring_stat_get(handle, stat)); +} + +/* * Utility function which returns the VID associated with a flow entry. 
*/ uint16_t @@ -752,6 +806,12 @@ mac_unicast_update_client_flow(mac_client_impl_t *mcip) mac_flow_set_desc(flent, &flow_desc); /* + * The v6 local addr (used by mac protection) needs to be + * regenerated because our mac address has changed. + */ + mac_protect_update_v6_local_addr(mcip); + + /* * A MAC client could have one MAC address but multiple * VLANs. In that case update the flow entries corresponding * to all VLANs of the MAC client. @@ -1184,20 +1244,14 @@ int mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, uint16_t flags) { - mac_impl_t *mip = (mac_impl_t *)mh; - mac_client_impl_t *mcip; - int err = 0; - boolean_t share_desired = - ((flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0); - boolean_t no_hwrings = ((flags & MAC_OPEN_FLAGS_NO_HWRINGS) != 0); - boolean_t req_hwrings = ((flags & MAC_OPEN_FLAGS_REQ_HWRINGS) != 0); - flow_entry_t *flent = NULL; + mac_impl_t *mip = (mac_impl_t *)mh; + mac_client_impl_t *mcip; + int err = 0; + boolean_t share_desired; + flow_entry_t *flent = NULL; + share_desired = (flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0; *mchp = NULL; - if (share_desired && no_hwrings) { - /* can't have shares but no hardware rings */ - return (EINVAL); - } i_mac_perim_enter(mip); @@ -1249,6 +1303,9 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, if ((flags & MAC_OPEN_FLAGS_IS_AGGR_PORT) != 0) mcip->mci_state_flags |= MCIS_IS_AGGR_PORT; + if (mip->mi_state_flags & MIS_IS_AGGR) + mcip->mci_state_flags |= MCIS_IS_AGGR; + if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) { datalink_id_t linkid; @@ -1283,19 +1340,18 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY) mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY; + if (flags & MAC_OPEN_FLAGS_NO_UNICAST_ADDR) + mcip->mci_state_flags |= MCIS_NO_UNICAST_ADDR; + + mac_protect_init(mcip); + /* the subflow table will be created dynamically */ mcip->mci_subflow_tab = NULL; - 
mcip->mci_stat_multircv = 0; - mcip->mci_stat_brdcstrcv = 0; - mcip->mci_stat_multixmt = 0; - mcip->mci_stat_brdcstxmt = 0; - - mcip->mci_stat_obytes = 0; - mcip->mci_stat_opackets = 0; - mcip->mci_stat_oerrors = 0; - mcip->mci_stat_ibytes = 0; - mcip->mci_stat_ipackets = 0; - mcip->mci_stat_ierrors = 0; + + mcip->mci_misc_stat.mms_multircv = 0; + mcip->mci_misc_stat.mms_brdcstrcv = 0; + mcip->mci_misc_stat.mms_multixmt = 0; + mcip->mci_misc_stat.mms_brdcstxmt = 0; /* Create an initial flow */ @@ -1321,20 +1377,25 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, */ mac_client_add(mcip); - if (no_hwrings) - mcip->mci_state_flags |= MCIS_NO_HWRINGS; - if (req_hwrings) - mcip->mci_state_flags |= MCIS_REQ_HWRINGS; mcip->mci_share = NULL; - if (share_desired) { - ASSERT(!no_hwrings); + if (share_desired) i_mac_share_alloc(mcip); - } DTRACE_PROBE2(mac__client__open__allocated, mac_impl_t *, mcip->mci_mip, mac_client_impl_t *, mcip); *mchp = (mac_client_handle_t)mcip; + /* + * We will do mimimal datapath setup to allow a MAC client to + * transmit or receive non-unicast packets without waiting + * for mac_unicast_add. 
+ */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) { + if ((err = mac_client_datapath_setup(mcip, VLAN_ID_NONE, + NULL, NULL, B_TRUE, NULL)) != 0) { + goto done; + } + } i_mac_perim_exit(mip); return (0); @@ -1373,6 +1434,13 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) return; } + /* If we have only setup up minimal datapth setup, tear it down */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) { + mac_client_datapath_teardown((mac_client_handle_t)mcip, NULL, + mcip->mci_flent); + mcip->mci_state_flags &= ~MCIS_NO_UNICAST_ADDR; + } + /* * Remove the flent associated with the MAC client */ @@ -1389,7 +1457,7 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) ASSERT(mcip->mci_tx_notify_cb_list == NULL); i_mac_share_free(mcip); - + mac_protect_fini(mcip); mac_client_remove(mcip); i_mac_perim_exit(mip); @@ -1495,6 +1563,335 @@ mac_update_subflow_priority(mac_client_impl_t *mcip) } /* + * Modify the TX or RX ring properties. We could either just move around + * rings, i.e add/remove rings given to a client. Or this might cause the + * client to move from hardware based to software or the other way around. + * If we want to reset this property, then we clear the mask, additionally + * if the client was given a non-default group we remove all rings except + * for 1 and give it back to the default group. 
+ */ +int +mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, + mac_resource_props_t *tmrp) +{ + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + uint8_t *mac_addr; + int err = 0; + mac_group_t *defgrp; + mac_group_t *group; + mac_group_t *ngrp; + mac_resource_props_t *cmrp = MCIP_RESOURCE_PROPS(mcip); + uint_t ringcnt; + boolean_t unspec; + + if (mcip->mci_share != NULL) + return (EINVAL); + + if (mrp->mrp_mask & MRP_RX_RINGS) { + unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; + group = flent->fe_rx_ring_group; + defgrp = MAC_DEFAULT_RX_GROUP(mip); + mac_addr = flent->fe_flow_desc.fd_dst_mac; + + /* + * No resulting change. If we are resetting on a client on + * which there was no rx rings property. For dynamic group + * if we are setting the same number of rings already set. + * For static group if we are requesting a group again. + */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (!(tmrp->mrp_mask & MRP_RX_RINGS)) + return (0); + } else { + if (unspec) { + if (tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + return (0); + } else if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + if ((tmrp->mrp_mask & MRP_RX_RINGS) && + !(tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) && + mrp->mrp_nrxrings == tmrp->mrp_nrxrings) { + return (0); + } + } + } + /* Resetting the prop */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + /* + * We will just keep one ring and give others back if + * we are not the primary. For the primary we give + * all the rings in the default group except the + * default ring. If it is a static group, then + * we don't do anything, but clear the MRP_RX_RINGS + * flag. + */ + if (group != defgrp) { + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + /* + * This group has reserved rings + * that need to be released now, + * so does the group. 
+ */ + MAC_RX_RING_RELEASED(mip, + group->mrg_cur_count); + MAC_RX_GRP_RELEASED(mip); + if ((flent->fe_type & + FLOW_PRIMARY_MAC) != 0) { + if (mip->mi_nactiveclients == + 1) { + (void) + mac_rx_switch_group( + mcip, group, + defgrp); + return (0); + } else { + cmrp->mrp_nrxrings = + group-> + mrg_cur_count + + defgrp-> + mrg_cur_count - 1; + } + } else { + cmrp->mrp_nrxrings = 1; + } + (void) mac_group_ring_modify(mcip, + group, defgrp); + } else { + /* + * If this is a static group, we + * need to release the group. The + * client will remain in the same + * group till some other client + * needs this group. + */ + MAC_RX_GRP_RELEASED(mip); + } + /* Let check if we can give this an excl group */ + } else if (group == defgrp) { + ngrp = mac_reserve_rx_group(mcip, mac_addr, + B_TRUE); + /* Couldn't give it a group, that's fine */ + if (ngrp == NULL) + return (0); + /* Switch to H/W */ + if (mac_rx_switch_group(mcip, defgrp, ngrp) != + 0) { + mac_stop_group(ngrp); + return (0); + } + } + /* + * If the client is in the default group, we will + * just clear the MRP_RX_RINGS and leave it as + * it rather than look for an exclusive group + * for it. 
+ */ + return (0); + } + + if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) { + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); + if (ngrp == NULL) + return (ENOSPC); + + /* Switch to H/W */ + if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { + mac_release_rx_group(mcip, ngrp); + return (ENOSPC); + } + MAC_RX_GRP_RESERVED(mip); + if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) + MAC_RX_RING_RESERVED(mip, ngrp->mrg_cur_count); + } else if (group != defgrp && !unspec && + mrp->mrp_nrxrings == 0) { + /* Switch to S/W */ + ringcnt = group->mrg_cur_count; + if (mac_rx_switch_group(mcip, group, defgrp) != 0) + return (ENOSPC); + if (tmrp->mrp_mask & MRP_RX_RINGS) { + MAC_RX_GRP_RELEASED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RELEASED(mip, ringcnt); + } + } + } else if (group != defgrp && mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + err = mac_group_ring_modify(mcip, group, defgrp); + if (err != 0) + return (err); + /* + * Update the accounting. If this group + * already had explicitly reserved rings, + * we need to update the rings based on + * the new ring count. If this group + * had not explicitly reserved rings, + * then we just reserve the rings asked for + * and reserve the group. + */ + if (tmrp->mrp_mask & MRP_RX_RINGS) { + if (ringcnt > group->mrg_cur_count) { + MAC_RX_RING_RELEASED(mip, + ringcnt - group->mrg_cur_count); + } else { + MAC_RX_RING_RESERVED(mip, + group->mrg_cur_count - ringcnt); + } + } else { + MAC_RX_RING_RESERVED(mip, group->mrg_cur_count); + MAC_RX_GRP_RESERVED(mip); + } + } + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; + group = flent->fe_tx_ring_group; + defgrp = MAC_DEFAULT_TX_GROUP(mip); + + /* + * For static groups we only allow rings=0 or resetting the + * rings property. 
+ */ + if (mrp->mrp_ntxrings > 0 && + mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + return (ENOTSUP); + } + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (!(tmrp->mrp_mask & MRP_TX_RINGS)) + return (0); + } else { + if (unspec) { + if (tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + return (0); + } else if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + if ((tmrp->mrp_mask & MRP_TX_RINGS) && + !(tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) && + mrp->mrp_ntxrings == tmrp->mrp_ntxrings) { + return (0); + } + } + } + /* Resetting the prop */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (group != defgrp) { + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + if ((flent->fe_type & + FLOW_PRIMARY_MAC) != 0) { + mac_tx_client_quiesce( + (mac_client_handle_t) + mcip); + mac_tx_switch_group(mcip, + group, defgrp); + mac_tx_client_restart( + (mac_client_handle_t) + mcip); + MAC_TX_GRP_RELEASED(mip); + MAC_TX_RING_RELEASED(mip, + ringcnt); + return (0); + } + cmrp->mrp_ntxrings = 1; + (void) mac_group_ring_modify(mcip, + group, defgrp); + /* + * This group has reserved rings + * that need to be released now. + */ + MAC_TX_RING_RELEASED(mip, ringcnt); + } + /* + * If this is a static group, we + * need to release the group. The + * client will remain in the same + * group till some other client + * needs this group. + */ + MAC_TX_GRP_RELEASED(mip); + } else if (group == defgrp && + (flent->fe_type & FLOW_PRIMARY_MAC) == 0) { + ngrp = mac_reserve_tx_group(mcip, B_TRUE); + if (ngrp == NULL) + return (0); + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, defgrp, ngrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + } + /* + * If the client is in the default group, we will + * just clear the MRP_TX_RINGS and leave it as + * it rather than look for an exclusive group + * for it. 
+ */ + return (0); + } + + /* Switch to H/W */ + if (group == defgrp && ((mrp->mrp_ntxrings > 0) || unspec)) { + ngrp = mac_reserve_tx_group(mcip, B_TRUE); + if (ngrp == NULL) + return (ENOSPC); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, defgrp, ngrp); + mac_tx_client_restart((mac_client_handle_t)mcip); + MAC_TX_GRP_RESERVED(mip); + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) + MAC_TX_RING_RESERVED(mip, ngrp->mrg_cur_count); + /* Switch to S/W */ + } else if (group != defgrp && !unspec && + mrp->mrp_ntxrings == 0) { + /* Switch to S/W */ + ringcnt = group->mrg_cur_count; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, group, defgrp); + mac_tx_client_restart((mac_client_handle_t)mcip); + if (tmrp->mrp_mask & MRP_TX_RINGS) { + MAC_TX_GRP_RELEASED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RELEASED(mip, ringcnt); + } + } + } else if (group != defgrp && mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + err = mac_group_ring_modify(mcip, group, defgrp); + if (err != 0) + return (err); + /* + * Update the accounting. If this group + * already had explicitly reserved rings, + * we need to update the rings based on + * the new ring count. If this group + * had not explicitly reserved rings, + * then we just reserve the rings asked for + * and reserve the group. + */ + if (tmrp->mrp_mask & MRP_TX_RINGS) { + if (ringcnt > group->mrg_cur_count) { + MAC_TX_RING_RELEASED(mip, + ringcnt - group->mrg_cur_count); + } else { + MAC_TX_RING_RESERVED(mip, + group->mrg_cur_count - ringcnt); + } + } else { + MAC_TX_RING_RESERVED(mip, group->mrg_cur_count); + MAC_TX_GRP_RESERVED(mip); + } + } + } + return (0); +} + +/* * When the MAC client is being brought up (i.e. 
we do a unicast_add) we need * to initialize the cpu and resource control structure in the * mac_client_impl_t from the mac_impl_t (i.e if there are any cached @@ -1506,16 +1903,73 @@ mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = (mac_impl_t *)mcip->mci_mip; int err = 0; + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *omrp, *nmrp = MCIP_RESOURCE_PROPS(mcip); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - err = mac_validate_props(mrp); + err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ? + mcip->mci_upper_mip : mip, mrp); if (err != 0) return (err); + /* + * Copy over the existing properties since mac_update_resources + * will modify the client's mrp. Currently, the saved property + * is used to determine the difference between existing and + * modified rings property. + */ + omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP); + bcopy(nmrp, omrp, sizeof (*omrp)); mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE); if (MCIP_DATAPATH_SETUP(mcip)) { /* + * We support rings only for primary client when there are + * multiple clients sharing the same MAC address (e.g. VLAN). 
+ */ + if (mrp->mrp_mask & MRP_RX_RINGS || + mrp->mrp_mask & MRP_TX_RINGS) { + + if ((err = mac_client_set_rings_prop(mcip, mrp, + omrp)) != 0) { + /* + * The ring change failed, but the client's + * properties (nmrp) already carry the new + * request; restore the ring counts and + * flags from the saved copy (omrp). + */ + if (omrp->mrp_mask & MRP_RX_RINGS) { + nmrp->mrp_mask |= MRP_RX_RINGS; + nmrp->mrp_nrxrings = omrp->mrp_nrxrings; + } else { + nmrp->mrp_mask &= ~MRP_RX_RINGS; + nmrp->mrp_nrxrings = 0; + } + if (omrp->mrp_mask & MRP_TX_RINGS) { + nmrp->mrp_mask |= MRP_TX_RINGS; + nmrp->mrp_ntxrings = omrp->mrp_ntxrings; + } else { + nmrp->mrp_mask &= ~MRP_TX_RINGS; + nmrp->mrp_ntxrings = 0; + } + /* + * The UNSPEC flags must be rolled back on + * nmrp as well; omrp is only the reference + * copy and is freed below. + */ + if (omrp->mrp_mask & MRP_RXRINGS_UNSPEC) + nmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else + nmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (omrp->mrp_mask & MRP_TXRINGS_UNSPEC) + nmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else + nmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + kmem_free(omrp, sizeof (*omrp)); + return (err); + } + + /* + * If we modified the rings property of the primary + * we need to update the property fields of its + * VLANs as they inherit the primary's properties. + */ + if (mac_is_primary_client(mcip)) { + mac_set_prim_vlan_rings(mip, + MCIP_RESOURCE_PROPS(mcip)); + } + } + /* * We have to set this prior to calling mac_flow_modify.
*/ if (mrp->mrp_mask & MRP_PRIORITY) { @@ -1528,11 +1982,11 @@ mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp) } } - mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp); + mac_flow_modify(mip->mi_flow_tab, flent, mrp); if (mrp->mrp_mask & MRP_PRIORITY) mac_update_subflow_priority(mcip); - return (0); } + kmem_free(omrp, sizeof (*omrp)); return (0); } @@ -1562,8 +2016,12 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, */ bzero(&flow_desc, sizeof (flow_desc)); - flow_desc.fd_mac_len = mip->mi_type->mt_addr_length; - bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len); + ASSERT(mac_addr != NULL || + (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR)); + if (mac_addr != NULL) { + flow_desc.fd_mac_len = mip->mi_type->mt_addr_length; + bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len); + } flow_desc.fd_mask = FLOW_LINK_DST; if (vid != 0) { flow_desc.fd_vid = vid; @@ -1612,6 +2070,7 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, flent_flags, flent)) != 0) return (err); + mac_misc_stat_create(*flent); FLOW_MARK(*flent, FE_INCIPIENT); (*flent)->fe_mcip = mcip; @@ -1700,6 +2159,9 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, boolean_t nactiveclients_added = B_FALSE; flow_entry_t *flent; int err = 0; + boolean_t no_unicast; + + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; if ((err = mac_start((mac_handle_t)mip)) != 0) goto bail; @@ -1725,10 +2187,11 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, /* We are configuring the unicast flow now */ if (!MCIP_DATAPATH_SETUP(mcip)) { - MAC_CLIENT_SET_PRIORITY_RANGE(mcip, - (mrp->mrp_mask & MRP_PRIORITY) ? mrp->mrp_priority : - MPL_LINK_DEFAULT); - + if (mrp != NULL) { + MAC_CLIENT_SET_PRIORITY_RANGE(mcip, + (mrp->mrp_mask & MRP_PRIORITY) ? 
mrp->mrp_priority : + MPL_LINK_DEFAULT); + } if ((err = mac_unicast_flow_create(mcip, mac_addr, vid, isprimary, B_TRUE, &flent, mrp)) != 0) goto bail; @@ -1743,6 +2206,8 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, if ((err = mac_datapath_setup(mcip, flent, SRST_LINK)) != 0) goto bail; + if (no_unicast) + goto done_setup; /* * The unicast MAC address must have been added successfully. */ @@ -1756,6 +2221,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, } else { mac_address_t *map = mcip->mci_unicast; + ASSERT(!no_unicast); /* * A unicast flow already exists for that MAC client, * this flow must be the same mac address but with @@ -1794,7 +2260,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, mcip->mci_unicast_list = muip; rw_exit(&mcip->mci_rw_lock); - +done_setup: /* * First add the flent to the flow list of this mcip. Then set * the mip's mi_single_active_client if needed. The Rx path assumes @@ -1802,7 +2268,6 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, * flent. */ mac_client_add_to_flow_list(mcip, flent); - if (nactiveclients_added) mac_update_single_active_client(mip); /* @@ -1889,7 +2354,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, boolean_t fastpath_disabled = B_FALSE; boolean_t is_primary = (flags & MAC_UNICAST_PRIMARY); boolean_t is_unicast_hw = (flags & MAC_UNICAST_HW); - mac_resource_props_t mrp; + mac_resource_props_t *mrp; boolean_t passive_client = B_FALSE; mac_unicast_impl_t *muip; boolean_t is_vnic_primary = @@ -1899,6 +2364,13 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0))); /* + * Can't unicast add if the client asked only for minimal datapath + * setup. + */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) + return (ENOTSUP); + + /* * Check for an attempted use of the current Port VLAN ID, if enabled. * No client may use it. 
*/ @@ -2020,7 +2492,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, mip->mi_state_flags |= MIS_EXCLUSIVE; } - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC | MCIS_IS_AGGR_PORT))) { /* @@ -2029,11 +2501,40 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, * port, its property should be set in the mcip when the * VNIC/aggr was created. */ - mac_get_resources((mac_handle_t)mip, &mrp); - (void) mac_client_set_resources(mch, &mrp); + mac_get_resources((mac_handle_t)mip, mrp); + (void) mac_client_set_resources(mch, mrp); } else if (mcip->mci_state_flags & MCIS_IS_VNIC) { - bcopy(MCIP_RESOURCE_PROPS(mcip), &mrp, - sizeof (mac_resource_props_t)); + /* + * This is a primary VLAN client, we don't support + * specifying rings property for this as it inherits the + * rings property from its MAC. + */ + if (is_vnic_primary) { + mac_resource_props_t *vmrp; + + vmrp = MCIP_RESOURCE_PROPS(mcip); + if (vmrp->mrp_mask & MRP_RX_RINGS || + vmrp->mrp_mask & MRP_TX_RINGS) { + if (fastpath_disabled) + mac_fastpath_enable((mac_handle_t)mip); + kmem_free(mrp, sizeof (*mrp)); + return (ENOTSUP); + } + /* + * Additionally we also need to inherit any + * rings property from the MAC. 
+ */ + mac_get_resources((mac_handle_t)mip, mrp); + if (mrp->mrp_mask & MRP_RX_RINGS) { + vmrp->mrp_mask |= MRP_RX_RINGS; + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + vmrp->mrp_mask |= MRP_TX_RINGS; + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + } + } + bcopy(MCIP_RESOURCE_PROPS(mcip), mrp, sizeof (*mrp)); } muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP); @@ -2151,6 +2652,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, ASSERT((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0); mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY; + kmem_free(mrp, sizeof (*mrp)); /* * Stash the unicast address handle, we will use it when @@ -2161,10 +2663,12 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, return (0); } - err = mac_client_datapath_setup(mcip, vid, mac_addr, &mrp, + err = mac_client_datapath_setup(mcip, vid, mac_addr, mrp, is_primary || is_vnic_primary, muip); if (err != 0) goto bail_out; + + kmem_free(mrp, sizeof (*mrp)); *mah = (mac_unicast_handle_t)muip; return (0); @@ -2178,6 +2682,7 @@ bail_out: mip->mi_driver); } } + kmem_free(mrp, sizeof (*mrp)); kmem_free(muip, sizeof (mac_unicast_impl_t)); return (err); } @@ -2227,25 +2732,33 @@ mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, return (err); } -void +static void mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, flow_entry_t *flent) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = mcip->mci_mip; + boolean_t no_unicast; /* - * We would have initialized subflows etc. only if we brought up - * the primary client and set the unicast unicast address etc. - * Deactivate the flows. The flow entry will be removed from the - * active flow tables, and the associated SRS, softrings etc will - * be deleted. 
But the flow entry itself won't be destroyed, instead - * it will continue to be archived off the the global flow hash - * list, for a possible future activation when say IP is plumbed - * again. + * If we have not added a unicast address for this MAC client, just + * teardown the datapath. */ - mac_link_release_flows(mch); + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; + if (!no_unicast) { + /* + * We would have initialized subflows etc. only if we brought + * up the primary client and set the unicast unicast address + * etc. Deactivate the flows. The flow entry will be removed + * from the active flow tables, and the associated SRS, + * softrings etc will be deleted. But the flow entry itself + * won't be destroyed, instead it will continue to be archived + * off the the global flow hash list, for a possible future + * activation when say IP is plumbed again. + */ + mac_link_release_flows(mch); + } mip->mi_nactiveclients--; mac_update_single_active_client(mip); @@ -2287,6 +2800,7 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, flent->fe_tx_srs == NULL && flent->fe_rx_srs_cnt == 0); flent->fe_flags = FE_MC_NO_DATAPATH; flow_stat_destroy(flent); + mac_misc_stat_delete(flent); /* Initialize the receiver function to a safe routine */ flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; @@ -2297,8 +2811,9 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, mutex_exit(&flent->fe_lock); if (mip->mi_type->mt_brdcst_addr != NULL) { + ASSERT(muip != NULL || no_unicast); mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, - muip->mui_vid); + muip != NULL ? 
muip->mui_vid : VLAN_ID_NONE); } if (mip->mi_nactiveclients == 1) { @@ -2324,8 +2839,12 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK; - kmem_free(muip, sizeof (mac_unicast_impl_t)); + if (muip != NULL) + kmem_free(muip, sizeof (mac_unicast_impl_t)); + mac_protect_cancel_timer(mcip); + mac_protect_flush_dhcp(mcip); + bzero(&mcip->mci_misc_stat, sizeof (mcip->mci_misc_stat)); /* * Disable fastpath if this is a VNIC or a VLAN. */ @@ -2345,7 +2864,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mac_unicast_impl_t *pre; mac_impl_t *mip = mcip->mci_mip; flow_entry_t *flent; - boolean_t isprimary = B_FALSE; + uint16_t mui_vid; i_mac_perim_enter(mip); if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) { @@ -2436,11 +2955,6 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) rw_exit(&mcip->mci_rw_lock); } - if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && - muip->mui_vid == 0) { - mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; - isprimary = B_TRUE; - } if (!mac_client_single_rcvr(mcip)) { /* * This MAC client is shared by more than one unicast @@ -2490,34 +3004,39 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) return (0); } + mui_vid = muip->mui_vid; mac_client_datapath_teardown(mch, muip, flent); + if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && mui_vid == 0) { + mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; + } else { + i_mac_perim_exit(mip); + return (0); + } + /* * If we are removing the primary, check if we have a passive primary * client that we need to activate now. 
*/ - if (!isprimary) { - i_mac_perim_exit(mip); - return (0); - } mcip = mac_get_passive_primary_client(mip); if (mcip != NULL) { - mac_resource_props_t mrp; + mac_resource_props_t *mrp; mac_unicast_impl_t *muip; mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY; - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + /* * Apply the property cached in the mac_impl_t to the * primary mac client. */ - mac_get_resources((mac_handle_t)mip, &mrp); - (void) mac_client_set_resources(mch, &mrp); + mac_get_resources((mac_handle_t)mip, mrp); + (void) mac_client_set_resources(mch, mrp); ASSERT(mcip->mci_p_unicast_list != NULL); muip = mcip->mci_p_unicast_list; mcip->mci_p_unicast_list = NULL; if (mac_client_datapath_setup(mcip, VLAN_ID_NONE, - mip->mi_addr, &mrp, B_TRUE, muip) == 0) { + mip->mi_addr, mrp, B_TRUE, muip) == 0) { if (mcip->mci_rx_p_fn != NULL) { mac_rx_set(mch, mcip->mci_rx_p_fn, mcip->mci_rx_p_arg); @@ -2527,6 +3046,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) } else { kmem_free(muip, sizeof (mac_unicast_impl_t)); } + kmem_free(mrp, sizeof (*mrp)); } i_mac_perim_exit(mip); return (0); @@ -2775,36 +3295,6 @@ mac_promisc_remove(mac_promisc_handle_t mph) } /* - * Bump the count of the number of active Tx threads. This is maintained as - * a per CPU counter. On (CMT kind of) machines with large number of CPUs, - * a single mci_tx_lock may become contended. However a count of the total - * number of Tx threads per client is needed in order to quiesce the Tx side - * prior to reassigning a Tx ring dynamically to another client. The thread - * that needs to quiesce the Tx traffic grabs all the percpu locks and checks - * the sum of the individual percpu refcnts. Each Tx data thread only grabs - * its own percpu lock and increments its own refcnt. 
- */ -void * -mac_tx_hold(mac_client_handle_t mch) -{ - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_tx_percpu_t *mytx; - int error; - - MAC_TX_TRY_HOLD(mcip, mytx, error); - return (error == 0 ? (void *)mytx : NULL); -} - -void -mac_tx_rele(mac_client_handle_t mch, void *mytx_handle) -{ - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_tx_percpu_t *mytx = mytx_handle; - - MAC_TX_RELE(mcip, mytx) -} - -/* * Send function invoked by MAC clients. */ mac_tx_cookie_t @@ -2872,8 +3362,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, srs_tx = &srs->srs_tx; if (srs_tx->st_mode == SRS_TX_DEFAULT && (srs->srs_state & SRS_ENQUEUED) == 0 && - mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL && - mp_chain->b_next == NULL) { + mip->mi_nactiveclients == 1 && mp_chain->b_next == NULL) { uint64_t obytes; /* @@ -2891,7 +3380,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, MAC_VID_CHECK(mcip, mp_chain, err); if (err != 0) { freemsg(mp_chain); - mcip->mci_stat_oerrors++; + mcip->mci_misc_stat.mms_txerrors++; goto done; } } @@ -2899,7 +3388,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, mp_chain = mac_add_vlan_tag(mp_chain, 0, mac_client_vid(mch)); if (mp_chain == NULL) { - mcip->mci_stat_oerrors++; + mcip->mci_misc_stat.mms_txerrors++; goto done; } } @@ -2908,17 +3397,11 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, obytes = (mp_chain->b_cont == NULL ? 
MBLKL(mp_chain) : msgdsize(mp_chain)); - MAC_TX(mip, srs_tx->st_arg2, mp_chain, - ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0)); - + MAC_TX(mip, srs_tx->st_arg2, mp_chain, mcip); if (mp_chain == NULL) { cookie = NULL; - mcip->mci_stat_obytes += obytes; - mcip->mci_stat_opackets += 1; - if ((srs->srs_type & SRST_FLOW) != 0) { - FLOW_STAT_UPDATE(flent, obytes, obytes); - FLOW_STAT_UPDATE(flent, opackets, 1); - } + SRS_TX_STAT_UPDATE(srs, opackets, 1); + SRS_TX_STAT_UPDATE(srs, obytes, obytes); } else { mutex_enter(&srs->srs_lock); cookie = mac_tx_srs_no_desc(srs, mp_chain, @@ -2978,7 +3461,14 @@ mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie) } mutex_enter(&mac_srs->srs_lock); - if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT) { + /* + * Only in the case of TX_FANOUT and TX_AGGR, the underlying + * softring (s_ring_state) will have the HIWAT set. This is + * the multiple Tx ring flow control case. For all other + * case, SRS (srs_state) will store the condition. + */ + if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR) { if (cookie != NULL) { sringp = (mac_soft_ring_t *)cookie; mutex_enter(&sringp->s_ring_lock); @@ -2986,8 +3476,8 @@ mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie) blocked = B_TRUE; mutex_exit(&sringp->s_ring_lock); } else { - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_state & S_RING_TX_HIWAT) { blocked = B_TRUE; @@ -3228,9 +3718,10 @@ mac_cpu_set(mac_client_handle_t mch, mac_resource_props_t *mrp) ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - if ((err = mac_validate_props(mrp)) != 0) + if ((err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ? 
+ mcip->mci_upper_mip : mip, mrp)) != 0) { return (err); - + } if (MCIP_DATAPATH_SETUP(mcip)) mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp); @@ -3256,14 +3747,20 @@ mac_client_set_resources(mac_client_handle_t mch, mac_resource_props_t *mrp) goto done; } - if (mrp->mrp_mask & MRP_CPUS) { + if (mrp->mrp_mask & (MRP_CPUS|MRP_POOL)) { err = mac_cpu_set(mch, mrp); if (err != 0) goto done; } - if (mrp->mrp_mask & MRP_PROTECT) + if (mrp->mrp_mask & MRP_PROTECT) { err = mac_protect_set(mch, mrp); + if (err != 0) + goto done; + } + + if ((mrp->mrp_mask & MRP_RX_RINGS) || (mrp->mrp_mask & MRP_TX_RINGS)) + err = mac_resource_ctl_set(mch, mrp); done: i_mac_perim_exit(mip); @@ -3283,6 +3780,20 @@ mac_client_get_resources(mac_client_handle_t mch, mac_resource_props_t *mrp) } /* + * Return the effective properties currently associated with the specified + * MAC client. + */ +void +mac_client_get_effective_resources(mac_client_handle_t mch, + mac_resource_props_t *mrp) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_resource_props_t *mcip_mrp = MCIP_EFFECTIVE_PROPS(mcip); + + bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t)); +} + +/* * Pass a copy of the specified packet to the promiscuous callbacks * of the specified MAC. 
* @@ -3708,6 +4219,16 @@ mac_get_lower_mac_handle(mac_handle_t mh) return (((vnic_t *)mip->mi_driver)->vn_lower_mh); } +boolean_t +mac_is_vnic_primary(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + ASSERT(mac_is_vnic(mh)); + return (((vnic_t *)mip->mi_driver)->vn_addr_type == + VNIC_MAC_ADDR_TYPE_PRIMARY); +} + void mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp, boolean_t is_user_flow) @@ -3728,17 +4249,66 @@ mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp, } } if (nmrp->mrp_mask & MRP_MAXBW) { - cmrp->mrp_maxbw = nmrp->mrp_maxbw; - if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) + if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { cmrp->mrp_mask &= ~MRP_MAXBW; - else + cmrp->mrp_maxbw = 0; + } else { cmrp->mrp_mask |= MRP_MAXBW; + cmrp->mrp_maxbw = nmrp->mrp_maxbw; + } } if (nmrp->mrp_mask & MRP_CPUS) MAC_COPY_CPUS(nmrp, cmrp); + if (nmrp->mrp_mask & MRP_POOL) { + if (strlen(nmrp->mrp_pool) == 0) { + cmrp->mrp_mask &= ~MRP_POOL; + bzero(cmrp->mrp_pool, sizeof (cmrp->mrp_pool)); + } else { + cmrp->mrp_mask |= MRP_POOL; + (void) strncpy(cmrp->mrp_pool, nmrp->mrp_pool, + sizeof (cmrp->mrp_pool)); + } + + } + if (nmrp->mrp_mask & MRP_PROTECT) mac_protect_update(nmrp, cmrp); + + /* + * Update the rings specified. 
+ */ + if (nmrp->mrp_mask & MRP_RX_RINGS) { + if (nmrp->mrp_mask & MRP_RINGS_RESET) { + cmrp->mrp_mask &= ~MRP_RX_RINGS; + if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + cmrp->mrp_nrxrings = 0; + } else { + cmrp->mrp_mask |= MRP_RX_RINGS; + cmrp->mrp_nrxrings = nmrp->mrp_nrxrings; + } + } + if (nmrp->mrp_mask & MRP_TX_RINGS) { + if (nmrp->mrp_mask & MRP_RINGS_RESET) { + cmrp->mrp_mask &= ~MRP_TX_RINGS; + if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + cmrp->mrp_ntxrings = 0; + } else { + cmrp->mrp_mask |= MRP_TX_RINGS; + cmrp->mrp_ntxrings = nmrp->mrp_ntxrings; + } + } + if (nmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (nmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; } } @@ -3757,26 +4327,29 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_client_impl_t *mcip; int err = 0; uint32_t resmask, newresmask; - mac_resource_props_t tmrp, umrp; + mac_resource_props_t *tmrp, *umrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - err = mac_validate_props(mrp); + err = mac_validate_props(mip, mrp); if (err != 0) return (err); - bcopy(&mip->mi_resource_props, &umrp, sizeof (mac_resource_props_t)); - resmask = umrp.mrp_mask; - mac_update_resources(mrp, &umrp, B_FALSE); - newresmask = umrp.mrp_mask; + umrp = kmem_zalloc(sizeof (*umrp), KM_SLEEP); + bcopy(&mip->mi_resource_props, umrp, sizeof (*umrp)); + resmask = umrp->mrp_mask; + mac_update_resources(mrp, umrp, B_FALSE); + newresmask = umrp->mrp_mask; if (resmask == 0 && newresmask != 0) { /* - * Bandwidth, priority or cpu link properties configured, + * Bandwidth, priority, cpu or pool link properties configured, * must disable fastpath. 
*/ - if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) + if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) { + kmem_free(umrp, sizeof (*umrp)); return (err); + } } /* @@ -3784,19 +4357,93 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) * we use a copy of bind_cpu and finally cache bind_cpu in mip. * This allows us to cache only user edits in mip. */ - bcopy(mrp, &tmrp, sizeof (mac_resource_props_t)); + tmrp = kmem_zalloc(sizeof (*tmrp), KM_SLEEP); + bcopy(mrp, tmrp, sizeof (*tmrp)); mcip = mac_primary_client_handle(mip); if (mcip != NULL && (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0) { - err = - mac_client_set_resources((mac_client_handle_t)mcip, &tmrp); + err = mac_client_set_resources((mac_client_handle_t)mcip, tmrp); + } else if ((mrp->mrp_mask & MRP_RX_RINGS || + mrp->mrp_mask & MRP_TX_RINGS)) { + mac_client_impl_t *vmcip; + + /* + * If the primary is not up, we need to check if there + * are any VLANs on this primary. If there are then + * we need to set this property on the VLANs since + * VLANs follow the primary they are based on. Just + * look for the first VLAN and change its properties, + * all the other VLANs should be in the same group. + */ + for (vmcip = mip->mi_clients_list; vmcip != NULL; + vmcip = vmcip->mci_client_next) { + if ((vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) && + mac_client_vid((mac_client_handle_t)vmcip) != + VLAN_ID_NONE) { + break; + } + } + if (vmcip != NULL) { + mac_resource_props_t *omrp; + mac_resource_props_t *vmrp; + + omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP); + bcopy(MCIP_RESOURCE_PROPS(vmcip), omrp, sizeof (*omrp)); + /* + * We dont' call mac_update_resources since we + * want to take only the ring properties and + * not all the properties that may have changed. 
+ */ + vmrp = MCIP_RESOURCE_PROPS(vmcip); + if (mrp->mrp_mask & MRP_RX_RINGS) { + if (mrp->mrp_mask & MRP_RINGS_RESET) { + vmrp->mrp_mask &= ~MRP_RX_RINGS; + if (vmrp->mrp_mask & + MRP_RXRINGS_UNSPEC) { + vmrp->mrp_mask &= + ~MRP_RXRINGS_UNSPEC; + } + vmrp->mrp_nrxrings = 0; + } else { + vmrp->mrp_mask |= MRP_RX_RINGS; + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + } + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + if (mrp->mrp_mask & MRP_RINGS_RESET) { + vmrp->mrp_mask &= ~MRP_TX_RINGS; + if (vmrp->mrp_mask & + MRP_TXRINGS_UNSPEC) { + vmrp->mrp_mask &= + ~MRP_TXRINGS_UNSPEC; + } + vmrp->mrp_ntxrings = 0; + } else { + vmrp->mrp_mask |= MRP_TX_RINGS; + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + } + } + if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + + if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + + if ((err = mac_client_set_rings_prop(vmcip, mrp, + omrp)) != 0) { + bcopy(omrp, MCIP_RESOURCE_PROPS(vmcip), + sizeof (*omrp)); + } else { + mac_set_prim_vlan_rings(mip, vmrp); + } + kmem_free(omrp, sizeof (*omrp)); + } } /* Only update the values if mac_client_set_resources succeeded */ if (err == 0) { - bcopy(&umrp, &mip->mi_resource_props, - sizeof (mac_resource_props_t)); + bcopy(umrp, &mip->mi_resource_props, sizeof (*umrp)); /* - * If bankwidth, priority or cpu link properties cleared, + * If bandwidth, priority or cpu link properties cleared, * renable fastpath. 
*/ if (resmask != 0 && newresmask == 0) @@ -3804,6 +4451,8 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) } else if (resmask == 0 && newresmask != 0) { mac_fastpath_enable((mac_handle_t)mip); } + kmem_free(tmrp, sizeof (*tmrp)); + kmem_free(umrp, sizeof (*umrp)); return (err); } @@ -3827,17 +4476,33 @@ mac_get_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_impl_t *mip = (mac_impl_t *)mh; mac_client_impl_t *mcip; - if (mip->mi_state_flags & MIS_IS_VNIC) { - mcip = mac_primary_client_handle(mip); - if (mcip != NULL) { - mac_client_get_resources((mac_client_handle_t)mcip, - mrp); - return; - } + mcip = mac_primary_client_handle(mip); + if (mcip != NULL) { + mac_client_get_resources((mac_client_handle_t)mcip, mrp); + return; } bcopy(&mip->mi_resource_props, mrp, sizeof (mac_resource_props_t)); } +/* + * Get the effective properties from the primary client of the + * specified MAC instance. + */ +void +mac_get_effective_resources(mac_handle_t mh, mac_resource_props_t *mrp) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_client_impl_t *mcip; + + mcip = mac_primary_client_handle(mip); + if (mcip != NULL) { + mac_client_get_effective_resources((mac_client_handle_t)mcip, + mrp); + return; + } + bzero(mrp, sizeof (mac_resource_props_t)); +} + int mac_set_pvid(mac_handle_t mh, uint16_t pvid) { @@ -3904,8 +4569,10 @@ mac_rename_primary(mac_handle_t mh, const char *new_name) * the associated flow kstat. 
*/ if (mip->mi_state_flags & MIS_IS_VNIC) { + mac_client_impl_t *mcip = mac_vnic_lower(mip); ASSERT(new_name != NULL); - mac_rename_flow_names(mac_vnic_lower(mip), new_name); + mac_rename_flow_names(mcip, new_name); + mac_stat_rename(mcip); goto done; } /* @@ -3954,6 +4621,10 @@ mac_rename_primary(mac_handle_t mh, const char *new_name) } } + /* Recreate kstats associated with aggr pseudo rings */ + if (mip->mi_state_flags & MIS_IS_AGGR) + mac_pseudo_ring_stat_rename(mip); + done: i_mac_perim_exit(mip); return (0); @@ -4187,8 +4858,14 @@ mac_client_single_rcvr(mac_client_impl_t *mcip) } int -mac_validate_props(mac_resource_props_t *mrp) +mac_validate_props(mac_impl_t *mip, mac_resource_props_t *mrp) { + boolean_t reset; + uint32_t rings_needed; + uint32_t rings_avail; + mac_group_type_t gtype; + mac_resource_props_t *mip_mrp; + if (mrp == NULL) return (0); @@ -4246,6 +4923,100 @@ mac_validate_props(mac_resource_props_t *mrp) if (err != 0) return (err); } + + if (!(mrp->mrp_mask & MRP_RX_RINGS) && + !(mrp->mrp_mask & MRP_TX_RINGS)) { + return (0); + } + + /* + * mip will be null when we come from mac_flow_create or + * mac_link_flow_modify. In the latter case it is a user flow, + * for which we don't support rings. In the former we would + * have validated the props beforehand (i_mac_unicast_add -> + * mac_client_set_resources -> validate for the primary and + * vnic_dev_create -> mac_client_set_resources -> validate for + * a vnic. 
+ */ + if (mip == NULL) + return (0); + + /* + * We don't support setting rings property for a VNIC that is using a + * primary address (VLAN) + */ + if ((mip->mi_state_flags & MIS_IS_VNIC) && + mac_is_vnic_primary((mac_handle_t)mip)) { + return (ENOTSUP); + } + + mip_mrp = &mip->mi_resource_props; + /* + * The rings property should be validated against the NICs + * resources + */ + if (mip->mi_state_flags & MIS_IS_VNIC) + mip = (mac_impl_t *)mac_get_lower_mac_handle((mac_handle_t)mip); + + reset = mrp->mrp_mask & MRP_RINGS_RESET; + /* + * If groups are not supported, return error. + */ + if (((mrp->mrp_mask & MRP_RX_RINGS) && mip->mi_rx_groups == NULL) || + ((mrp->mrp_mask & MRP_TX_RINGS) && mip->mi_tx_groups == NULL)) { + return (EINVAL); + } + /* + * If we are just resetting, there is no validation needed. + */ + if (reset) + return (0); + + if (mrp->mrp_mask & MRP_RX_RINGS) { + rings_needed = mrp->mrp_nrxrings; + /* + * We just want to check if the number of additional + * rings requested is available. + */ + if (mip_mrp->mrp_mask & MRP_RX_RINGS) { + if (mrp->mrp_nrxrings > mip_mrp->mrp_nrxrings) + /* Just check for the additional rings */ + rings_needed -= mip_mrp->mrp_nrxrings; + else + /* We are not asking for additional rings */ + rings_needed = 0; + } + rings_avail = mip->mi_rxrings_avail; + gtype = mip->mi_rx_group_type; + } else { + rings_needed = mrp->mrp_ntxrings; + /* Similarly for the TX rings */ + if (mip_mrp->mrp_mask & MRP_TX_RINGS) { + if (mrp->mrp_ntxrings > mip_mrp->mrp_ntxrings) + /* Just check for the additional rings */ + rings_needed -= mip_mrp->mrp_ntxrings; + else + /* We are not asking for additional rings */ + rings_needed = 0; + } + rings_avail = mip->mi_txrings_avail; + gtype = mip->mi_tx_group_type; + } + + /* Error if the group is dynamic .. */ + if (gtype == MAC_GROUP_TYPE_DYNAMIC) { + /* + * .. and rings specified are more than available. 
+ */ + if (rings_needed > rings_avail) + return (EINVAL); + } else { + /* + * OR group is static and we have specified some rings. + */ + if (rings_needed > 0) + return (EINVAL); + } return (0); } @@ -4266,11 +5037,18 @@ mac_virtual_link_update(mac_impl_t *mip) * mac handle in the client. */ void -mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh) +mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh, + mac_resource_props_t *mrp) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = (mac_impl_t *)mh; - mcip->mci_upper_mip = (mac_impl_t *)mh; + mcip->mci_upper_mip = mip; + /* If there are any properties, copy it over too */ + if (mrp != NULL) { + bcopy(mrp, &mip->mi_resource_props, + sizeof (mac_resource_props_t)); + } } /* @@ -4326,15 +5104,7 @@ mac_unmark_exclusive(mac_handle_t mh) } /* - * Set the MTU for the specified MAC. Note that this mechanism depends on - * the driver calling mac_maxsdu_update() to update the link MTU if it was - * successful in setting its MTU. - * - * Note that there is potential for improvement here. A better model might be - * to not require drivers to call mac_maxsdu_update(), but rather have this - * function update mi_sdu_max and send notifications if the driver setprop - * callback succeeds. This would remove the burden and complexity from - * drivers. + * Set the MTU for the specified MAC. */ int mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) @@ -4352,9 +5122,18 @@ mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) old_mtu = mip->mi_sdu_max; + if (new_mtu == 0 || new_mtu < mip->mi_sdu_min) { + rv = EINVAL; + goto bail; + } + if (old_mtu != new_mtu) { rv = mip->mi_callbacks->mc_setprop(mip->mi_driver, "mtu", MAC_PROP_MTU, sizeof (uint_t), &new_mtu); + if (rv != 0) + goto bail; + rv = mac_maxsdu_update(mh, new_mtu); + ASSERT(rv == 0); } bail: @@ -4365,13 +5144,18 @@ bail: return (rv); } +/* + * Return the RX h/w information for the group indexed by grp_num. 
+ */ void -mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, - uint_t *n_rings, uint_t *type, uint_t *n_clnts, char *clnts_name) +mac_get_hwrxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, + uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts, + char *clnts_name) { mac_impl_t *mip = (mac_impl_t *)mh; mac_grp_client_t *mcip; uint_t i = 0, index = 0; + mac_ring_t *ring; /* Revisit when we implement fully dynamic group allocation */ ASSERT(grp_index >= 0 && grp_index < mip->mi_rx_group_count); @@ -4380,6 +5164,19 @@ mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, *grp_num = mip->mi_rx_groups[grp_index].mrg_index; *type = mip->mi_rx_groups[grp_index].mrg_type; *n_rings = mip->mi_rx_groups[grp_index].mrg_cur_count; + ring = mip->mi_rx_groups[grp_index].mrg_rings; + for (index = 0; index < mip->mi_rx_groups[grp_index].mrg_cur_count; + index++) { + rings[index] = ring->mr_index; + ring = ring->mr_next; + } + /* Assuming the 1st is the default group */ + index = 0; + if (grp_index == 0) { + (void) strlcpy(clnts_name, "<default,mcast>,", + MAXCLIENTNAMELEN); + index += strlen("<default,mcast>,"); + } for (mcip = mip->mi_rx_groups[grp_index].mrg_clients; mcip != NULL; mcip = mcip->mgc_next) { int name_len = strlen(mcip->mgc_client->mci_name); @@ -4410,10 +5207,194 @@ mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, rw_exit(&mip->mi_rw_lock); } +/* + * Return the TX h/w information for the group indexed by grp_num. 
+ */ +void +mac_get_hwtxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, + uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts, + char *clnts_name) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_grp_client_t *mcip; + uint_t i = 0, index = 0; + mac_ring_t *ring; + + /* Revisit when we implement fully dynamic group allocation */ + ASSERT(grp_index >= 0 && grp_index <= mip->mi_tx_group_count); + + rw_enter(&mip->mi_rw_lock, RW_READER); + *grp_num = mip->mi_tx_groups[grp_index].mrg_index > 0 ? + mip->mi_tx_groups[grp_index].mrg_index : grp_index; + *type = mip->mi_tx_groups[grp_index].mrg_type; + *n_rings = mip->mi_tx_groups[grp_index].mrg_cur_count; + ring = mip->mi_tx_groups[grp_index].mrg_rings; + for (index = 0; index < mip->mi_tx_groups[grp_index].mrg_cur_count; + index++) { + rings[index] = ring->mr_index; + ring = ring->mr_next; + } + index = 0; + /* Default group has an index of -1 */ + if (mip->mi_tx_groups[grp_index].mrg_index < 0) { + (void) strlcpy(clnts_name, "<default>,", + MAXCLIENTNAMELEN); + index += strlen("<default>,"); + } + for (mcip = mip->mi_tx_groups[grp_index].mrg_clients; mcip != NULL; + mcip = mcip->mgc_next) { + int name_len = strlen(mcip->mgc_client->mci_name); + + /* + * MAXCLIENTNAMELEN is the buffer size reserved for client + * names. + * XXXX Formating the client name string needs to be moved + * to user land when fixing the size of dhi_clnts in + * dld_hwgrpinfo_t. We should use n_clients * client_name for + * dhi_clntsin instead of MAXCLIENTNAMELEN + */ + if (index + name_len >= MAXCLIENTNAMELEN) { + index = MAXCLIENTNAMELEN; + break; + } + bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]), + name_len); + index += name_len; + clnts_name[index++] = ','; + i++; + } + + /* Get rid of the last , */ + if (index > 0) + clnts_name[index - 1] = '\0'; + *n_clnts = i; + rw_exit(&mip->mi_rw_lock); +} + +/* + * Return the group count for RX or TX. 
+ */ uint_t -mac_hwgrp_num(mac_handle_t mh) +mac_hwgrp_num(mac_handle_t mh, int type) { mac_impl_t *mip = (mac_impl_t *)mh; - return (mip->mi_rx_group_count); + /* + * Return the Rx and Tx group count; for the Tx we need to + * include the default too. + */ + return (type == MAC_RING_TYPE_RX ? mip->mi_rx_group_count : + mip->mi_tx_groups != NULL ? mip->mi_tx_group_count + 1 : 0); +} + +/* + * The total number of free TX rings for this MAC. + */ +uint_t +mac_txavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txrings_avail); +} + +/* + * The total number of free RX rings for this MAC. + */ +uint_t +mac_rxavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxrings_avail); +} + +/* + * The total number of reserved RX rings on this MAC. + */ +uint_t +mac_rxrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxrings_rsvd); +} + +/* + * The total number of reserved TX rings on this MAC. + */ +uint_t +mac_txrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txrings_rsvd); +} + +/* + * Total number of free RX groups on this MAC. + */ +uint_t +mac_rxhwlnksavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxhwclnt_avail); +} + +/* + * Total number of RX groups reserved on this MAC. + */ +uint_t +mac_rxhwlnksrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxhwclnt_used); +} + +/* + * Total number of free TX groups on this MAC. + */ +uint_t +mac_txhwlnksavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txhwclnt_avail); +} + +/* + * Total number of TX groups reserved on this MAC. + */ +uint_t +mac_txhwlnksrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txhwclnt_used); +} + +/* + * Initialize the rings property for a mac client. 
A non-0 value for + * rxring or txring specifies the number of rings required, a value + * of MAC_RXRINGS_NONE/MAC_TXRINGS_NONE specifies that it doesn't need + * any RX/TX rings and a value of MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE + * means the system can decide whether it can give any rings or not. + */ +void +mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + + if (rxrings != MAC_RXRINGS_DONTCARE) { + mrp->mrp_mask |= MRP_RX_RINGS; + mrp->mrp_nrxrings = rxrings; + } + + if (txrings != MAC_TXRINGS_DONTCARE) { + mrp->mrp_mask |= MRP_TX_RINGS; + mrp->mrp_ntxrings = txrings; + } } diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index 379e488ee2..6f1661d5f2 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -19,12 +19,15 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/types.h> #include <sys/callb.h> +#include <sys/cpupart.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> #include <sys/sdt.h> #include <sys/strsubr.h> #include <sys/strsun.h> @@ -40,6 +43,7 @@ #include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> #include <sys/mac_flow_impl.h> +#include <sys/mac_stat.h> static void mac_srs_soft_rings_signal(mac_soft_ring_set_t *, uint_t); static void mac_srs_update_fanout_list(mac_soft_ring_set_t *); @@ -56,14 +60,10 @@ static void mac_srs_remove_glist(mac_soft_ring_set_t *); static void mac_srs_fanout_list_free(mac_soft_ring_set_t *); static void mac_soft_ring_remove(mac_soft_ring_set_t *, mac_soft_ring_t *); -static int mac_compute_soft_ring_count(flow_entry_t *, int); +static int mac_compute_soft_ring_count(flow_entry_t *, int, int); static void mac_walk_srs_and_bind(int); static void mac_walk_srs_and_unbind(int); -extern mac_group_t *mac_reserve_rx_group(mac_client_impl_t *, uint8_t *, - mac_rx_group_reserve_type_t); -extern void mac_release_rx_group(mac_client_impl_t *, mac_group_t *); - extern boolean_t mac_latency_optimize; static kmem_cache_t *mac_srs_cache; @@ -92,14 +92,6 @@ int mac_soft_ring_max_q_cnt = 1024; int mac_soft_ring_min_q_cnt = 256; int mac_soft_ring_poll_thres = 16; -/* - * Default value of number of TX rings to be assigned to a MAC client. - * If less than 'mac_tx_ring_count' worth of Tx rings is available, then - * as many as is available will be assigned to the newly created MAC client. - * If no TX rings are available, then MAC client(s) will be assigned the - * default Tx ring. Default Tx ring can be shared among multiple MAC clients. - */ -uint32_t mac_tx_ring_count = 32; boolean_t mac_tx_serialize = B_FALSE; /* @@ -157,9 +149,11 @@ static krwlock_t mac_srs_g_lock; boolean_t mac_srs_thread_bind = B_TRUE; /* - * CPU to fallback to, used by mac_next_bind_cpu(). + * Whether Rx/Tx interrupts should be re-targeted. Disabled by default. + * dladm command would override this. 
*/ -processorid_t srs_bind_cpu = 0; +boolean_t mac_tx_intr_retarget = B_FALSE; +boolean_t mac_rx_intr_retarget = B_FALSE; /* * If cpu bindings are specified by user, then Tx SRS and its soft @@ -170,24 +164,39 @@ processorid_t srs_bind_cpu = 0; */ #define BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp) { \ processorid_t cpuid; \ - int i, j; \ + int i; \ mac_soft_ring_t *softring; \ + mac_cpus_t *srs_cpu; \ \ - cpuid = mrp->mrp_cpu[mrp->mrp_ncpus - 1]; \ - mac_srs_worker_bind(mac_tx_srs, cpuid); \ - if (TX_MULTI_RING_MODE(mac_tx_srs)) { \ - j = mrp->mrp_ncpus - 1; \ - for (i = 0; \ - i < mac_tx_srs->srs_oth_ring_count; i++, j--) { \ - if (j < 0) \ - j = mrp->mrp_ncpus - 1; \ - cpuid = mrp->mrp_cpu[j]; \ - softring = mac_tx_srs->srs_oth_soft_rings[i]; \ - (void) mac_soft_ring_bind(softring, cpuid); \ + srs_cpu = &mac_tx_srs->srs_cpu; \ + cpuid = srs_cpu->mc_tx_fanout_cpus[0]; \ + mac_srs_worker_bind(mac_tx_srs, cpuid); \ + if (MAC_TX_SOFT_RINGS(mac_tx_srs)) { \ + for (i = 0; i < mac_tx_srs->srs_tx_ring_count; i++) { \ + cpuid = srs_cpu->mc_tx_fanout_cpus[i]; \ + softring = mac_tx_srs->srs_tx_soft_rings[i]; \ + if (cpuid != -1) { \ + (void) mac_soft_ring_bind(softring, \ + cpuid); \ + } \ } \ } \ } +/* + * Re-targeting is allowed only for exclusive group or for primary. 
+ */ +#define RETARGETABLE_CLIENT(group, mcip) \ + ((((group) != NULL) && \ + ((group)->mrg_state == MAC_GROUP_STATE_RESERVED)) || \ + mac_is_primary_client(mcip)) + +#define MAC_RING_RETARGETABLE(ring) \ + (((ring) != NULL) && \ + ((ring)->mr_info.mri_intr.mi_ddi_handle != NULL) && \ + !((ring)->mr_info.mri_intr.mi_ddi_shared)) + + /* INIT and FINI ROUTINES */ void @@ -218,7 +227,7 @@ mac_soft_ring_finish(void) } static void -mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs, boolean_t release_tx_ring) +mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs) { mac_soft_ring_t *softring, *next, *head; @@ -240,7 +249,7 @@ mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs, boolean_t release_tx_ring) for (softring = head; softring != NULL; softring = next) { next = softring->s_ring_next; - mac_soft_ring_free(softring, release_tx_ring); + mac_soft_ring_free(softring); } } @@ -518,21 +527,30 @@ mac_srs_poll_state_change(mac_soft_ring_set_t *mac_srs, /* * Return the next CPU to be used to bind a MAC kernel thread. + * If a cpupart is specified, the cpu chosen must be from that + * cpu partition. 
*/ static processorid_t -mac_next_bind_cpu(void) +mac_next_bind_cpu(cpupart_t *cpupart) { - static processorid_t srs_curr_cpu = -1; - cpu_t *cp; + static cpu_t *cp = NULL; + cpu_t *cp_start; ASSERT(MUTEX_HELD(&cpu_lock)); - srs_curr_cpu++; - cp = cpu_get(srs_curr_cpu); - if (cp == NULL || !cpu_is_online(cp)) - srs_curr_cpu = srs_bind_cpu; + if (cp == NULL) + cp = cpu_list; + + cp = cp->cpu_next_onln; + cp_start = cp; + + do { + if ((cpupart == NULL) || (cp->cpu_part == cpupart)) + return (cp->cpu_id); - return (srs_curr_cpu); + } while ((cp = cp->cpu_next_onln) != cp_start); + + return (NULL); } /* ARGSUSED */ @@ -588,7 +606,7 @@ mac_srs_cpu_setup(cpu_setup_t what, int id, void *arg) */ boolean_t mac_use_bw_heuristic = B_TRUE; static int -mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt) +mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus) { uint64_t cpu_speed, bw = 0; int srings = 0; @@ -675,12 +693,85 @@ mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt) srings = 0; } /* Do some more massaging */ - srings = min(srings, ncpus); + srings = min(srings, maxcpus); srings = min(srings, MAX_SR_FANOUT); return (srings); } /* + * mac_tx_cpu_init: + * set up CPUs for Tx interrupt re-targeting and Tx worker + * thread binding + */ +static void +mac_tx_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp, + cpupart_t *cpupart) +{ + mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs; + mac_srs_tx_t *srs_tx = &tx_srs->srs_tx; + mac_cpus_t *srs_cpu = &tx_srs->srs_cpu; + mac_soft_ring_t *sringp; + mac_ring_t *ring; + processorid_t worker_cpuid; + boolean_t retargetable_client = B_FALSE; + int i, j; + + if (RETARGETABLE_CLIENT((mac_group_t *)flent->fe_tx_ring_group, + flent->fe_mcip)) { + retargetable_client = B_TRUE; + } + + if (MAC_TX_SOFT_RINGS(tx_srs)) { + if (mrp != NULL) + j = mrp->mrp_ncpus - 1; + for (i = 0; i < tx_srs->srs_tx_ring_count; i++) { + if (mrp != NULL) { + if (j < 0) + j = mrp->mrp_ncpus - 1; + worker_cpuid 
= mrp->mrp_cpu[j]; + } else { + /* + * Bind interrupt to the next CPU available + * and leave the worker unbound. + */ + worker_cpuid = -1; + } + sringp = tx_srs->srs_tx_soft_rings[i]; + ring = (mac_ring_t *)sringp->s_ring_tx_arg2; + srs_cpu->mc_tx_fanout_cpus[i] = worker_cpuid; + if (MAC_RING_RETARGETABLE(ring) && + retargetable_client) { + mutex_enter(&cpu_lock); + srs_cpu->mc_tx_intr_cpu[i] = + (mrp != NULL) ? mrp->mrp_cpu[j] : + (mac_tx_intr_retarget ? + mac_next_bind_cpu(cpupart) : -1); + mutex_exit(&cpu_lock); + } else { + srs_cpu->mc_tx_intr_cpu[i] = -1; + } + if (mrp != NULL) + j--; + } + } else { + /* Tx mac_ring_handle_t is stored in st_arg2 */ + srs_cpu->mc_tx_fanout_cpus[0] = + (mrp != NULL) ? mrp->mrp_cpu[mrp->mrp_ncpus - 1] : -1; + ring = (mac_ring_t *)srs_tx->st_arg2; + if (MAC_RING_RETARGETABLE(ring) && retargetable_client) { + mutex_enter(&cpu_lock); + srs_cpu->mc_tx_intr_cpu[0] = (mrp != NULL) ? + mrp->mrp_cpu[mrp->mrp_ncpus - 1] : + (mac_tx_intr_retarget ? + mac_next_bind_cpu(cpupart) : -1); + mutex_exit(&cpu_lock); + } else { + srs_cpu->mc_tx_intr_cpu[0] = -1; + } + } +} + +/* * Assignment of user specified CPUs to a link. * * Minimum CPUs required to get an optimal assignmet: @@ -719,6 +810,7 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) int rx_srs_cnt, reqd_rx_cpu_cnt; int fanout_cpu_cnt, reqd_tx_cpu_cnt; int reqd_poll_worker_cnt, fanout_cnt_per_srs; + mac_resource_props_t *emrp = &flent->fe_effective_props; ASSERT(mrp->mrp_fanout_mode == MCM_CPUS); /* @@ -731,12 +823,11 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) no_of_cpus = mrp->mrp_ncpus; - if (mrp->mrp_intr_cpu != -1) { + if (mrp->mrp_rx_intr_cpu != -1) { /* * interrupt has been re-targetted. Poll * thread needs to be bound to interrupt - * CPU. Presently only fixed interrupts - * are re-targetted, MSI-x aren't. + * CPU. * * Find where in the list is the intr * CPU and swap it with the first one. 
@@ -744,11 +835,11 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * list for poll. */ for (i = 0; i < no_of_cpus; i++) { - if (mrp->mrp_cpu[i] == mrp->mrp_intr_cpu) + if (mrp->mrp_cpu[i] == mrp->mrp_rx_intr_cpu) break; } mrp->mrp_cpu[i] = mrp->mrp_cpu[0]; - mrp->mrp_cpu[0] = mrp->mrp_intr_cpu; + mrp->mrp_cpu[0] = mrp->mrp_rx_intr_cpu; } /* @@ -768,8 +859,8 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) /* How many CPUs are needed for Tx side? */ tx_srs = flent->fe_tx_srs; - reqd_tx_cpu_cnt = TX_MULTI_RING_MODE(tx_srs) ? - tx_srs->srs_oth_ring_count : 1; + reqd_tx_cpu_cnt = MAC_TX_SOFT_RINGS(tx_srs) ? + tx_srs->srs_tx_ring_count : 1; /* CPUs needed for Rx SRSes poll and worker threads */ reqd_poll_worker_cnt = mac_latency_optimize ? @@ -806,14 +897,14 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = fanout_cnt_per_srs; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize) - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = (mac_latency_optimize ? 
+ srs_cpu->mc_rx_pollid : mrp->mrp_cpu[cpu_cnt++]); for (i = 0; i < fanout_cnt_per_srs; i++) - srs_cpu->mc_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; /* Do the assignment for h/w Rx SRSes */ if (flent->fe_rx_srs_cnt > 1) { @@ -831,23 +922,22 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = fanout_cnt_per_srs; + srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs; /* The first CPU in the list is the intr CPU */ - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize) { - srs_cpu->mc_workerid = - mrp->mrp_cpu[cpu_cnt++]; - } + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + (mac_latency_optimize ? + srs_cpu->mc_rx_pollid : + mrp->mrp_cpu[cpu_cnt++]); for (i = 0; i < fanout_cnt_per_srs; i++) { - srs_cpu->mc_fanout_cpus[i] = + srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; } ASSERT(cpu_cnt <= no_of_cpus); } } - return; + goto tx_cpu_init; } /* @@ -885,13 +975,15 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize && worker_assign) - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + 
((!mac_latency_optimize && worker_assign) ? + mrp->mrp_cpu[cpu_cnt++] : srs_cpu->mc_rx_pollid); + + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; /* Do CPU bindings for SRSes having h/w Rx rings */ if (flent->fe_rx_srs_cnt > 1) { @@ -909,22 +1001,21 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_pollid = + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize && worker_assign) { - srs_cpu->mc_workerid = - mrp->mrp_cpu[++cpu_cnt]; - } - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_fanout_cpus[0] = + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + ((!mac_latency_optimize && worker_assign) ? + mrp->mrp_cpu[++cpu_cnt] : + srs_cpu->mc_rx_pollid); + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; cpu_cnt++; ASSERT(cpu_cnt <= no_of_cpus); } } - return; + goto tx_cpu_init; } /* @@ -942,14 +1033,28 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; if (++cpu_cnt >= no_of_cpus) cpu_cnt = 0; } + +tx_cpu_init: + mac_tx_cpu_init(flent, mrp, NULL); + + /* + * Copy the user specified CPUs to the effective CPUs + */ + for (i = 0; 
i < mrp->mrp_ncpus; i++) { + emrp->mrp_cpu[i] = mrp->mrp_cpu[i]; + } + emrp->mrp_ncpus = mrp->mrp_ncpus; + emrp->mrp_mask = mrp->mrp_mask; + bzero(emrp->mrp_pool, MAXPATHLEN); } /* @@ -960,64 +1065,95 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * with a flent. */ static void -mac_flow_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) +mac_flow_cpu_init(flow_entry_t *flent, cpupart_t *cpupart) { mac_soft_ring_set_t *rx_srs; processorid_t cpuid; - int j, srs_cnt, soft_ring_cnt = 0; + int i, j, k, srs_cnt, nscpus, maxcpus, soft_ring_cnt = 0; mac_cpus_t *srs_cpu; + mac_resource_props_t *emrp = &flent->fe_effective_props; + uint32_t cpus[MRP_NCPUS]; - if (mrp->mrp_mask & MRP_CPUS_USERSPEC) { - mac_flow_user_cpu_init(flent, mrp); - } else { + /* + * The maximum number of CPUs available can either be + * the number of CPUs in the pool or the number of CPUs + * in the system. + */ + maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus; + + /* + * Compute the number of soft rings needed on top for each Rx + * SRS. "rx_srs_cnt-1" indicates the number of Rx SRS + * associated with h/w Rx rings. Soft ring count needed for + * each h/w Rx SRS is computed and the same is applied to + * software classified Rx SRS. The first Rx SRS in fe_rx_srs[] + * is the software classified Rx SRS. + */ + soft_ring_cnt = mac_compute_soft_ring_count(flent, + flent->fe_rx_srs_cnt - 1, maxcpus); + if (soft_ring_cnt == 0) { /* - * Compute the number of soft rings needed on top for each Rx - * SRS. "rx_srs_cnt-1" indicates the number of Rx SRS - * associated with h/w Rx rings. Soft ring count needed for - * each h/w Rx SRS is computed and the same is applied to - * software classified Rx SRS. The first Rx SRS in fe_rx_srs[] - * is the software classified Rx SRS. + * Even when soft_ring_cnt is 0, we still need + * to create a soft ring for TCP, UDP and + * OTHER. So set it to 1. 
*/ - soft_ring_cnt = mac_compute_soft_ring_count(flent, - flent->fe_rx_srs_cnt - 1); - if (soft_ring_cnt == 0) { - /* - * Even when soft_ring_cnt is 0, we still need - * to create a soft ring for TCP, UDP and - * OTHER. So set it to 1. - */ - soft_ring_cnt = 1; - } - for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { - rx_srs = flent->fe_rx_srs[srs_cnt]; - srs_cpu = &rx_srs->srs_cpu; - if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT) { - if (soft_ring_cnt == srs_cpu->mc_fanout_cnt) - continue; - rx_srs->srs_fanout_state = SRS_FANOUT_REINIT; - } - srs_cpu->mc_ncpus = soft_ring_cnt; - srs_cpu->mc_fanout_cnt = soft_ring_cnt; - mutex_enter(&cpu_lock); - for (j = 0; j < soft_ring_cnt; j++) { - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_cpus[j] = cpuid; - srs_cpu->mc_fanout_cpus[j] = cpuid; - } - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_pollid = cpuid; - /* increment ncpus to account for polling cpu */ + soft_ring_cnt = 1; + } + for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { + rx_srs = flent->fe_rx_srs[srs_cnt]; + srs_cpu = &rx_srs->srs_cpu; + if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT) + rx_srs->srs_fanout_state = SRS_FANOUT_REINIT; + srs_cpu->mc_ncpus = soft_ring_cnt; + srs_cpu->mc_rx_fanout_cnt = soft_ring_cnt; + mutex_enter(&cpu_lock); + for (j = 0; j < soft_ring_cnt; j++) { + cpuid = mac_next_bind_cpu(cpupart); + srs_cpu->mc_cpus[j] = cpuid; + srs_cpu->mc_rx_fanout_cpus[j] = cpuid; + } + cpuid = mac_next_bind_cpu(cpupart); + srs_cpu->mc_rx_pollid = cpuid; + srs_cpu->mc_rx_intr_cpu = (mac_rx_intr_retarget ? 
+ srs_cpu->mc_rx_pollid : -1); + /* increment ncpus to account for polling cpu */ + srs_cpu->mc_ncpus++; + srs_cpu->mc_cpus[j++] = cpuid; + if (!mac_latency_optimize) { + cpuid = mac_next_bind_cpu(cpupart); srs_cpu->mc_ncpus++; srs_cpu->mc_cpus[j++] = cpuid; - if (!mac_latency_optimize) { - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_ncpus++; - srs_cpu->mc_cpus[j++] = cpuid; - } - srs_cpu->mc_workerid = cpuid; - mutex_exit(&cpu_lock); } + srs_cpu->mc_rx_workerid = cpuid; + mutex_exit(&cpu_lock); } + + nscpus = 0; + for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { + rx_srs = flent->fe_rx_srs[srs_cnt]; + srs_cpu = &rx_srs->srs_cpu; + for (j = 0; j < srs_cpu->mc_ncpus; j++) { + cpus[nscpus++] = srs_cpu->mc_cpus[j]; + } + } + + + /* + * Copy cpu list to fe_effective_props + * without duplicates. + */ + k = 0; + for (i = 0; i < nscpus; i++) { + for (j = 0; j < k; j++) { + if (emrp->mrp_cpu[j] == cpus[i]) + break; + } + if (j == k) + emrp->mrp_cpu[k++] = cpus[i]; + } + emrp->mrp_ncpus = k; + + mac_tx_cpu_init(flent, NULL, cpupart); } /* @@ -1025,15 +1161,46 @@ mac_flow_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * (setup SRS and set/update FANOUT, B/W and PRIORITY) */ +/* + * mac_srs_fanout_list_alloc: + * + * The underlying device can expose upto MAX_RINGS_PER_GROUP worth of + * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of + * array space is needed to store Tx soft rings. Thus we allocate so + * much array space for srs_tx_soft_rings. + * + * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth + * of space to st_soft_rings. This array is used for quick access to + * soft ring associated with a pseudo Tx ring based on the pseudo + * ring's index (mr_index). 
+ */ static void mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs) { - mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); - mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); - mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); + mac_client_impl_t *mcip = mac_srs->srs_mcip; + + if (mac_srs->srs_type & SRST_TX) { + mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * + MAX_RINGS_PER_GROUP, KM_SLEEP); + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + tx->st_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * + MAX_RINGS_PER_GROUP, KM_SLEEP); + } + } else { + mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + } } static void @@ -1095,6 +1262,121 @@ mac_srs_poll_bind(mac_soft_ring_set_t *mac_srs, processorid_t cpuid) } /* + * Re-target interrupt to the passed CPU. If re-target is successful, + * set mc_rx_intr_cpu to the re-targeted CPU. Otherwise set it to -1. 
+ */ +void +mac_rx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs, processorid_t cpuid) +{ + cpu_t *cp; + mac_ring_t *ring = mac_srs->srs_ring; + mac_intr_t *mintr = &ring->mr_info.mri_intr; + flow_entry_t *flent = mac_srs->srs_flent; + boolean_t primary = mac_is_primary_client(mac_srs->srs_mcip); + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Don't re-target the interrupt for these cases: + * 1) ring is NULL + * 2) the interrupt is shared (mi_ddi_shared) + * 3) ddi_handle is NULL and !primary + * 4) primary, ddi_handle is NULL but fe_rx_srs_cnt > 2 + * Case 3 & 4 are because of mac_client_intr_cpu() routine. + * This routine will re-target fixed interrupt for primary + * mac client if the client has only one ring. In that + * case, mc_rx_intr_cpu will already have the correct value. + */ + if (ring == NULL || mintr->mi_ddi_shared || cpuid == -1 || + (mintr->mi_ddi_handle == NULL && !primary) || (primary && + mintr->mi_ddi_handle == NULL && flent->fe_rx_srs_cnt > 2)) { + mac_srs->srs_cpu.mc_rx_intr_cpu = -1; + return; + } + + if (mintr->mi_ddi_handle == NULL) + return; + + cp = cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp)) + return; + + /* Drop the cpu_lock as ddi_intr_set_affinity() holds it */ + mutex_exit(&cpu_lock); + if (ddi_intr_set_affinity(mintr->mi_ddi_handle, cpuid) == DDI_SUCCESS) + mac_srs->srs_cpu.mc_rx_intr_cpu = cpuid; + else + mac_srs->srs_cpu.mc_rx_intr_cpu = -1; + mutex_enter(&cpu_lock); +} + +/* + * Re-target Tx interrupts + */ +void +mac_tx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs) +{ + cpu_t *cp; + mac_ring_t *ring; + mac_intr_t *mintr; + mac_soft_ring_t *sringp; + mac_srs_tx_t *srs_tx; + mac_cpus_t *srs_cpu; + processorid_t cpuid; + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + srs_cpu = &mac_srs->srs_cpu; + if (MAC_TX_SOFT_RINGS(mac_srs)) { + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; + ring = (mac_ring_t *)sringp->s_ring_tx_arg2; + cpuid = srs_cpu->mc_tx_intr_cpu[i]; + cp = 
cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp) || + !MAC_RING_RETARGETABLE(ring)) { + srs_cpu->mc_tx_retargeted_cpu[i] = -1; + continue; + } + mintr = &ring->mr_info.mri_intr; + /* + * Drop the cpu_lock as ddi_intr_set_affinity() + * holds it + */ + mutex_exit(&cpu_lock); + if (ddi_intr_set_affinity(mintr->mi_ddi_handle, + cpuid) == DDI_SUCCESS) { + srs_cpu->mc_tx_retargeted_cpu[i] = cpuid; + } else { + srs_cpu->mc_tx_retargeted_cpu[i] = -1; + } + mutex_enter(&cpu_lock); + } + } else { + cpuid = srs_cpu->mc_tx_intr_cpu[0]; + cp = cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp)) { + srs_cpu->mc_tx_retargeted_cpu[0] = -1; + return; + } + srs_tx = &mac_srs->srs_tx; + ring = (mac_ring_t *)srs_tx->st_arg2; + if (MAC_RING_RETARGETABLE(ring)) { + mintr = &ring->mr_info.mri_intr; + mutex_exit(&cpu_lock); + if ((ddi_intr_set_affinity(mintr->mi_ddi_handle, + cpuid) == DDI_SUCCESS)) { + srs_cpu->mc_tx_retargeted_cpu[0] = cpuid; + } else { + srs_cpu->mc_tx_retargeted_cpu[0] = -1; + } + mutex_enter(&cpu_lock); + } + } +} + +/* * When a CPU comes back online, bind the MAC kernel threads which * were previously bound to that CPU, and had to be unbound because * the CPU was going away. @@ -1231,17 +1513,16 @@ done: static void mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) { - uint32_t tx_mode; + uint32_t tx_mode, ring_info = 0; mac_srs_tx_t *srs_tx = &srs->srs_tx; mac_client_impl_t *mcip = srs->srs_mcip; - mac_impl_t *mip = mcip->mci_mip; /* * We need to quiesce/restart the client here because mac_tx() and * srs->srs_tx->st_func do not hold srs->srs_lock while accessing * st_mode and related fields, which are modified by the code below. 
*/ - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce((mac_client_handle_t)mcip); mutex_enter(&srs->srs_lock); mutex_enter(&srs->srs_bw->mac_bw_lock); @@ -1250,14 +1531,18 @@ mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { /* Reset bandwidth limit */ if (tx_mode == SRS_TX_BW) { + if (srs_tx->st_arg2 != NULL) + ring_info = mac_hwring_getinfo(srs_tx->st_arg2); if (mac_tx_serialize || - (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) { + (ring_info & MAC_RING_TX_SERIALIZE)) { srs_tx->st_mode = SRS_TX_SERIALIZE; } else { srs_tx->st_mode = SRS_TX_DEFAULT; } } else if (tx_mode == SRS_TX_BW_FANOUT) { srs_tx->st_mode = SRS_TX_FANOUT; + } else if (tx_mode == SRS_TX_BW_AGGR) { + srs_tx->st_mode = SRS_TX_AGGR; } srs->srs_type &= ~SRST_BW_CONTROL; } else { @@ -1270,13 +1555,15 @@ mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) srs->srs_bw->mac_bw_drop_threshold = srs->srs_bw->mac_bw_limit << 1; srs->srs_type |= SRST_BW_CONTROL; - if (tx_mode != SRS_TX_BW && - tx_mode != SRS_TX_BW_FANOUT) { + if (tx_mode != SRS_TX_BW && tx_mode != SRS_TX_BW_FANOUT && + tx_mode != SRS_TX_BW_AGGR) { if (tx_mode == SRS_TX_SERIALIZE || tx_mode == SRS_TX_DEFAULT) { srs_tx->st_mode = SRS_TX_BW; } else if (tx_mode == SRS_TX_FANOUT) { srs_tx->st_mode = SRS_TX_BW_FANOUT; + } else if (tx_mode == SRS_TX_AGGR) { + srs_tx->st_mode = SRS_TX_BW_AGGR; } else { ASSERT(0); } @@ -1287,7 +1574,7 @@ done: mutex_exit(&srs->srs_bw->mac_bw_lock); mutex_exit(&srs->srs_lock); - mac_tx_client_restart(mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } /* @@ -1392,9 +1679,7 @@ mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable) static void mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs) { - int tcp_count = 0; - int udp_count = 0; - int oth_count = 0; + int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0; mac_soft_ring_t *softring; softring = 
mac_srs->srs_soft_ring_head; @@ -1403,33 +1688,35 @@ mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs) mac_srs->srs_tcp_ring_count = 0; mac_srs->srs_udp_ring_count = 0; mac_srs->srs_oth_ring_count = 0; + mac_srs->srs_tx_ring_count = 0; return; } - softring = mac_srs->srs_soft_ring_head; - tcp_count = udp_count = oth_count = 0; - while (softring != NULL) { - if (softring->s_ring_type & ST_RING_TCP) + if (softring->s_ring_type & ST_RING_TCP) { mac_srs->srs_tcp_soft_rings[tcp_count++] = softring; - else if (softring->s_ring_type & ST_RING_UDP) + } else if (softring->s_ring_type & ST_RING_UDP) { mac_srs->srs_udp_soft_rings[udp_count++] = softring; - else + } else if (softring->s_ring_type & ST_RING_OTH) { mac_srs->srs_oth_soft_rings[oth_count++] = softring; + } else { + ASSERT(softring->s_ring_type & ST_RING_TX); + mac_srs->srs_tx_soft_rings[tx_count++] = softring; + } softring = softring->s_ring_next; } ASSERT(mac_srs->srs_soft_ring_count == - (tcp_count + udp_count + oth_count)); - + (tcp_count + udp_count + oth_count + tx_count)); mac_srs->srs_tcp_ring_count = tcp_count; mac_srs->srs_udp_ring_count = udp_count; mac_srs->srs_oth_ring_count = oth_count; + mac_srs->srs_tx_ring_count = tx_count; } void -mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, - pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, +mac_srs_create_proto_softrings(int id, uint16_t type, pri_t pri, + mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2, boolean_t set_bypass) { @@ -1446,7 +1733,7 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, mrf.mrf_flow_priority = pri; softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_TCP), pri, mcip, mac_srs, + (type|ST_RING_TCP), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; @@ -1481,7 +1768,7 @@ mac_srs_create_proto_softrings(int 
id, void *flent, uint16_t type, * bypass the DLS layer. */ softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_UDP), pri, mcip, mac_srs, + (type|ST_RING_UDP), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; @@ -1493,7 +1780,7 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, /* Create the Oth softrings which has to go through the DLS */ softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_OTH), pri, mcip, mac_srs, + (type|ST_RING_OTH), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; } @@ -1507,19 +1794,16 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, * same CPU as that of the soft ring's. */ static void -mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2, mac_soft_ring_set_t *mac_rx_srs, - mac_soft_ring_set_t *mac_tx_srs) +mac_srs_fanout_modify(mac_client_impl_t *mcip, mac_direct_rx_t rx_func, + void *x_arg1, mac_resource_handle_t x_arg2, + mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs) { mac_soft_ring_t *softring; uint32_t soft_ring_flag = 0; processorid_t cpuid = -1; - boolean_t user_specified; int i, srings_present, new_fanout_cnt; mac_cpus_t *srs_cpu; - user_specified = mrp->mrp_mask & MRP_CPUS_USERSPEC; /* fanout state is REINIT. 
Set it back to INIT */ ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_REINIT); mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT; @@ -1528,7 +1812,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, srings_present = mac_rx_srs->srs_tcp_ring_count; /* new request */ srs_cpu = &mac_rx_srs->srs_cpu; - new_fanout_cnt = srs_cpu->mc_fanout_cnt; + new_fanout_cnt = srs_cpu->mc_rx_fanout_cnt; mutex_enter(&mac_rx_srs->srs_lock); if (mac_rx_srs->srs_type & SRST_BW_CONTROL) @@ -1547,8 +1831,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, * Create the protocol softrings and set the * DLS bypass where possible. */ - mac_srs_create_proto_softrings(i, - (void *)flent, soft_ring_flag, + mac_srs_create_proto_softrings(i, soft_ring_flag, mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, rx_func, x_arg1, x_arg2, B_TRUE); } @@ -1583,7 +1866,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(new_fanout_cnt == mac_rx_srs->srs_tcp_ring_count); mutex_enter(&cpu_lock); for (i = 0; i < mac_rx_srs->srs_tcp_ring_count; i++) { - cpuid = srs_cpu->mc_fanout_cpus[i]; + cpuid = srs_cpu->mc_rx_fanout_cpus[i]; (void) mac_soft_ring_bind(mac_rx_srs->srs_udp_soft_rings[i], cpuid); (void) mac_soft_ring_bind(mac_rx_srs->srs_oth_soft_rings[i], @@ -1597,15 +1880,16 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, } } - mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_pollid); - mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_workerid); - + mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid); + mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu); /* * Bind Tx srs and soft ring threads too. Let's bind tx * srs to the last cpu in mrp list. 
*/ - if (mac_tx_srs != NULL && user_specified) { + if (mac_tx_srs != NULL) { BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); + mac_tx_srs_retarget_intr(mac_tx_srs); } mutex_exit(&cpu_lock); } @@ -1614,16 +1898,15 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, * Bind SRS threads and soft rings to CPUs/create fanout list. */ void -mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2, mac_soft_ring_set_t *mac_rx_srs, - mac_soft_ring_set_t *mac_tx_srs) +mac_srs_fanout_init(mac_client_impl_t *mcip, mac_resource_props_t *mrp, + mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2, + mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs, + cpupart_t *cpupart) { int i; - processorid_t cpuid, worker_cpuid, poll_cpuid; + processorid_t cpuid; uint32_t soft_ring_flag = 0; int soft_ring_cnt; - boolean_t user_specified = B_FALSE; mac_cpus_t *srs_cpu = &mac_rx_srs->srs_cpu; /* @@ -1641,31 +1924,27 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_UNINIT); mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT; - user_specified = mrp->mrp_mask & MRP_CPUS_USERSPEC; /* * Ring count can be 0 if no fanout is required and no cpu * were specified. 
Leave the SRS worker and poll thread * unbound */ ASSERT(mrp != NULL); - soft_ring_cnt = srs_cpu->mc_fanout_cnt; + soft_ring_cnt = srs_cpu->mc_rx_fanout_cnt; /* Step 1: bind cpu contains cpu list where threads need to bind */ if (soft_ring_cnt > 0) { mutex_enter(&cpu_lock); for (i = 0; i < soft_ring_cnt; i++) { - cpuid = srs_cpu->mc_fanout_cpus[i]; + cpuid = srs_cpu->mc_rx_fanout_cpus[i]; /* Create the protocol softrings */ - mac_srs_create_proto_softrings(i, (void *)flent, - soft_ring_flag, mac_rx_srs->srs_pri, - mcip, mac_rx_srs, cpuid, rx_func, - x_arg1, x_arg2, B_FALSE); + mac_srs_create_proto_softrings(i, soft_ring_flag, + mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, + rx_func, x_arg1, x_arg2, B_FALSE); } - worker_cpuid = srs_cpu->mc_workerid; - poll_cpuid = srs_cpu->mc_pollid; - mac_srs_worker_bind(mac_rx_srs, worker_cpuid); - mac_srs_poll_bind(mac_rx_srs, poll_cpuid); - + mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid); + mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu); /* * Bind Tx srs and soft ring threads too. * Let's bind tx srs to the last cpu in @@ -1676,9 +1955,8 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, goto alldone; } - if (user_specified) { - BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); - } + BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); + mac_tx_srs_retarget_intr(mac_tx_srs); mutex_exit(&cpu_lock); } else { mutex_enter(&cpu_lock); @@ -1686,8 +1964,8 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, * For a subflow, mrp_workerid and mrp_pollid * is not set. 
*/ - mac_srs_worker_bind(mac_rx_srs, mrp->mrp_workerid); - mac_srs_poll_bind(mac_rx_srs, mrp->mrp_pollid); + mac_srs_worker_bind(mac_rx_srs, mrp->mrp_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, mrp->mrp_rx_pollid); mutex_exit(&cpu_lock); goto no_softrings; } @@ -1702,12 +1980,11 @@ alldone: no_softrings: if (mac_rx_srs->srs_type & SRST_FANOUT_PROTO) { mutex_enter(&cpu_lock); - cpuid = mac_next_bind_cpu(); + cpuid = mac_next_bind_cpu(cpupart); /* Create the protocol softrings */ - mac_srs_create_proto_softrings(0, (void *)flent, - soft_ring_flag, mac_rx_srs->srs_pri, - mcip, mac_rx_srs, cpuid, rx_func, - x_arg1, x_arg2, B_FALSE); + mac_srs_create_proto_softrings(0, soft_ring_flag, + mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, + rx_func, x_arg1, x_arg2, B_FALSE); mutex_exit(&cpu_lock); } else { /* @@ -1729,7 +2006,7 @@ no_softrings: void mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2) + mac_resource_handle_t x_arg2, cpupart_t *cpupart) { mac_soft_ring_set_t *mac_rx_srs, *mac_tx_srs; int i, rx_srs_cnt; @@ -1739,7 +2016,7 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * This is an aggregation port. Fanout will be setup * over the aggregation itself. 
*/ - if (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) return; mac_rx_srs = flent->fe_rx_srs[0]; @@ -1754,12 +2031,18 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* No fanout for subflows */ if (flent->fe_type & FLOW_USER) { - mac_srs_fanout_init(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_init(mcip, mrp, rx_func, + x_arg1, x_arg2, mac_rx_srs, mac_tx_srs, + cpupart); return; } - mac_flow_cpu_init(flent, mrp); + if (mrp->mrp_mask & MRP_CPUS_USERSPEC) + mac_flow_user_cpu_init(flent, mrp); + else + mac_flow_cpu_init(flent, cpupart); + + mrp->mrp_rx_fanout_cnt = mac_rx_srs->srs_cpu.mc_rx_fanout_cnt; /* * Set up fanout for both SW (0th SRS) and HW classified @@ -1771,15 +2054,16 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_tx_srs = NULL; switch (mac_rx_srs->srs_fanout_state) { case SRS_FANOUT_UNINIT: - mac_srs_fanout_init(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_init(mcip, mrp, rx_func, + x_arg1, x_arg2, mac_rx_srs, mac_tx_srs, + cpupart); break; case SRS_FANOUT_INIT: break; case SRS_FANOUT_REINIT: mac_rx_srs_quiesce(mac_rx_srs, SRS_QUIESCE); - mac_srs_fanout_modify(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_modify(mcip, rx_func, x_arg1, + x_arg2, mac_rx_srs, mac_tx_srs); mac_rx_srs_restart(mac_rx_srs); break; default: @@ -1791,7 +2075,7 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * mac_create_soft_ring_set: + * mac_srs_create: * * Create a mac_soft_ring_set_t (SRS). If soft_ring_fanout_type is * SRST_TX, an SRS for Tx side is created. 
Otherwise an SRS for Rx side @@ -1867,6 +2151,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, mac_srs->srs_type = (srs_type | SRST_NO_SOFT_RINGS); mac_srs->srs_worker_cpuid = mac_srs->srs_worker_cpuid_save = -1; mac_srs->srs_poll_cpuid = mac_srs->srs_poll_cpuid_save = -1; + mac_srs->srs_mcip = mcip; mac_srs_fanout_list_alloc(mac_srs); /* @@ -1881,7 +2166,6 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, } else { mac_srs->srs_pri = mcip->mci_max_pri; } - mac_srs->srs_mcip = mcip; /* * We need to insert the SRS in the global list before * binding the SRS and SR threads. Otherwise there is a @@ -1959,7 +2243,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, mac_tx_srs_max_q_cnt : mac_tx_srs_hiwat; srs_tx->st_arg1 = x_arg1; srs_tx->st_arg2 = x_arg2; - return (mac_srs); + goto done; } if ((srs_type & SRST_FLOW) != 0 || @@ -1973,11 +2257,13 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, srs_rx->sr_arg2 = x_arg2; if (ring != NULL) { + uint_t ring_info; + /* Is the mac_srs created over the RX default group? */ if (ring->mr_gh == (mac_group_handle_t) - (&mcip->mci_mip->mi_rx_groups[0])) + MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) { mac_srs->srs_type |= SRST_DEFAULT_GRP; - + } mac_srs->srs_ring = ring; ring->mr_srs = mac_srs; ring->mr_classify_type = MAC_HW_CLASSIFIER; @@ -1997,9 +2283,12 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, * so that we get a chance to switch into a polling * mode under backlog. */ - if (mcip->mci_mip->mi_v12n_level & MAC_VIRT_SERIALIZE) + ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring); + if (ring_info & MAC_RING_RX_ENQUEUE) mac_srs->srs_state |= SRS_SOFTRING_QUEUE; } +done: + mac_srs_stat_create(mac_srs); return (mac_srs); } @@ -2043,7 +2332,7 @@ mac_find_fanout(flow_entry_t *flent, uint32_t link_type) /* * Change a group from h/w to s/w classification. 
*/ -static void +void mac_rx_switch_grp_to_sw(mac_group_t *group) { mac_ring_t *ring; @@ -2063,11 +2352,11 @@ mac_rx_switch_grp_to_sw(mac_group_t *group) if (ring->mr_state != MR_INUSE) (void) mac_start_ring(ring); + /* * We need to perform SW classification * for packets landing in these rings */ - ring->mr_state = MR_INUSE; ring->mr_flag = 0; ring->mr_classify_type = MAC_SW_CLASSIFIER; } @@ -2079,14 +2368,38 @@ mac_rx_switch_grp_to_sw(mac_group_t *group) */ void mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_group_t *group, uint32_t link_type) + uint32_t link_type) +{ + cpupart_t *cpupart; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + boolean_t use_default = B_FALSE; + + mac_rx_srs_group_setup(mcip, flent, link_type); + mac_tx_srs_group_setup(mcip, flent, link_type); + + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); +} + +/* + * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there + * is a group associated with this MAC client, set up SRSs for individual + * h/w rings. 
+ */ +void +mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, + uint32_t link_type) { mac_impl_t *mip = mcip->mci_mip; mac_soft_ring_set_t *mac_srs; - mac_soft_ring_set_t *tx_srs = NULL; mac_ring_t *ring; uint32_t fanout_type; - boolean_t created_srs = B_FALSE; + mac_group_t *rx_group = flent->fe_rx_ring_group; fanout_type = mac_find_fanout(flent, link_type); @@ -2096,64 +2409,23 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* Setup the Rx SRS */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, NULL); - mutex_enter(&flent->fe_lock); flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc; flent->fe_cb_arg1 = (void *)mip; flent->fe_cb_arg2 = (void *)mac_srs; mutex_exit(&flent->fe_lock); - - /* Setup the Tx SRS as well */ - ASSERT(flent->fe_tx_srs == NULL); - tx_srs = mac_srs_create(mcip, flent, SRST_TX | link_type, - NULL, mcip, NULL, NULL); - - if (mcip->mci_share != NULL) { - mac_srs_tx_t *tx = &tx_srs->srs_tx; - ASSERT((mcip->mci_state_flags & MCIS_NO_HWRINGS) == 0); - /* - * A share requires a dedicated TX group. - * mac_reserve_tx_group() does the work needed to - * allocate a new group and populate that group - * with rings according to the driver requirements - * and limitations. - */ - tx->st_group = - mac_reserve_tx_group(mip, mcip->mci_share); - ASSERT(tx->st_group != NULL); - tx->st_group->mrg_tx_client = mcip; - } - mac_tx_srs_setup(mcip, flent, link_type); - created_srs = B_TRUE; } - if (group == NULL) { - if (created_srs) { - mac_fanout_setup(mcip, flent, - MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, - mcip, NULL); - } + if (rx_group == NULL) return; - } - /* * fanout for default SRS is done when default SRS are created * above. As each ring is added to the group, we setup the * SRS and fanout to it. */ - switch (group->mrg_state) { + switch (rx_group->mrg_state) { case MAC_GROUP_STATE_RESERVED: - /* - * The group is exclusively ours. 
Create a SRS - * for each ring in the group and allow the - * individual SRS to dynamically poll their - * Rx ring. Do this only if the client is not - * a VLAN MAC client since for VLAN we do - * s/w classification for the VID check. - */ - if (i_mac_flow_vid(mcip->mci_flent) != VLAN_ID_NONE) - break; - for (ring = group->mrg_rings; ring != NULL; + for (ring = rx_group->mrg_rings; ring != NULL; ring = ring->mr_next) { switch (ring->mr_state) { case MR_INUSE: @@ -2163,14 +2435,28 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, if (ring->mr_state != MR_INUSE) (void) mac_start_ring(ring); - ring->mr_state = MR_INUSE; - + /* + * Since the group is exclusively ours create + * an SRS for this ring to allow the + * individual SRS to dynamically poll the + * ring. Do this only if the client is not + * a VLAN MAC client, since for VLAN we do + * s/w classification for the VID check, and + * if it has a unicast address. + */ + if ((mcip->mci_state_flags & + MCIS_NO_UNICAST_ADDR) || + i_mac_flow_vid(mcip->mci_flent) != + VLAN_ID_NONE) { + break; + } mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, ring); break; default: - cmn_err(CE_PANIC, "srs_setup: mcip = %p " + cmn_err(CE_PANIC, + "srs_setup: mcip = %p " "trying to add UNKNOWN ring = %p\n", (void *)mcip, (void *)ring); break; @@ -2181,43 +2467,102 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* * Set all rings of this group to software classified. * - * If the group is current RESERVED, the existing mac client - * (the only client on this group) is using this group - * exclusively. In that case we need to disable polling on - * the rings of the group (if it was enabled), and free the - * SRS associated with the rings. + * If the group is current RESERVED, the existing mac + * client (the only client on this group) is using + * this group exclusively. 
In that case we need to + * disable polling on the rings of the group (if it + * was enabled), and free the SRS associated with the + * rings. */ - mac_rx_switch_grp_to_sw(group); + mac_rx_switch_grp_to_sw(rx_group); break; default: ASSERT(B_FALSE); break; } - mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), - mac_rx_deliver, mcip, NULL); } +/* + * Set up the TX SRS. + */ void -mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, +mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) { + int cnt; + int ringcnt; + mac_ring_t *ring; + mac_group_t *grp; + + /* + * If we are opened exclusively (like aggr does for aggr_ports), + * don't set up Tx SRS and Tx soft rings as they won't be used. + * The same thing has to be done for Rx side also. See bug: + * 6880080 + */ + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) { + /* + * If we have rings, start them here. + */ + if (flent->fe_tx_ring_group == NULL) + return; + grp = (mac_group_t *)flent->fe_tx_ring_group; + ringcnt = grp->mrg_cur_count; + ring = grp->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state != MR_INUSE) { + (void) mac_start_ring(ring); + } + ring = ring->mr_next; + } + return; + } + if (flent->fe_tx_srs == NULL) { + (void) mac_srs_create(mcip, flent, SRST_TX | link_type, + NULL, mcip, NULL, NULL); + } + mac_tx_srs_setup(mcip, flent); +} + +/* + * Remove all the RX SRSs. If we want to remove only the SRSs associated + * with h/w rings, leave the S/W SRS alone. This is used when we want to + * move the MAC client from one group to another, so we need to teardown + * on the h/w SRSs. 
+ */ +void +mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) +{ mac_soft_ring_set_t *mac_srs; - mac_soft_ring_set_t *tx_srs; - mac_srs_tx_t *tx; int i; + int count = flent->fe_rx_srs_cnt; - for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + for (i = 0; i < count; i++) { + if (i == 0 && hwonly) + continue; mac_srs = flent->fe_rx_srs[i]; mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED); - /* - * Deal with all fanout tear down etc. - */ mac_srs_free(mac_srs); flent->fe_rx_srs[i] = NULL; + flent->fe_rx_srs_cnt--; } - flent->fe_rx_srs_cnt = 0; + ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1); + ASSERT(hwonly || flent->fe_rx_srs_cnt == 0); +} + +/* + * Remove the TX SRS. + */ +void +mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, + uint32_t link_type) +{ + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + + if ((tx_srs = flent->fe_tx_srs) == NULL) + return; - tx_srs = flent->fe_tx_srs; tx = &tx_srs->srs_tx; switch (link_type) { case SRST_FLOW: @@ -2228,25 +2573,16 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_tx_srs_quiesce(tx_srs, SRS_CONDEMNED); break; case SRST_LINK: - mac_tx_client_quiesce(mcip, SRS_CONDEMNED); - /* - * Release the TX resources. First the TX group, if any - * was assigned to the MAC client, which will cause the - * TX rings to be moved back to the pool. Then free the - * rings themselves. 
- */ - if (tx->st_group != NULL) { - mac_release_tx_group(tx_srs->srs_mcip->mci_mip, - tx->st_group); - tx->st_group = NULL; - } - if (tx->st_ring_count != 0) { - kmem_free(tx->st_rings, - sizeof (mac_ring_handle_t) * tx->st_ring_count); - } + mac_tx_client_condemn((mac_client_handle_t)mcip); if (tx->st_arg2 != NULL) { ASSERT(tx_srs->srs_type & SRST_TX); - mac_release_tx_ring(tx->st_arg2); + /* + * The ring itself will be stopped when + * we release the group or in the + * mac_datapath_teardown (for the default + * group) + */ + tx->st_arg2 = NULL; } break; default: @@ -2258,7 +2594,9 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * This is the group state machine. The state of an Rx group is given by + * This is the group state machine. + * + * The state of an Rx group is given by * the following table. The default group and its rings are started in * mac_start itself and the default group stays in SHARED state until * mac_stop at which time the group and rings are stopped and and it @@ -2276,15 +2614,27 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * * Non-default 0 N.A. REGISTERED * Non-default 1 N.A. RESERVED - * Non-default > 1 N.A. SHARED * * Default 0 N.A. SHARED * Default 1 1 RESERVED * Default 1 > 1 SHARED * Default > 1 N.A. SHARED + * + * For a TX group, the following is the state table. 
+ * + * Group type # of clients Group State + * in the group + * + * Non-default 0 REGISTERED + * Non-default 1 RESERVED + * + * Default 0 REGISTERED + * Default 1 RESERVED + * Default > 1 SHARED */ mac_group_state_t -mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) +mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip, + mac_group_t *defgrp, boolean_t rx_group) { mac_impl_t *mip = (mac_impl_t *)grp->mrg_mh; @@ -2292,11 +2642,11 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) /* Non-default group */ - if (grp != mip->mi_rx_groups) { - if (MAC_RX_GROUP_NO_CLIENT(grp)) + if (grp != defgrp) { + if (MAC_GROUP_NO_CLIENT(grp)) return (MAC_GROUP_STATE_REGISTERED); - *group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT(grp); + *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp); if (*group_only_mcip != NULL) return (MAC_GROUP_STATE_RESERVED); @@ -2305,10 +2655,19 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) /* Default group */ - if (MAC_RX_GROUP_NO_CLIENT(grp) || mip->mi_nactiveclients != 1) + if (MAC_GROUP_NO_CLIENT(grp)) { + if (rx_group) + return (MAC_GROUP_STATE_SHARED); + else + return (MAC_GROUP_STATE_REGISTERED); + } + *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp); + if (*group_only_mcip == NULL) + return (MAC_GROUP_STATE_SHARED); + + if (rx_group && mip->mi_nactiveclients != 1) return (MAC_GROUP_STATE_SHARED); - *group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT(grp); ASSERT(*group_only_mcip != NULL); return (MAC_GROUP_STATE_RESERVED); } @@ -2456,13 +2815,12 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * * For NICs which have only 1 Rx ring (we treat NICs with no Rx rings * as NIC with a single default ring), we assign the only ring to - * primary Link as MAC_RX_HW_DEFAULT_RING. 
The primary Link SRS can do - * polling on it as long as it is the only link in use and we compare - * the MAC address for unicast packets before accepting an incoming - * packet (there is no need for S/W classification in this case). We - * disable polling on the only ring the moment 2nd link gets created - * (the polling remains enabled even though there are broadcast and - * multicast flows created). + * primary Link. The primary Link SRS can do polling on it as long as + * it is the only link in use and we compare the MAC address for unicast + * packets before accepting an incoming packet (there is no need for S/W + * classification in this case). We disable polling on the only ring the + * moment 2nd link gets created (the polling remains enabled even though + * there are broadcast and * multicast flows created). * * If the NIC has more than 1 Rx ring, we assign the default ring (the * 1st ring) to deal with broadcast, multicast and traffic for other @@ -2472,10 +2830,6 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * (and its SRS) can continue to poll the assigned Rx ring at all times * independantly. * - * Right now we just assign MAC_RX_HW_DEFAULT_RING to note that it is - * primary NIC and later we will check to see how many Rx rings we - * have and can we get a non default Rx ring for the primary MAC. - * * Note: In future, if no fanout is specified, we try to assign 2 Rx * rings for the primary Link with the primary MAC address + TCP going * to one ring and primary MAC address + UDP|SCTP going to other ring. @@ -2487,56 +2841,128 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * As an optimization, when a new NIC or VNIC is created, we can get * only one Rx ring and make it a TCP specific Rx ring and use the * H/W default Rx ring for the rest (this Rx ring is never polled). 
+ * + * For clients that don't have MAC address, but want to receive and + * transmit packets (e.g, bpf, gvrp etc.), we need to setup the datapath. + * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we + * always give the default group and use software classification (i.e. + * even if this is the only client in the default group, we will + * leave group as shared). */ int mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) { mac_impl_t *mip = mcip->mci_mip; - mac_group_t *group = NULL; - mac_group_t *default_group; + mac_group_t *rgroup = NULL; + mac_group_t *tgroup = NULL; + mac_group_t *default_rgroup; + mac_group_t *default_tgroup; int err; uint8_t *mac_addr; - mac_rx_group_reserve_type_t rtype = MAC_RX_RESERVE_NONDEFAULT; mac_group_state_t next_state; mac_client_impl_t *group_only_mcip; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + boolean_t rxhw; + boolean_t txhw; + boolean_t use_default = B_FALSE; + cpupart_t *cpupart; + boolean_t no_unicast; + boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC; + mac_client_impl_t *reloc_pmcip = NULL; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); switch (link_type) { case SRST_FLOW: - mac_srs_group_setup(mcip, flent, NULL, link_type); + mac_srs_group_setup(mcip, flent, link_type); return (0); case SRST_LINK: + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; mac_addr = flent->fe_flow_desc.fd_dst_mac; - /* Check if we need to reserve the default group */ - if (flent->fe_type & FLOW_PRIMARY_MAC) - rtype = MAC_RX_RESERVE_DEFAULT; + /* Default RX group */ + default_rgroup = MAC_DEFAULT_RX_GROUP(mip); - if ((mcip->mci_state_flags & MCIS_NO_HWRINGS) == 0) { - /* - * Check to see if we can get an exclusive group for - * this mac address or if there already exists a - * group that has this mac address (case of VLANs). - * If no groups are available, use the default group. 
- */ - group = mac_reserve_rx_group(mcip, mac_addr, rtype); + /* Default TX group */ + default_tgroup = MAC_DEFAULT_TX_GROUP(mip); + + if (no_unicast) { + rgroup = default_rgroup; + tgroup = default_tgroup; + goto grp_found; } + rxhw = (mrp->mrp_mask & MRP_RX_RINGS) && + (mrp->mrp_nrxrings > 0 || + (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)); + txhw = (mrp->mrp_mask & MRP_TX_RINGS) && + (mrp->mrp_ntxrings > 0 || + (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)); - if (group == NULL) { - if ((mcip->mci_state_flags & MCIS_REQ_HWRINGS) != 0) - return (ENOSPC); - group = &mip->mi_rx_groups[0]; + /* + * By default we have given the primary all the rings + * i.e. the default group. Let's see if the primary + * needs to be relocated so that the addition of this + * client doesn't impact the primary's performance, + * i.e. if the primary is in the default group and + * we add this client, the primary will lose polling. + * We do this only for NICs supporting dynamic ring + * grouping and only when this is the first client + * after the primary (i.e. nactiveclients is 2) + */ + if (!isprimary && mip->mi_nactiveclients == 2 && + (group_only_mcip = mac_primary_client_handle(mip)) != + NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + reloc_pmcip = mac_check_primary_relocation( + group_only_mcip, rxhw); + } + /* + * Check to see if we can get an exclusive group for + * this mac address or if there already exists a + * group that has this mac address (case of VLANs). + * If no groups are available, use the default group. + */ + rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE); + if (rgroup == NULL && rxhw) { + err = ENOSPC; + goto setup_failed; + } else if (rgroup == NULL) { + rgroup = default_rgroup; + } + /* + * Check to see if we can get an exclusive group for + * this mac client. If no groups are available, use + * the default group. 
+ */ + tgroup = mac_reserve_tx_group(mcip, B_FALSE); + if (tgroup == NULL && txhw) { + if (rgroup != NULL && rgroup != default_rgroup) + mac_release_rx_group(mcip, rgroup); + err = ENOSPC; + goto setup_failed; + } else if (tgroup == NULL) { + tgroup = default_tgroup; } /* * Some NICs don't support any Rx rings, so there may not * even be a default group. */ - if (group != NULL) { - flent->fe_rx_ring_group = group; + grp_found: + if (rgroup != NULL) { + if (rgroup != default_rgroup && + MAC_GROUP_NO_CLIENT(rgroup) && + (rxhw || mcip->mci_share != NULL)) { + MAC_RX_GRP_RESERVED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RESERVED(mip, + rgroup->mrg_cur_count); + } + } + flent->fe_rx_ring_group = rgroup; /* * Add the client to the group. This could cause * either this group to move to the shared state or @@ -2545,18 +2971,29 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * actions on the default group are postponed to * the end of this function. 
*/ - mac_rx_group_add_client(group, mcip); - next_state = mac_rx_group_next_state(group, - &group_only_mcip); - - ASSERT((next_state == MAC_GROUP_STATE_RESERVED && - mcip == group_only_mcip) || - (next_state == MAC_GROUP_STATE_SHARED && - group_only_mcip == NULL)); - - mac_set_rx_group_state(group, next_state); + mac_group_add_client(rgroup, mcip); + next_state = mac_group_next_state(rgroup, + &group_only_mcip, default_rgroup, B_TRUE); + mac_set_group_state(rgroup, next_state); } + if (tgroup != NULL) { + if (tgroup != default_tgroup && + MAC_GROUP_NO_CLIENT(tgroup) && + (txhw || mcip->mci_share != NULL)) { + MAC_TX_GRP_RESERVED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RESERVED(mip, + tgroup->mrg_cur_count); + } + } + flent->fe_tx_ring_group = tgroup; + mac_group_add_client(tgroup, mcip); + next_state = mac_group_next_state(tgroup, + &group_only_mcip, default_tgroup, B_FALSE); + tgroup->mrg_state = next_state; + } /* * Setup the Rx and Tx SRSes. If we got a pristine group * exclusively above, mac_srs_group_setup would simply create @@ -2564,18 +3001,23 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * reserved group, mac_srs_group_setup would also dismantle the * SRSes of the previously exclusive group */ - mac_srs_group_setup(mcip, flent, group, link_type); + mac_srs_group_setup(mcip, flent, link_type); + /* We are setting up minimal datapath only */ + if (no_unicast) + break; /* Program the S/W Classifer */ if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) goto setup_failed; /* Program the H/W Classifier */ - if ((err = mac_add_macaddr(mip, group, mac_addr, + if ((err = mac_add_macaddr(mip, rgroup, mac_addr, (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0) goto setup_failed; mcip->mci_unicast = mac_find_macaddr(mip, mac_addr); ASSERT(mcip->mci_unicast != NULL); + /* Initialize the v6 local addr used by link protection */ + mac_protect_update_v6_local_addr(mcip); break; default: @@ -2590,38 
+3032,53 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * incoming broadcast traffic to the other groups and dismantle the * SRSes over the default group. */ - if (group != NULL) { - if (group != mip->mi_rx_groups) { - default_group = mip->mi_rx_groups; - if (default_group->mrg_state == + if (rgroup != NULL) { + if (rgroup != default_rgroup) { + if (default_rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) { - group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT( - default_group); + group_only_mcip = MAC_GROUP_ONLY_CLIENT( + default_rgroup); ASSERT(group_only_mcip != NULL && mip->mi_nactiveclients > 1); - mac_set_rx_group_state(default_group, + mac_set_group_state(default_rgroup, MAC_GROUP_STATE_SHARED); - mac_srs_group_setup(group_only_mcip, + mac_rx_srs_group_setup(group_only_mcip, + group_only_mcip->mci_flent, SRST_LINK); + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(group_only_mcip, group_only_mcip->mci_flent, - default_group, SRST_LINK); + MCIP_RESOURCE_PROPS(group_only_mcip), + mac_rx_deliver, group_only_mcip, NULL, + cpupart); + mac_set_pool_effective(use_default, cpupart, + mrp, emrp); + pool_unlock(); } - ASSERT(default_group->mrg_state == + ASSERT(default_rgroup->mrg_state == MAC_GROUP_STATE_SHARED); } /* * If we get an exclusive group for a VLAN MAC client we * need to take the s/w path to make the additional check for * the vid. Disable polling and set it to s/w classification. + * Similarly for clients that don't have a unicast address. 
*/ - if (group->mrg_state == MAC_GROUP_STATE_RESERVED && - i_mac_flow_vid(mcip->mci_flent) != VLAN_ID_NONE) { - mac_rx_switch_grp_to_sw(group); + if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED && + (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) { + mac_rx_switch_grp_to_sw(rgroup); } } + mac_set_rings_effective(mcip); return (0); setup_failed: + /* Switch the primary back to default group */ + if (reloc_pmcip != NULL) { + (void) mac_rx_switch_group(reloc_pmcip, + reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup); + } mac_datapath_teardown(mcip, flent, link_type); return (err); } @@ -2637,12 +3094,14 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_group_t *default_group; boolean_t check_default_group = B_FALSE; mac_group_state_t next_state; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); switch (link_type) { case SRST_FLOW: - mac_srs_group_teardown(mcip, flent, SRST_FLOW); + mac_rx_srs_group_teardown(flent, B_FALSE); + mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW); return; case SRST_LINK: @@ -2666,7 +3125,9 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_flow_wait(flent, FLOW_DRIVER_UPCALL); /* Now quiesce and destroy all SRS and soft rings */ - mac_srs_group_teardown(mcip, flent, SRST_LINK); + mac_rx_srs_group_teardown(flent, B_FALSE); + mac_tx_srs_group_teardown(mcip, flent, SRST_LINK); + ASSERT((mcip->mci_flent == flent) && (flent->fe_next == NULL)); @@ -2677,16 +3138,17 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * were the last client, release the group. 
*/ group = flent->fe_rx_ring_group; + default_group = MAC_DEFAULT_RX_GROUP(mip); if (group != NULL) { - mac_rx_group_remove_client(group, mcip); - next_state = mac_rx_group_next_state(group, - &grp_only_mcip); + mac_group_remove_client(group, mcip); + next_state = mac_group_next_state(group, + &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { /* * Only one client left on this RX group. */ ASSERT(grp_only_mcip != NULL); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_RESERVED); group_only_flent = grp_only_mcip->mci_flent; @@ -2695,9 +3157,14 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * access on the group. Allow it to * dynamically poll the H/W rings etc. */ - mac_srs_group_setup(grp_only_mcip, - group_only_flent, group, SRST_LINK); + mac_rx_srs_group_setup(grp_only_mcip, + group_only_flent, SRST_LINK); + mac_fanout_setup(grp_only_mcip, + group_only_flent, + MCIP_RESOURCE_PROPS(grp_only_mcip), + mac_rx_deliver, grp_only_mcip, NULL, NULL); mac_rx_group_unmark(group, MR_INCIPIENT); + mac_set_rings_effective(grp_only_mcip); } else if (next_state == MAC_GROUP_STATE_REGISTERED) { /* * This is a non-default group being freed up. @@ -2705,19 +3172,95 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * to see if the primary client can get * exclusive access to the default group. 
*/ - ASSERT(group != mip->mi_rx_groups); + ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + if (mrp->mrp_mask & MRP_RX_RINGS) { + MAC_RX_GRP_RELEASED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RELEASED(mip, + group->mrg_cur_count); + } + } mac_release_rx_group(mcip, group); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); check_default_group = B_TRUE; } else { ASSERT(next_state == MAC_GROUP_STATE_SHARED); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_SHARED); mac_rx_group_unmark(group, MR_CONDEMNED); } flent->fe_rx_ring_group = NULL; } + /* + * Remove the client from the TX group. Additionally, if + * this a non-default group, then we also need to release + * the group. + */ + group = flent->fe_tx_ring_group; + default_group = MAC_DEFAULT_TX_GROUP(mip); + if (group != NULL) { + mac_group_remove_client(group, mcip); + next_state = mac_group_next_state(group, + &grp_only_mcip, default_group, B_FALSE); + if (next_state == MAC_GROUP_STATE_REGISTERED) { + if (group != default_group) { + if (mrp->mrp_mask & MRP_TX_RINGS) { + MAC_TX_GRP_RELEASED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RELEASED( + mip, group-> + mrg_cur_count); + } + } + mac_release_tx_group(mcip, group); + /* + * If the default group is reserved, + * then we need to set the effective + * rings as we would have given + * back some rings when the group + * was released + */ + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC && + default_group->mrg_state == + MAC_GROUP_STATE_RESERVED) { + grp_only_mcip = + MAC_GROUP_ONLY_CLIENT + (default_group); + mac_set_rings_effective( + grp_only_mcip); + } + } else { + mac_ring_t *ring; + int cnt; + int ringcnt; + + /* + * Stop all the rings except the + * default ring. 
+ */ + ringcnt = group->mrg_cur_count; + ring = group->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state == + MR_INUSE && ring != + (mac_ring_t *) + mip->mi_default_tx_ring) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } + ring = ring->mr_next; + } + } + } else if (next_state == MAC_GROUP_STATE_RESERVED) { + mac_set_rings_effective(grp_only_mcip); + } + flent->fe_tx_ring_group = NULL; + group->mrg_state = next_state; + } break; default: ASSERT(B_FALSE); @@ -2731,21 +3274,53 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * over the default group. */ if (check_default_group) { - default_group = mip->mi_rx_groups; + default_group = MAC_DEFAULT_RX_GROUP(mip); ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED); - next_state = mac_rx_group_next_state(default_group, - &grp_only_mcip); + next_state = mac_group_next_state(default_group, + &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { ASSERT(grp_only_mcip != NULL && mip->mi_nactiveclients == 1); - mac_set_rx_group_state(default_group, + mac_set_group_state(default_group, MAC_GROUP_STATE_RESERVED); - mac_srs_group_setup(grp_only_mcip, + mac_rx_srs_group_setup(grp_only_mcip, + grp_only_mcip->mci_flent, SRST_LINK); + mac_fanout_setup(grp_only_mcip, grp_only_mcip->mci_flent, - default_group, SRST_LINK); + MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver, + grp_only_mcip, NULL, NULL); mac_rx_group_unmark(default_group, MR_INCIPIENT); + mac_set_rings_effective(grp_only_mcip); } } + + /* + * If the primary is the only one left and the MAC supports + * dynamic grouping, we need to see if the primary needs to + * be moved to the default group so that it can use all the + * H/W rings. 
+ */ + if (!(flent->fe_type & FLOW_PRIMARY_MAC) && + mip->mi_nactiveclients == 1 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + default_group = MAC_DEFAULT_RX_GROUP(mip); + grp_only_mcip = mac_primary_client_handle(mip); + if (grp_only_mcip == NULL) + return; + group_only_flent = grp_only_mcip->mci_flent; + mrp = MCIP_RESOURCE_PROPS(grp_only_mcip); + /* + * If the primary has an explicit property set, leave it + * alone. + */ + if (mrp->mrp_mask & MRP_RX_RINGS) + return; + /* + * Switch the primary to the default group. + */ + (void) mac_rx_switch_group(grp_only_mcip, + group_only_flent->fe_rx_ring_group, default_group); + } } /* DATAPATH TEAR DOWN ROUTINES (SRS and FANOUT teardown) */ @@ -2753,18 +3328,36 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, static void mac_srs_fanout_list_free(mac_soft_ring_set_t *mac_srs) { - ASSERT(mac_srs->srs_tcp_soft_rings != NULL); - kmem_free(mac_srs->srs_tcp_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_tcp_soft_rings = NULL; - ASSERT(mac_srs->srs_udp_soft_rings != NULL); - kmem_free(mac_srs->srs_udp_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_udp_soft_rings = NULL; - ASSERT(mac_srs->srs_oth_soft_rings != NULL); - kmem_free(mac_srs->srs_oth_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_oth_soft_rings = NULL; + if (mac_srs->srs_type & SRST_TX) { + mac_srs_tx_t *tx; + + ASSERT(mac_srs->srs_tcp_soft_rings == NULL); + ASSERT(mac_srs->srs_udp_soft_rings == NULL); + ASSERT(mac_srs->srs_oth_soft_rings == NULL); + ASSERT(mac_srs->srs_tx_soft_rings != NULL); + kmem_free(mac_srs->srs_tx_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP); + mac_srs->srs_tx_soft_rings = NULL; + tx = &mac_srs->srs_tx; + if (tx->st_soft_rings != NULL) { + kmem_free(tx->st_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP); + } + } else { + ASSERT(mac_srs->srs_tx_soft_rings == NULL); + 
ASSERT(mac_srs->srs_tcp_soft_rings != NULL); + kmem_free(mac_srs->srs_tcp_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_tcp_soft_rings = NULL; + ASSERT(mac_srs->srs_udp_soft_rings != NULL); + kmem_free(mac_srs->srs_udp_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_udp_soft_rings = NULL; + ASSERT(mac_srs->srs_oth_soft_rings != NULL); + kmem_free(mac_srs->srs_oth_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_oth_soft_rings = NULL; + } } /* @@ -2815,10 +3408,11 @@ mac_srs_free(mac_soft_ring_set_t *mac_srs) mac_pkt_drop(NULL, NULL, mac_srs->srs_first, B_FALSE); mac_srs_ring_free(mac_srs); - mac_srs_soft_rings_free(mac_srs, B_TRUE); + mac_srs_soft_rings_free(mac_srs); mac_srs_fanout_list_free(mac_srs); mac_srs->srs_bw = NULL; + mac_srs_stat_delete(mac_srs); kmem_cache_free(mac_srs_cache, mac_srs); } @@ -3126,13 +3720,19 @@ mac_tx_srs_add_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) { mac_client_impl_t *mcip = mac_srs->srs_mcip; mac_soft_ring_t *soft_ring; - int count = mac_srs->srs_oth_ring_count; + int count = mac_srs->srs_tx_ring_count; + uint32_t soft_ring_type = ST_RING_TX; + uint_t ring_info; ASSERT(mac_srs->srs_state & SRS_QUIESCE); - soft_ring = mac_soft_ring_create(count, 0, NULL, - (ST_RING_OTH | ST_RING_TX), maxclsyspri, mcip, mac_srs, -1, + ring_info = mac_hwring_getinfo((mac_ring_handle_t)tx_ring); + if (mac_tx_serialize || (ring_info & MAC_RING_TX_SERIALIZE)) + soft_ring_type |= ST_RING_WORKER_ONLY; + soft_ring = mac_soft_ring_create(count, 0, + soft_ring_type, maxclsyspri, mcip, mac_srs, -1, NULL, mcip, (mac_resource_handle_t)tx_ring); - mac_srs->srs_oth_ring_count++; + mac_srs->srs_tx_ring_count++; + mac_srs_update_fanout_list(mac_srs); /* * put this soft ring in quiesce mode too so when we restart * all soft rings in the srs are in the same state. 
@@ -3177,7 +3777,7 @@ mac_soft_ring_remove(mac_soft_ring_set_t *mac_srs, mac_soft_ring_t *softring) mac_srs->srs_soft_ring_condemned_count--; mutex_exit(&mac_srs->srs_lock); - mac_soft_ring_free(softring, B_FALSE); + mac_soft_ring_free(softring); } void @@ -3185,70 +3785,59 @@ mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) { int i; mac_soft_ring_t *soft_ring, *remove_sring; + mac_client_impl_t *mcip = mac_srs->srs_mcip; mutex_enter(&mac_srs->srs_lock); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - soft_ring = mac_srs->srs_oth_soft_rings[i]; + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + soft_ring = mac_srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) break; } mutex_exit(&mac_srs->srs_lock); - ASSERT(i < mac_srs->srs_oth_ring_count); + ASSERT(i < mac_srs->srs_tx_ring_count); remove_sring = soft_ring; + /* + * In the case of aggr, the soft ring associated with a Tx ring + * is also stored in st_soft_rings[] array. That entry should + * be removed. + */ + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring); + tx->st_soft_rings[tx_ring->mr_index] = NULL; + } mac_soft_ring_remove(mac_srs, remove_sring); mac_srs_update_fanout_list(mac_srs); } /* * mac_tx_srs_setup(): - * * Used to setup Tx rings. If no free Tx ring is available, then default * Tx ring is used. 
*/ void -mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent, - uint32_t srs_type) +mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent) { - mac_impl_t *mip = mcip->mci_mip; - mac_soft_ring_set_t *tx_srs; - int i, tx_ring_count = 0, tx_rings_reserved = 0; - mac_ring_handle_t *tx_rings = NULL; - uint32_t soft_ring_type; - mac_group_t *grp = NULL; - mac_ring_t *ring; - mac_srs_tx_t *tx; - boolean_t serialize = B_FALSE; - - tx_srs = flent->fe_tx_srs; - tx = &tx_srs->srs_tx; - - if (tx->st_group != NULL) { - grp = tx->st_group; - tx_ring_count = grp->mrg_cur_count; - } else { - tx_ring_count = mac_tx_ring_count; - } - - if (tx_ring_count != 0) { - tx_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * - tx_ring_count, KM_SLEEP); - } - - /* - * Just use the default ring for now. We need to use - * the underlying link's ring set instead of the underlying - * NIC's. - */ - if (srs_type == SRST_FLOW || - (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) { - /* use default ring */ - tx_rings[0] = (void *)mip->mi_default_tx_ring; - tx_rings_reserved++; - goto rings_assigned; - } - - if (mcip->mci_share != NULL) - ring = grp->mrg_rings; + mac_impl_t *mip = mcip->mci_mip; + mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs; + int i; + int tx_ring_count = 0; + uint32_t soft_ring_type; + mac_group_t *grp = NULL; + mac_ring_t *ring; + mac_srs_tx_t *tx = &tx_srs->srs_tx; + boolean_t is_aggr; + uint_t ring_info = 0; + + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0; + grp = flent->fe_tx_ring_group; + if (grp == NULL) { + ring = (mac_ring_t *)mip->mi_default_tx_ring; + goto no_group; + } + tx_ring_count = grp->mrg_cur_count; + ring = grp->mrg_rings; /* * An attempt is made to reserve 'tx_ring_count' number * of Tx rings. If tx_ring_count is 0, default Tx ring @@ -3258,87 +3847,80 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * then each Tx ring will have a Tx-side soft ring. All * these soft rings will be hang off Tx SRS. 
*/ - for (i = 0; i < tx_ring_count; i++) { - if (mcip->mci_share != NULL) { - /* - * The ring was already chosen and associated - * with the TX group. Save it in the new - * array to keep as much of the code below common - * between the share and non-share cases. - */ - ASSERT(ring != NULL); - tx_rings[i] = (mac_ring_handle_t)ring; - ring = ring->mr_next; - } else { - tx_rings[i] = - (mac_ring_handle_t)mac_reserve_tx_ring(mip, NULL); - if (tx_rings[i] == NULL) { - /* - * We have run out of Tx rings. So - * give the default ring too. - */ - tx_rings[i] = (void *)mip->mi_default_tx_ring; - tx_rings_reserved++; + switch (grp->mrg_state) { + case MAC_GROUP_STATE_SHARED: + case MAC_GROUP_STATE_RESERVED: + if (tx_ring_count <= 1 && !is_aggr) { +no_group: + if (ring != NULL && + ring->mr_state != MR_INUSE) { + (void) mac_start_ring(ring); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)ring); + } + tx->st_arg2 = (void *)ring; + mac_tx_srs_stat_recreate(tx_srs, B_FALSE); + if (tx_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = SRS_TX_BW; + } else if (mac_tx_serialize || + (ring_info & MAC_RING_TX_SERIALIZE)) { + tx->st_mode = SRS_TX_SERIALIZE; + } else { + tx->st_mode = SRS_TX_DEFAULT; + } break; } - } - tx_rings_reserved++; - } - -rings_assigned: - if (mac_tx_serialize || (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) - serialize = B_TRUE; - /* - * Did we get the requested number of tx rings? - * There are 2 actions we can take depending upon the number - * of tx_rings we got. - * 1) If we got one, then get the tx_ring from the soft ring, - * save it in SRS and free up the soft ring. - * 2) If we got more than 1, then do the tx fanout among the - * rings we obtained. 
- */ - ASSERT(tx_rings_reserved != 0); - if (tx_rings_reserved == 1) { - tx->st_arg2 = (void *)tx_rings[0]; - /* For ring_count of 0 or 1, set the tx_mode and return */ - if (tx_srs->srs_type & SRST_BW_CONTROL) - tx->st_mode = SRS_TX_BW; - else if (serialize) - tx->st_mode = SRS_TX_SERIALIZE; - else - tx->st_mode = SRS_TX_DEFAULT; - } else { - /* - * We got multiple Tx rings for Tx fanout. - */ - soft_ring_type = ST_RING_OTH | ST_RING_TX; - if (tx_srs->srs_type & SRST_BW_CONTROL) { - tx->st_mode = SRS_TX_BW_FANOUT; - } else { - tx->st_mode = SRS_TX_FANOUT; - if (serialize) - soft_ring_type |= ST_RING_WORKER_ONLY; - } - for (i = 0; i < tx_rings_reserved; i++) { - (void) mac_soft_ring_create(i, 0, NULL, soft_ring_type, - maxclsyspri, mcip, tx_srs, -1, NULL, mcip, - (mac_resource_handle_t)tx_rings[i]); - } - mac_srs_update_fanout_list(tx_srs); + soft_ring_type = ST_RING_TX; + if (tx_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = is_aggr ? + SRS_TX_BW_AGGR : SRS_TX_BW_FANOUT; + } else { + tx->st_mode = is_aggr ? 
SRS_TX_AGGR : + SRS_TX_FANOUT; + } + for (i = 0; i < tx_ring_count; i++) { + ASSERT(ring != NULL); + switch (ring->mr_state) { + case MR_INUSE: + case MR_FREE: + ASSERT(ring->mr_srs == NULL); + + if (ring->mr_state != MR_INUSE) + (void) mac_start_ring(ring); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)ring); + if (mac_tx_serialize || (ring_info & + MAC_RING_TX_SERIALIZE)) { + soft_ring_type |= + ST_RING_WORKER_ONLY; + } + (void) mac_soft_ring_create(i, 0, + soft_ring_type, maxclsyspri, + mcip, tx_srs, -1, NULL, mcip, + (mac_resource_handle_t)ring); + break; + default: + cmn_err(CE_PANIC, + "srs_setup: mcip = %p " + "trying to add UNKNOWN ring = %p\n", + (void *)mcip, (void *)ring); + break; + } + ring = ring->mr_next; + } + mac_srs_update_fanout_list(tx_srs); + break; + default: + ASSERT(B_FALSE); + break; } tx->st_func = mac_tx_get_func(tx->st_mode); - - DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs, - int, tx->st_mode, int, tx_srs->srs_oth_ring_count); - - if (tx_ring_count != 0) { - tx->st_ring_count = tx_rings_reserved; - tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * - tx_rings_reserved, KM_SLEEP); - for (i = 0; i < tx->st_ring_count; i++) - tx->st_rings[i] = tx_rings[i]; - kmem_free(tx_rings, sizeof (mac_ring_handle_t) * tx_ring_count); + if (is_aggr) { + VERIFY(i_mac_capab_get((mac_handle_t)mip, + MAC_CAPAB_AGGR, &tx->st_capab_aggr)); } + DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs, + int, tx->st_mode, int, tx_srs->srs_tx_ring_count); } /* @@ -3346,10 +3928,14 @@ rings_assigned: * its current link speed. 
*/ void -mac_fanout_recompute_client(mac_client_impl_t *mcip) +mac_fanout_recompute_client(mac_client_impl_t *mcip, cpupart_t *cpupart) { uint64_t link_speed; mac_resource_props_t *mcip_mrp; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *rx_srs; + mac_cpus_t *srs_cpu; + int soft_ring_count, maxcpus; ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); @@ -3359,8 +3945,31 @@ mac_fanout_recompute_client(mac_client_impl_t *mcip) if ((link_speed != 0) && (link_speed != mcip->mci_flent->fe_nic_speed)) { mcip_mrp = MCIP_RESOURCE_PROPS(mcip); - mac_fanout_setup(mcip, mcip->mci_flent, - mcip_mrp, mac_rx_deliver, mcip, NULL); + /* + * Before calling mac_fanout_setup(), check to see if + * the SRSes already have the right number of soft + * rings. mac_fanout_setup() is a heavy duty operation + * where new cpu bindings are done for SRS and soft + * ring threads and interrupts re-targeted. + */ + maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus; + soft_ring_count = mac_compute_soft_ring_count(flent, + flent->fe_rx_srs_cnt - 1, maxcpus); + /* + * If soft_ring_count returned by + * mac_compute_soft_ring_count() is 0, bump it + * up by 1 because we always have atleast one + * TCP, UDP, and OTH soft ring associated with + * an SRS. + */ + soft_ring_count = (soft_ring_count == 0) ? 
+ 1 : soft_ring_count; + rx_srs = flent->fe_rx_srs[0]; + srs_cpu = &rx_srs->srs_cpu; + if (soft_ring_count != srs_cpu->mc_rx_fanout_cnt) { + mac_fanout_setup(mcip, flent, mcip_mrp, + mac_rx_deliver, mcip, NULL, cpupart); + } } } @@ -3376,6 +3985,9 @@ void mac_fanout_recompute(mac_impl_t *mip) { mac_client_impl_t *mcip; + cpupart_t *cpupart; + boolean_t use_default; + mac_resource_props_t *mrp, *emrp; i_mac_perim_enter(mip); if ((mip->mi_state_flags & MIS_IS_VNIC) != 0 || @@ -3389,7 +4001,14 @@ mac_fanout_recompute(mac_impl_t *mip) if ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0 || !MCIP_DATAPATH_SETUP(mcip)) continue; - mac_fanout_recompute_client(mcip); + mrp = MCIP_RESOURCE_PROPS(mcip); + emrp = MCIP_EFFECTIVE_PROPS(mcip); + use_default = B_FALSE; + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_recompute_client(mcip, cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); } i_mac_perim_exit(mip); } diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c index 16b5ec4396..aa4985fe4c 100644 --- a/usr/src/uts/common/io/mac/mac_flow.c +++ b/usr/src/uts/common/io/mac/mac_flow.c @@ -29,10 +29,14 @@ #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> +#include <sys/mac_stat.h> #include <sys/dls.h> #include <sys/dls_impl.h> #include <sys/mac_soft_ring.h> #include <sys/ethernet.h> +#include <sys/cpupart.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> #include <sys/vlan.h> #include <inet/ip.h> #include <inet/ip6.h> @@ -40,6 +44,16 @@ #include <netinet/udp.h> #include <netinet/sctp.h> +typedef struct flow_stats_s { + uint64_t fs_obytes; + uint64_t fs_opackets; + uint64_t fs_oerrors; + uint64_t fs_ibytes; + uint64_t fs_ipackets; + uint64_t fs_ierrors; +} flow_stats_t; + + /* global flow table, will be a per exclusive-zone table later */ static mod_hash_t *flow_hash; static krwlock_t flow_tab_lock; @@ -55,7 +69,7 @@ typedef struct { #define FS_OFF(f) 
(offsetof(flow_stats_t, f)) static flow_stats_info_t flow_stats_list[] = { - {"rbytes", FS_OFF(fs_rbytes)}, + {"rbytes", FS_OFF(fs_ibytes)}, {"ipackets", FS_OFF(fs_ipackets)}, {"ierrors", FS_OFF(fs_ierrors)}, {"obytes", FS_OFF(fs_obytes)}, @@ -83,19 +97,48 @@ flow_stat_init(kstat_named_t *knp) static int flow_stat_update(kstat_t *ksp, int rw) { - flow_entry_t *fep = ksp->ks_private; - flow_stats_t *fsp = &fep->fe_flowstats; - kstat_named_t *knp = ksp->ks_data; - uint64_t *statp; - int i; + flow_entry_t *fep = ksp->ks_private; + kstat_named_t *knp = ksp->ks_data; + uint64_t *statp; + int i; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + flow_stats_t flow_stats; + mac_soft_ring_set_t *mac_srs; if (rw != KSTAT_READ) return (EACCES); + bzero(&flow_stats, sizeof (flow_stats_t)); + + for (i = 0; i < fep->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i]; + if (mac_srs == NULL) /* Multicast flow */ + break; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; + + flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + + flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors; + } + + mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs; + if (mac_srs == NULL) /* Multicast flow */ + goto done; + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + flow_stats.fs_obytes = mac_tx_stat->mts_obytes; + flow_stats.fs_opackets = mac_tx_stat->mts_opackets; + flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors; + +done: for (i = 0; i < FS_SIZE; i++, knp++) { statp = (uint64_t *) - ((uchar_t *)fsp + flow_stats_list[i].fs_offset); - + ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset); knp->value.ui64 = *statp; } return (0); @@ -170,11 +213,11 @@ int mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, void *client_cookie, uint_t type, flow_entry_t **flentp) { - flow_entry_t *flent = 
*flentp; - int err = 0; + flow_entry_t *flent = *flentp; + int err = 0; if (mrp != NULL) { - err = mac_validate_props(mrp); + err = mac_validate_props(NULL, mrp); if (err != 0) return (err); } @@ -221,6 +264,8 @@ mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; else mrp->mrp_priority = MPL_LINK_DEFAULT; + bzero(mrp->mrp_pool, MAXPATHLEN); + bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t)); bcopy(mrp, &flent->fe_effective_props, sizeof (mac_resource_props_t)); } @@ -593,7 +638,7 @@ mac_flow_destroy(flow_entry_t *flent) } else { mac_flow_cleanup(flent); } - + mac_misc_stat_delete(flent); mutex_destroy(&flent->fe_lock); cv_destroy(&flent->fe_cv); flow_stat_destroy(flent); @@ -617,13 +662,15 @@ mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) int i; if ((mrp->mrp_mask & MRP_MAXBW) != 0 && - (fmrp->mrp_maxbw != mrp->mrp_maxbw)) { + (!(fmrp->mrp_mask & MRP_MAXBW) || + (fmrp->mrp_maxbw != mrp->mrp_maxbw))) { changed_mask |= MRP_MAXBW; - fmrp->mrp_maxbw = mrp->mrp_maxbw; if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { fmrp->mrp_mask &= ~MRP_MAXBW; + fmrp->mrp_maxbw = 0; } else { fmrp->mrp_mask |= MRP_MAXBW; + fmrp->mrp_maxbw = mrp->mrp_maxbw; } } @@ -658,6 +705,22 @@ mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) changed_mask |= MRP_CPUS; MAC_COPY_CPUS(mrp, fmrp); } + + /* + * Modify the rings property. 
+ */ + if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS) + mac_set_rings_effective(flent->fe_mcip); + + if ((mrp->mrp_mask & MRP_POOL) != 0) { + if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0) + changed_mask |= MRP_POOL; + if (strlen(mrp->mrp_pool) == 0) + fmrp->mrp_mask &= ~MRP_POOL; + else + fmrp->mrp_mask |= MRP_POOL; + (void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN); + } return (changed_mask); } @@ -667,6 +730,9 @@ mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) uint32_t changed_mask; mac_client_impl_t *mcip = flent->fe_mcip; mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + cpupart_t *cpupart = NULL; + boolean_t use_default = B_FALSE; ASSERT(flent != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); @@ -693,14 +759,24 @@ mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) !(changed_mask & MRP_CPUS) && !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { mac_fanout_setup(mcip, flent, mcip_mrp, - mac_rx_deliver, mcip, NULL); + mac_rx_deliver, mcip, NULL, NULL); } } if (mrp->mrp_mask & MRP_PRIORITY) mac_flow_update_priority(mcip, flent); if (changed_mask & MRP_CPUS) - mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL); + mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, + NULL); + + if (mrp->mrp_mask & MRP_POOL) { + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, + cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); + } } /* @@ -1368,7 +1444,7 @@ mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) datalink_id_t linkid; flow_tab_t *flow_tab; - err = mac_validate_props(mrp); + err = mac_validate_props(NULL, mrp); if (err != 0) return (err); @@ -1445,10 +1521,14 @@ static int mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) { flow_walk_state_t *statep = arg; - 
mac_flowinfo_t finfo; + mac_flowinfo_t *finfo; + int err; - mac_link_flowinfo_copy(&finfo, flent); - return (statep->ws_func(&finfo, statep->ws_arg)); + finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); + mac_link_flowinfo_copy(finfo, flent); + err = statep->ws_func(finfo, statep->ws_arg); + kmem_free(finfo, sizeof (*finfo)); + return (err); } /* @@ -1885,18 +1965,19 @@ flow_ip_accept(flow_tab_t *ft, flow_state_t *s) break; } case ETHERTYPE_IPV6: { - ip6_t *ip6h = (ip6_t *)l3_start; - uint16_t ip6_hdrlen; - uint8_t nexthdr; + ip6_t *ip6h = (ip6_t *)l3_start; + ip6_frag_t *frag = NULL; + uint16_t ip6_hdrlen; + uint8_t nexthdr; - if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen, - &nexthdr, NULL, NULL)) { + if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen, + &nexthdr, &frag)) { return (ENOBUFS); } l3info->l3_hdrsize = ip6_hdrlen; l3info->l3_protocol = nexthdr; l3info->l3_version = IPV6_VERSION; - l3info->l3_fragmented = B_FALSE; + l3info->l3_fragmented = (frag != NULL); break; } default: diff --git a/usr/src/uts/common/io/mac/mac_hio.c b/usr/src/uts/common/io/mac/mac_hio.c index 9810ac821c..703c42b4cc 100644 --- a/usr/src/uts/common/io/mac/mac_hio.c +++ b/usr/src/uts/common/io/mac/mac_hio.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> +#include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> @@ -129,7 +130,7 @@ mac_share_bind(mac_client_handle_t mch, uint64_t cookie, uint64_t *rcookie) * there are no in flight packets through a transmit ring * which is being bound to another domain. 
*/ - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce(mch); /* * For the receive path, no traffic will be sent up through @@ -148,7 +149,7 @@ mac_share_bind(mac_client_handle_t mch, uint64_t cookie, uint64_t *rcookie) /* * Resume transmit traffic for the MAC client. */ - mac_tx_client_restart(mcip); + mac_tx_client_restart(mch); i_mac_perim_exit(mip); @@ -182,7 +183,7 @@ mac_share_unbind(mac_client_handle_t mch) * been updated by mac_fanout_recompute(). Do the check here * now that the share has been unbound. */ - mac_fanout_recompute_client(mcip); + mac_fanout_recompute_client(mcip, NULL); i_mac_perim_exit(mip); } diff --git a/usr/src/uts/common/io/mac/mac_ndd.c b/usr/src/uts/common/io/mac/mac_ndd.c index 9d4fc4bc18..cf99ef64be 100644 --- a/usr/src/uts/common/io/mac/mac_ndd.c +++ b/usr/src/uts/common/io/mac/mac_ndd.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,6 +29,7 @@ #include <sys/types.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_client_priv.h> #include <inet/nd.h> #include <sys/mac_ether.h> #include <sys/policy.h> @@ -95,17 +96,16 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) { int size_out, i; mblk_t *tmp; - mac_priv_prop_t *mpriv; uint_t permflags; int status; uint64_t value; + char *prop_name; if (!mac_add_name(mp, "?", MAC_PROP_PERM_READ)) return (-1); /* first the known ndd mappings */ for (i = 0; i < mip->mi_type->mt_mappingcount; i++) { - permflags = MAC_PROP_PERM_RW; if ((mip->mi_type->mt_mapping[i].mp_flags & MAC_PROP_MAP_KSTAT) != 0) permflags = MAC_PROP_PERM_READ; @@ -113,8 +113,13 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) status = mip->mi_callbacks->mc_getprop(mip->mi_driver, mip->mi_type->mt_mapping[i].mp_name, mip->mi_type->mt_mapping[i].mp_prop_id, - 0, mip->mi_type->mt_mapping[i].mp_valsize, - &value, &permflags); + mip->mi_type->mt_mapping[i].mp_valsize, &value); + if (status != 0) + continue; + status = mac_prop_info((mac_handle_t)mip, + mip->mi_type->mt_mapping[i].mp_prop_id, + mip->mi_type->mt_mapping[i].mp_name, NULL, 0, + NULL, &permflags); if (status != 0) continue; } @@ -126,10 +131,14 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) /* now the driver's ndd variables */ for (i = 0; i < mip->mi_priv_prop_count; i++) { - mpriv = &mip->mi_priv_prop[i]; + prop_name = mip->mi_priv_prop[i]; + + if (mac_prop_info((mac_handle_t)mip, MAC_PROP_PRIVATE, + prop_name, NULL, 0, NULL, &permflags) != 0) + return (-1); /* skip over the "_" */ - if (!mac_add_name(mp, &mpriv->mpp_name[1], mpriv->mpp_flags)) + if (!mac_add_name(mp, &prop_name[1], permflags)) return (-1); } @@ -185,7 +194,6 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) uint16_t u16; uint32_t u32; uint64_t u64; - uint_t perm; if (mp->b_cont == NULL || avail < 2) return (EINVAL); @@ -258,9 +266,8 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) 
new_value = u32 = (long)u64; } else { status = mip->mi_callbacks->mc_getprop(mip->mi_driver, - name, mip->mi_type->mt_mapping[i].mp_prop_id, 0, - mip->mi_type->mt_mapping[i].mp_valsize, value, - &perm); + name, mip->mi_type->mt_mapping[i].mp_prop_id, + mip->mi_type->mt_mapping[i].mp_valsize, value); switch (mip->mi_type->mt_mapping[i].mp_valsize) { case 1: new_value = u8; @@ -294,7 +301,7 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) */ (void) snprintf(priv_name, sizeof (priv_name), "_%s", name); status = mip->mi_callbacks->mc_getprop(mip->mi_driver, priv_name, - MAC_PROP_PRIVATE, 0, avail - 2, mp1->b_rptr, &perm); + MAC_PROP_PRIVATE, avail - 2, mp1->b_rptr); if (status != 0) goto get_done; diff --git a/usr/src/uts/common/io/mac/mac_protect.c b/usr/src/uts/common/io/mac/mac_protect.c index 8bd527c8d5..c923bcdbe2 100644 --- a/usr/src/uts/common/io/mac/mac_protect.c +++ b/usr/src/uts/common/io/mac/mac_protect.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,68 +33,1668 @@ #include <sys/ethernet.h> #include <sys/vlan.h> #include <sys/dlpi.h> +#include <sys/avl.h> #include <inet/ip.h> #include <inet/ip6.h> #include <inet/arp.h> +#include <netinet/arp.h> +#include <netinet/udp.h> +#include <netinet/dhcp.h> +#include <netinet/dhcp6.h> /* - * Check if ipaddr is in the 'allowed-ips' list. + * Implementation overview for DHCP address detection + * + * The purpose of DHCP address detection is to relieve the user of having to + * manually configure static IP addresses when ip-nospoof protection is turned + * on. To achieve this, the mac layer needs to intercept DHCP packets to + * determine the assigned IP addresses. + * + * A DHCP handshake between client and server typically requires at least + * 4 messages: + * + * 1. 
DISCOVER - client attempts to locate DHCP servers via a + * broadcast message to its subnet. + * 2. OFFER - server responds to client with an IP address and + * other parameters. + * 3. REQUEST - client requests the offered address. + * 4. ACK - server verifies that the requested address matches + * the one it offered. + * + * DHCPv6 behaves pretty much the same way aside from different message names. + * + * Address information is embedded in either the OFFER or REQUEST message. + * We chose to intercept REQUEST because this is at the last part of the + * handshake and it indicates that the client intends to keep the address. + * Intercepting OFFERs is unreliable because the client may receive multiple + * offers from different servers, and we can't tell which address the client + * will keep. + * + * Each DHCP message has a transaction ID. We use this transaction ID to match + * REQUESTs with ACKs received from servers. + * + * For IPv4, the process to acquire a DHCP-assigned address is as follows: + * + * 1. Client sends REQUEST. A new dhcpv4_txn_t object is created and inserted + * in the mci_v4_pending_txn table (keyed by xid). This object represents + * a new transaction. It contains the xid, the client ID and requested IP + * address. + * + * 2. Server responds with an ACK. The xid from this ACK is used to look up the + * pending transaction from the mci_v4_pending_txn table. Once the object is + * found, it is removed from the pending table and inserted into the + * completed table (mci_v4_completed_txn, keyed by client ID) and the dynamic + * IP table (mci_v4_dyn_ip, keyed by IP address). + * + * 3. An outgoing packet that goes through the ip-nospoof path will be checked + * against the dynamic IP table. Packets that have the assigned DHCP address + * as the source IP address will pass the check and be admitted onto the + * network. 
+ * + * IPv4 notes: + * + * If the server never responds with an ACK, there is a timer that is set after + * the insertion of the transaction into the pending table. When the timer + * fires, it will check whether the transaction is old (by comparing current + * time and the txn's timestamp); if so, the transaction will be freed. Along + * with this, any transaction in the completed/dyn-ip tables matching the client + * ID of this stale transaction will also be freed. If the client fails to + * extend a lease, we want to stop the client from using any IP addresses that + * were granted previously. + * + * A RELEASE message from the client will not cause a transaction to be created. + * The client ID in the RELEASE message will be used for finding and removing + * transactions in the completed and dyn-ip tables. + * + * + * For IPv6, the process to acquire a DHCPv6-assigned address is as follows: + * + * 1. Client sends REQUEST. The DUID is extracted and stored into a dhcpv6_cid_t + * structure. A new transaction structure (dhcpv6_txn_t) is also created and + * it will point to the dhcpv6_cid_t. If an existing transaction with a + * matching xid is not found, this dhcpv6_txn_t will be inserted into the + * mci_v6_pending_txn table (keyed by xid). + * + * 2. Server responds with a REPLY. If a pending transaction is found, the + * addresses in the reply will be placed into the dhcpv6_cid_t pointed to by + * the transaction. The dhcpv6_cid_t will then be moved to the mci_v6_cid + * table (keyed by cid). The associated addresses will be added to the + * mci_v6_dyn_ip table (while still being pointed to by the dhcpv6_cid_t). + * + * 3. IPv6 ip-nospoof will now check mci_v6_dyn_ip for matching packets. + * Packets with a source address matching one of the DHCPv6-assigned + * addresses will be allowed through. + * + * IPv6 notes: + * + * The v6 code shares the same timer as v4 for scrubbing stale transactions. 
+ * Just like v4, as part of removing an expired transaction, a RELEASE will be + * be triggered on the cid associated with the expired transaction. + * + * The data structures used for v6 are slightly different because a v6 client + * may have multiple addresses associated with it. + */ + +/* + * These are just arbitrary limits meant for preventing abuse (e.g. a user + * flooding the network with bogus transactions). They are not meant to be + * user-modifiable so they are not exposed as linkprops. + */ +static ulong_t dhcp_max_pending_txn = 512; +static ulong_t dhcp_max_completed_txn = 512; +static time_t txn_cleanup_interval = 60; + +/* + * DHCPv4 transaction. It may be added to three different tables + * (keyed by different fields). + */ +typedef struct dhcpv4_txn { + uint32_t dt_xid; + time_t dt_timestamp; + uint8_t dt_cid[DHCP_MAX_OPT_SIZE]; + uint8_t dt_cid_len; + ipaddr_t dt_ipaddr; + avl_node_t dt_node; + avl_node_t dt_ipnode; + struct dhcpv4_txn *dt_next; +} dhcpv4_txn_t; + +/* + * DHCPv6 address. May be added to mci_v6_dyn_ip. + * It is always pointed to by its parent dhcpv6_cid_t structure. + */ +typedef struct dhcpv6_addr { + in6_addr_t da_addr; + avl_node_t da_node; + struct dhcpv6_addr *da_next; +} dhcpv6_addr_t; + +/* + * DHCPv6 client ID. May be added to mci_v6_cid. + * No dhcpv6_txn_t should be pointing to it after it is added to mci_v6_cid. + */ +typedef struct dhcpv6_cid { + uchar_t *dc_cid; + uint_t dc_cid_len; + dhcpv6_addr_t *dc_addr; + uint_t dc_addrcnt; + avl_node_t dc_node; +} dhcpv6_cid_t; + +/* + * DHCPv6 transaction. Unlike its v4 counterpart, this object gets freed up + * as soon as the transaction completes or expires. 
+ */ +typedef struct dhcpv6_txn { + uint32_t dt_xid; + time_t dt_timestamp; + dhcpv6_cid_t *dt_cid; + avl_node_t dt_node; + struct dhcpv6_txn *dt_next; +} dhcpv6_txn_t; + +static void start_txn_cleanup_timer(mac_client_impl_t *); + +#define BUMP_STAT(m, s) (m)->mci_misc_stat.mms_##s++ + +/* + * Comparison functions for the 3 AVL trees used: + * mci_v4_pending_txn, mci_v4_completed_txn, mci_v4_dyn_ip + */ +static int +compare_dhcpv4_xid(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_xid < txn2->dt_xid) + return (-1); + else if (txn1->dt_xid > txn2->dt_xid) + return (1); + else + return (0); +} + +static int +compare_dhcpv4_cid(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + int ret; + + if (txn1->dt_cid_len < txn2->dt_cid_len) + return (-1); + else if (txn1->dt_cid_len > txn2->dt_cid_len) + return (1); + + if (txn1->dt_cid_len == 0) + return (0); + + ret = memcmp(txn1->dt_cid, txn2->dt_cid, txn1->dt_cid_len); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +static int +compare_dhcpv4_ip(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_ipaddr < txn2->dt_ipaddr) + return (-1); + else if (txn1->dt_ipaddr > txn2->dt_ipaddr) + return (1); + else + return (0); +} + +/* + * Find the specified DHCPv4 option. + */ +static int +get_dhcpv4_option(struct dhcp *dh4, uchar_t *end, uint8_t type, + uchar_t **opt, uint8_t *opt_len) +{ + uchar_t *start = (uchar_t *)dh4->options; + uint8_t otype, olen; + + while (start < end) { + if (*start == CD_PAD) { + start++; + continue; + } + if (*start == CD_END) + break; + + otype = *start++; + olen = *start++; + if (otype == type && olen > 0) { + *opt = start; + *opt_len = olen; + return (0); + } + start += olen; + } + return (ENOENT); +} + +/* + * Locate the start of a DHCPv4 header. 
+ * The possible return values and associated meanings are: + * 0 - packet is DHCP and has a DHCP header. + * EINVAL - packet is not DHCP. the recommended action is to let it pass. + * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable. + * the recommended action is to drop it. + */ +static int +get_dhcpv4_info(ipha_t *ipha, uchar_t *end, struct dhcp **dh4) +{ + uint16_t offset_and_flags, client, server; + boolean_t first_frag = B_FALSE; + struct udphdr *udph; + uchar_t *dh; + + if (ipha->ipha_protocol != IPPROTO_UDP) + return (EINVAL); + + offset_and_flags = ntohs(ipha->ipha_fragment_offset_and_flags); + if ((offset_and_flags & (IPH_MF | IPH_OFFSET)) != 0) { + /* + * All non-initial fragments may pass because we cannot + * identify their type. It's safe to let them through + * because reassembly will fail if we decide to drop the + * initial fragment. + */ + if (((offset_and_flags << 3) & 0xffff) != 0) + return (EINVAL); + first_frag = B_TRUE; + } + /* drop packets without a udp header */ + udph = (struct udphdr *)((uchar_t *)ipha + IPH_HDR_LENGTH(ipha)); + if ((uchar_t *)&udph[1] > end) + return (ENOSPC); + + client = htons(IPPORT_BOOTPC); + server = htons(IPPORT_BOOTPS); + if (udph->uh_sport != client && udph->uh_sport != server && + udph->uh_dport != client && udph->uh_dport != server) + return (EINVAL); + + /* drop dhcp fragments */ + if (first_frag) + return (ENOSPC); + + dh = (uchar_t *)&udph[1]; + if (dh + BASE_PKT_SIZE > end) + return (EINVAL); + + *dh4 = (struct dhcp *)dh; + return (0); +} + +/* + * Wrappers for accesses to avl trees to improve readability. + * Their purposes are fairly self-explanatory. 
+ */ +static dhcpv4_txn_t * +find_dhcpv4_pending_txn(mac_client_impl_t *mcip, uint32_t xid) +{ + dhcpv4_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + tmp_txn.dt_xid = xid; + return (avl_find(&mcip->mci_v4_pending_txn, &tmp_txn, NULL)); +} + +static int +insert_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v4_pending_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v4_pending_txn) >= dhcp_max_pending_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v4_pending_txn, txn, where); + return (0); +} + +static void +remove_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v4_pending_txn, txn); +} + +static dhcpv4_txn_t * +find_dhcpv4_completed_txn(mac_client_impl_t *mcip, uint8_t *cid, + uint8_t cid_len) +{ + dhcpv4_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (cid_len > 0) + bcopy(cid, tmp_txn.dt_cid, cid_len); + tmp_txn.dt_cid_len = cid_len; + return (avl_find(&mcip->mci_v4_completed_txn, &tmp_txn, NULL)); +} + +/* + * After a pending txn is removed from the pending table, it is inserted + * into both the completed and dyn-ip tables. These two insertions are + * done together because a client ID must have 1:1 correspondence with + * an IP address and IP addresses must be unique in the dyn-ip table. 
+ */ +static int +insert_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v4_completed_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v4_completed_txn) >= + dhcp_max_completed_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + + avl_insert(&mcip->mci_v4_completed_txn, txn, where); + if (avl_find(&mcip->mci_v4_dyn_ip, txn, &where) != NULL) { + avl_remove(&mcip->mci_v4_completed_txn, txn); + return (EEXIST); + } + avl_insert(&mcip->mci_v4_dyn_ip, txn, where); + return (0); +} + +static void +remove_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + dhcpv4_txn_t *ctxn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if ((ctxn = avl_find(&mcip->mci_v4_dyn_ip, txn, NULL)) != NULL && + ctxn == txn) + avl_remove(&mcip->mci_v4_dyn_ip, txn); + + avl_remove(&mcip->mci_v4_completed_txn, txn); +} + +/* + * Check whether an IP address is in the dyn-ip table. */ static boolean_t -ipnospoof_check_ips(mac_protect_t *protect, ipaddr_t ipaddr) +check_dhcpv4_dyn_ip(mac_client_impl_t *mcip, ipaddr_t ipaddr) +{ + dhcpv4_txn_t tmp_txn, *txn; + + mutex_enter(&mcip->mci_protect_lock); + tmp_txn.dt_ipaddr = ipaddr; + txn = avl_find(&mcip->mci_v4_dyn_ip, &tmp_txn, NULL); + mutex_exit(&mcip->mci_protect_lock); + return (txn != NULL); +} + +/* + * Create/destroy a DHCPv4 transaction. + */ +static dhcpv4_txn_t * +create_dhcpv4_txn(uint32_t xid, uint8_t *cid, uint8_t cid_len, ipaddr_t ipaddr) +{ + dhcpv4_txn_t *txn; + + if ((txn = kmem_zalloc(sizeof (*txn), KM_NOSLEEP)) == NULL) + return (NULL); + + txn->dt_xid = xid; + txn->dt_timestamp = ddi_get_time(); + if (cid_len > 0) + bcopy(cid, &txn->dt_cid, cid_len); + txn->dt_cid_len = cid_len; + txn->dt_ipaddr = ipaddr; + return (txn); +} + +static void +free_dhcpv4_txn(dhcpv4_txn_t *txn) +{ + kmem_free(txn, sizeof (*txn)); +} + +/* + * Clean up all v4 tables. 
+ */ +static void +flush_dhcpv4(mac_client_impl_t *mcip) +{ + void *cookie = NULL; + dhcpv4_txn_t *txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + while ((txn = avl_destroy_nodes(&mcip->mci_v4_dyn_ip, + &cookie)) != NULL) { + /* + * No freeing needed here because the same txn exists + * in the mci_v4_completed_txn table as well. + */ + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v4_completed_txn, + &cookie)) != NULL) { + free_dhcpv4_txn(txn); + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v4_pending_txn, + &cookie)) != NULL) { + free_dhcpv4_txn(txn); + } +} + +/* + * Cleanup stale DHCPv4 transactions. + */ +static void +txn_cleanup_v4(mac_client_impl_t *mcip) { - uint_t i; + dhcpv4_txn_t *txn, *ctxn, *next, *txn_list = NULL; /* - * unspecified addresses are harmless and are used by ARP,DHCP..etc. + * Find stale pending transactions and place them on a list + * to be removed. */ - if (ipaddr == INADDR_ANY) - return (B_TRUE); + for (txn = avl_first(&mcip->mci_v4_pending_txn); txn != NULL; + txn = avl_walk(&mcip->mci_v4_pending_txn, txn, AVL_AFTER)) { + if (ddi_get_time() - txn->dt_timestamp > + txn_cleanup_interval) { + DTRACE_PROBE2(found__expired__txn, + mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); - for (i = 0; i < protect->mp_ipaddrcnt; i++) { - if (protect->mp_ipaddrs[i] == ipaddr) - return (B_TRUE); + txn->dt_next = txn_list; + txn_list = txn; + } } - return (B_FALSE); + + /* + * Remove and free stale pending transactions and completed + * transactions with the same client IDs as the stale transactions. 
+ */ + for (txn = txn_list; txn != NULL; txn = next) { + avl_remove(&mcip->mci_v4_pending_txn, txn); + + ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, + txn->dt_cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(removing__completed__txn, + mac_client_impl_t *, mcip, + dhcpv4_txn_t *, ctxn); + + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + next = txn->dt_next; + txn->dt_next = NULL; + + DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + } +} + +/* + * Core logic for intercepting outbound DHCPv4 packets. + */ +static void +intercept_dhcpv4_outbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end) +{ + struct dhcp *dh4; + uchar_t *opt; + dhcpv4_txn_t *txn, *ctxn; + ipaddr_t ipaddr; + uint8_t opt_len, mtype, cid[DHCP_MAX_OPT_SIZE], cid_len; + + if (get_dhcpv4_info(ipha, end, &dh4) != 0) + return; + + if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 || + opt_len != 1) { + DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + return; + } + mtype = *opt; + if (mtype != REQUEST && mtype != RELEASE) { + DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip, + struct dhcp *, dh4, uint8_t, mtype); + return; + } + + /* client ID is optional for IPv4 */ + if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &opt, &opt_len) == 0 && + opt_len >= 2) { + bcopy(opt, cid, opt_len); + cid_len = opt_len; + } else { + bzero(cid, DHCP_MAX_OPT_SIZE); + cid_len = 0; + } + + mutex_enter(&mcip->mci_protect_lock); + if (mtype == RELEASE) { + DTRACE_PROBE2(release, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + + /* flush any completed txn with this cid */ + ctxn = find_dhcpv4_completed_txn(mcip, cid, cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(release__successful, mac_client_impl_t *, + mcip, struct dhcp *, dh4); + + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + goto done; + } + + /* + * If a pending txn already exists, we'll 
update its timestamp so + * it won't get flushed by the timer. We don't need to create new + * txns for retransmissions. + */ + if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) != NULL) { + DTRACE_PROBE2(update, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + txn->dt_timestamp = ddi_get_time(); + goto done; + } + + if (get_dhcpv4_option(dh4, end, CD_REQUESTED_IP_ADDR, + &opt, &opt_len) != 0 || opt_len != sizeof (ipaddr)) { + DTRACE_PROBE2(ipaddr__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + goto done; + } + bcopy(opt, &ipaddr, sizeof (ipaddr)); + if ((txn = create_dhcpv4_txn(dh4->xid, cid, cid_len, ipaddr)) == NULL) + goto done; + + if (insert_dhcpv4_pending_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + start_txn_cleanup_timer(mcip); + + DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + +done: + mutex_exit(&mcip->mci_protect_lock); } /* - * Enforce ip-nospoof protection. Only IPv4 is supported for now. + * Core logic for intercepting inbound DHCPv4 packets. 
+ */ +static void +intercept_dhcpv4_inbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end) +{ + uchar_t *opt; + struct dhcp *dh4; + dhcpv4_txn_t *txn, *ctxn; + uint8_t opt_len, mtype; + + if (get_dhcpv4_info(ipha, end, &dh4) != 0) + return; + + if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 || + opt_len != 1) { + DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + return; + } + mtype = *opt; + if (mtype != ACK && mtype != NAK) { + DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip, + struct dhcp *, dh4, uint8_t, mtype); + return; + } + + mutex_enter(&mcip->mci_protect_lock); + if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) == NULL) { + DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + goto done; + } + remove_dhcpv4_pending_txn(mcip, txn); + + /* + * We're about to move a txn from the pending table to the completed/ + * dyn-ip tables. If there is an existing completed txn with the + * same cid as our txn, we need to remove and free it. + */ + ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, txn->dt_cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(replacing__old__txn, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, ctxn); + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + if (mtype == NAK) { + DTRACE_PROBE2(nak__received, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + if (insert_dhcpv4_completed_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + +done: + mutex_exit(&mcip->mci_protect_lock); +} + + +/* + * Comparison functions for the DHCPv6 AVL trees. 
*/ static int -ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, - mblk_t *mp, mac_header_info_t *mhip) +compare_dhcpv6_xid(const void *arg1, const void *arg2) { - uint32_t sap = mhip->mhi_bindsap; - uchar_t *start = mp->b_rptr + mhip->mhi_hdrsize; - int err = EINVAL; + const dhcpv6_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_xid < txn2->dt_xid) + return (-1); + else if (txn1->dt_xid > txn2->dt_xid) + return (1); + else + return (0); +} + +static int +compare_dhcpv6_ip(const void *arg1, const void *arg2) +{ + const dhcpv6_addr_t *ip1 = arg1, *ip2 = arg2; + int ret; + + ret = memcmp(&ip1->da_addr, &ip2->da_addr, sizeof (in6_addr_t)); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +static int +compare_dhcpv6_cid(const void *arg1, const void *arg2) +{ + const dhcpv6_cid_t *cid1 = arg1, *cid2 = arg2; + int ret; + + if (cid1->dc_cid_len < cid2->dc_cid_len) + return (-1); + else if (cid1->dc_cid_len > cid2->dc_cid_len) + return (1); + + if (cid1->dc_cid_len == 0) + return (0); + + ret = memcmp(cid1->dc_cid, cid2->dc_cid, cid1->dc_cid_len); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +/* + * Locate the start of a DHCPv6 header. + * The possible return values and associated meanings are: + * 0 - packet is DHCP and has a DHCP header. + * EINVAL - packet is not DHCP. the recommended action is to let it pass. + * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable. + * the recommended action is to drop it. 
+ */ +static int +get_dhcpv6_info(ip6_t *ip6h, uchar_t *end, dhcpv6_message_t **dh6) +{ + uint16_t hdrlen, client, server; + boolean_t first_frag = B_FALSE; + ip6_frag_t *frag = NULL; + uint8_t proto; + struct udphdr *udph; + uchar_t *dh; + + if (!mac_ip_hdr_length_v6(ip6h, end, &hdrlen, &proto, &frag)) + return (ENOSPC); + + if (proto != IPPROTO_UDP) + return (EINVAL); + + if (frag != NULL) { + /* + * All non-initial fragments may pass because we cannot + * identify their type. It's safe to let them through + * because reassembly will fail if we decide to drop the + * initial fragment. + */ + if ((ntohs(frag->ip6f_offlg) & ~7) != 0) + return (EINVAL); + first_frag = B_TRUE; + } + /* drop packets without a udp header */ + udph = (struct udphdr *)((uchar_t *)ip6h + hdrlen); + if ((uchar_t *)&udph[1] > end) + return (ENOSPC); + + client = htons(IPPORT_DHCPV6C); + server = htons(IPPORT_DHCPV6S); + if (udph->uh_sport != client && udph->uh_sport != server && + udph->uh_dport != client && udph->uh_dport != server) + return (EINVAL); + + /* drop dhcp fragments */ + if (first_frag) + return (ENOSPC); + + dh = (uchar_t *)&udph[1]; + if (dh + sizeof (dhcpv6_message_t) > end) + return (EINVAL); + + *dh6 = (dhcpv6_message_t *)dh; + return (0); +} + +/* + * Find the specified DHCPv6 option. 
+ */ +static dhcpv6_option_t * +get_dhcpv6_option(void *buf, size_t buflen, dhcpv6_option_t *oldopt, + uint16_t codenum, uint_t *retlenp) +{ + uchar_t *bp; + dhcpv6_option_t d6o; + uint_t olen; + + codenum = htons(codenum); + bp = buf; + while (buflen >= sizeof (dhcpv6_option_t)) { + bcopy(bp, &d6o, sizeof (d6o)); + olen = ntohs(d6o.d6o_len) + sizeof (d6o); + if (olen > buflen) + break; + if (d6o.d6o_code != codenum || d6o.d6o_len == 0 || + (oldopt != NULL && bp <= (uchar_t *)oldopt)) { + bp += olen; + buflen -= olen; + continue; + } + if (retlenp != NULL) + *retlenp = olen; + /* LINTED : alignment */ + return ((dhcpv6_option_t *)bp); + } + return (NULL); +} + +/* + * Get the status code from a reply message. + */ +static int +get_dhcpv6_status(dhcpv6_message_t *dh6, uchar_t *end, uint16_t *status) +{ + dhcpv6_option_t *d6o; + uint_t olen; + uint16_t s; + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_STATUS_CODE, &olen); + + /* Success is implied if status code is missing */ + if (d6o == NULL) { + *status = DHCPV6_STAT_SUCCESS; + return (0); + } + if ((uchar_t *)d6o + olen > end) + return (EINVAL); + + olen -= sizeof (*d6o); + if (olen < sizeof (s)) + return (EINVAL); + + bcopy(&d6o[1], &s, sizeof (s)); + *status = ntohs(s); + return (0); +} + +/* + * Get the addresses from a reply message. 
+ */ +static int +get_dhcpv6_addrs(dhcpv6_message_t *dh6, uchar_t *end, dhcpv6_cid_t *cid) +{ + dhcpv6_option_t *d6o; + dhcpv6_addr_t *next; + uint_t olen; + + d6o = NULL; + while ((d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], + d6o, DHCPV6_OPT_IA_NA, &olen)) != NULL) { + dhcpv6_option_t *d6so; + dhcpv6_iaaddr_t d6ia; + dhcpv6_addr_t **addrp; + uchar_t *obase; + uint_t solen; + + if (olen < sizeof (dhcpv6_ia_na_t) || + (uchar_t *)d6o + olen > end) + goto fail; + + obase = (uchar_t *)d6o + sizeof (dhcpv6_ia_na_t); + olen -= sizeof (dhcpv6_ia_na_t); + d6so = NULL; + while ((d6so = get_dhcpv6_option(obase, olen, d6so, + DHCPV6_OPT_IAADDR, &solen)) != NULL) { + if (solen < sizeof (dhcpv6_iaaddr_t) || + (uchar_t *)d6so + solen > end) + goto fail; + + bcopy(d6so, &d6ia, sizeof (d6ia)); + for (addrp = &cid->dc_addr; *addrp != NULL; + addrp = &(*addrp)->da_next) { + if (bcmp(&(*addrp)->da_addr, &d6ia.d6ia_addr, + sizeof (in6_addr_t)) == 0) + goto fail; + } + if ((*addrp = kmem_zalloc(sizeof (dhcpv6_addr_t), + KM_NOSLEEP)) == NULL) + goto fail; + + bcopy(&d6ia.d6ia_addr, &(*addrp)->da_addr, + sizeof (in6_addr_t)); + cid->dc_addrcnt++; + } + } + if (cid->dc_addrcnt == 0) + return (ENOENT); + + return (0); + +fail: + for (; cid->dc_addr != NULL; cid->dc_addr = next) { + next = cid->dc_addr->da_next; + kmem_free(cid->dc_addr, sizeof (dhcpv6_addr_t)); + cid->dc_addrcnt--; + } + ASSERT(cid->dc_addrcnt == 0); + return (EINVAL); +} + +/* + * Free a cid. + * Before this gets called the caller must ensure that all the + * addresses are removed from the mci_v6_dyn_ip table. + */ +static void +free_dhcpv6_cid(dhcpv6_cid_t *cid) +{ + dhcpv6_addr_t *addr, *next; + uint_t cnt = 0; + + kmem_free(cid->dc_cid, cid->dc_cid_len); + for (addr = cid->dc_addr; addr != NULL; addr = next) { + next = addr->da_next; + kmem_free(addr, sizeof (*addr)); + cnt++; + } + ASSERT(cnt == cid->dc_addrcnt); + kmem_free(cid, sizeof (*cid)); +} + +/* + * Extract the DUID from a message. 
The associated addresses will be + * extracted later from the reply message. + */ +static dhcpv6_cid_t * +create_dhcpv6_cid(dhcpv6_message_t *dh6, uchar_t *end) +{ + dhcpv6_option_t *d6o; + dhcpv6_cid_t *cid; + uchar_t *rawcid; + uint_t olen, rawcidlen; + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_CLIENTID, &olen); + if (d6o == NULL || (uchar_t *)d6o + olen > end) + return (NULL); + + rawcidlen = olen - sizeof (*d6o); + if ((rawcid = kmem_zalloc(rawcidlen, KM_NOSLEEP)) == NULL) + return (NULL); + bcopy(d6o + 1, rawcid, rawcidlen); + + if ((cid = kmem_zalloc(sizeof (*cid), KM_NOSLEEP)) == NULL) { + kmem_free(rawcid, rawcidlen); + return (NULL); + } + cid->dc_cid = rawcid; + cid->dc_cid_len = rawcidlen; + return (cid); +} + +/* + * Remove a cid from mci_v6_cid. The addresses owned by the cid + * are also removed from mci_v6_dyn_ip. + */ +static void +remove_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + dhcpv6_addr_t *addr, *tmp_addr; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v6_cid, cid); + for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) { + tmp_addr = avl_find(&mcip->mci_v6_dyn_ip, addr, NULL); + if (tmp_addr == addr) + avl_remove(&mcip->mci_v6_dyn_ip, addr); + } +} + +/* + * Find and remove a matching cid and associated addresses from + * their respective tables. + */ +static void +release_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + dhcpv6_cid_t *oldcid; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if ((oldcid = avl_find(&mcip->mci_v6_cid, cid, NULL)) == NULL) + return; + + /* + * Since cid belongs to a pending txn, it can't possibly be in + * mci_v6_cid. Anything that's found must be an existing cid. + */ + ASSERT(oldcid != cid); + remove_dhcpv6_cid(mcip, oldcid); + free_dhcpv6_cid(oldcid); +} + +/* + * Insert cid into mci_v6_cid. 
+ */ +static int +insert_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + avl_index_t where; + dhcpv6_addr_t *addr; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v6_cid, cid, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v6_cid) >= dhcp_max_completed_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v6_cid, cid, where); + for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) { + if (avl_find(&mcip->mci_v6_dyn_ip, addr, &where) != NULL) + goto fail; + + avl_insert(&mcip->mci_v6_dyn_ip, addr, where); + } + return (0); + +fail: + remove_dhcpv6_cid(mcip, cid); + return (EEXIST); +} + +/* + * Check whether an IP address is in the dyn-ip table. + */ +static boolean_t +check_dhcpv6_dyn_ip(mac_client_impl_t *mcip, in6_addr_t *addr) +{ + dhcpv6_addr_t tmp_addr, *a; + + mutex_enter(&mcip->mci_protect_lock); + bcopy(addr, &tmp_addr.da_addr, sizeof (in6_addr_t)); + a = avl_find(&mcip->mci_v6_dyn_ip, &tmp_addr, NULL); + mutex_exit(&mcip->mci_protect_lock); + return (a != NULL); +} + +static dhcpv6_txn_t * +find_dhcpv6_pending_txn(mac_client_impl_t *mcip, uint32_t xid) +{ + dhcpv6_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + tmp_txn.dt_xid = xid; + return (avl_find(&mcip->mci_v6_pending_txn, &tmp_txn, NULL)); +} + +static void +remove_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v6_pending_txn, txn); +} + +static dhcpv6_txn_t * +create_dhcpv6_txn(uint32_t xid, dhcpv6_cid_t *cid) +{ + dhcpv6_txn_t *txn; + + if ((txn = kmem_zalloc(sizeof (dhcpv6_txn_t), KM_NOSLEEP)) == NULL) + return (NULL); + + txn->dt_xid = xid; + txn->dt_cid = cid; + txn->dt_timestamp = ddi_get_time(); + return (txn); +} + +static void +free_dhcpv6_txn(dhcpv6_txn_t *txn) +{ + if (txn->dt_cid != NULL) + free_dhcpv6_cid(txn->dt_cid); + kmem_free(txn, sizeof (dhcpv6_txn_t)); +} + +static int 
+insert_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v6_pending_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v6_pending_txn) >= dhcp_max_pending_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v6_pending_txn, txn, where); + return (0); +} + +/* + * Clean up all v6 tables. + */ +static void +flush_dhcpv6(mac_client_impl_t *mcip) +{ + void *cookie = NULL; + dhcpv6_cid_t *cid; + dhcpv6_txn_t *txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + while (avl_destroy_nodes(&mcip->mci_v6_dyn_ip, &cookie) != NULL) { + } + cookie = NULL; + while ((cid = avl_destroy_nodes(&mcip->mci_v6_cid, &cookie)) != NULL) { + free_dhcpv6_cid(cid); + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v6_pending_txn, + &cookie)) != NULL) { + free_dhcpv6_txn(txn); + } +} + +/* + * Cleanup stale DHCPv6 transactions. + */ +static void +txn_cleanup_v6(mac_client_impl_t *mcip) +{ + dhcpv6_txn_t *txn, *next, *txn_list = NULL; /* - * This handles the case where the mac header is not in - * the same mblk as the IP header. + * Find stale pending transactions and place them on a list + * to be removed. */ - if (start == mp->b_wptr) { - mp = mp->b_cont; + for (txn = avl_first(&mcip->mci_v6_pending_txn); txn != NULL; + txn = avl_walk(&mcip->mci_v6_pending_txn, txn, AVL_AFTER)) { + if (ddi_get_time() - txn->dt_timestamp > + txn_cleanup_interval) { + DTRACE_PROBE2(found__expired__txn, + mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + txn->dt_next = txn_list; + txn_list = txn; + } + } + + /* + * Remove and free stale pending transactions. + * Release any existing cids matching the stale transactions. 
+ */ + for (txn = txn_list; txn != NULL; txn = next) { + avl_remove(&mcip->mci_v6_pending_txn, txn); + release_dhcpv6_cid(mcip, txn->dt_cid); + next = txn->dt_next; + txn->dt_next = NULL; + + DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + free_dhcpv6_txn(txn); + } + +} + +/* + * Core logic for intercepting outbound DHCPv6 packets. + */ +static void +intercept_dhcpv6_outbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_txn_t *txn; + dhcpv6_cid_t *cid = NULL; + uint32_t xid; + uint8_t mtype; + + if (get_dhcpv6_info(ip6h, end, &dh6) != 0) + return; + + mtype = dh6->d6m_msg_type; + if (mtype != DHCPV6_MSG_REQUEST && mtype != DHCPV6_MSG_RENEW && + mtype != DHCPV6_MSG_REBIND && mtype != DHCPV6_MSG_RELEASE) + return; + + if ((cid = create_dhcpv6_cid(dh6, end)) == NULL) + return; + + mutex_enter(&mcip->mci_protect_lock); + if (mtype == DHCPV6_MSG_RELEASE) { + release_dhcpv6_cid(mcip, cid); + goto done; + } + xid = DHCPV6_GET_TRANSID(dh6); + if ((txn = find_dhcpv6_pending_txn(mcip, xid)) != NULL) { + DTRACE_PROBE2(update, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + txn->dt_timestamp = ddi_get_time(); + goto done; + } + if ((txn = create_dhcpv6_txn(xid, cid)) == NULL) + goto done; + + cid = NULL; + if (insert_dhcpv6_pending_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + free_dhcpv6_txn(txn); + goto done; + } + start_txn_cleanup_timer(mcip); + + DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + +done: + if (cid != NULL) + free_dhcpv6_cid(cid); + + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Core logic for intercepting inbound DHCPv6 packets. 
+ */ +static void +intercept_dhcpv6_inbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_txn_t *txn; + uint32_t xid; + uint8_t mtype; + uint16_t status; + + if (get_dhcpv6_info(ip6h, end, &dh6) != 0) + return; + + mtype = dh6->d6m_msg_type; + if (mtype != DHCPV6_MSG_REPLY) + return; + + mutex_enter(&mcip->mci_protect_lock); + xid = DHCPV6_GET_TRANSID(dh6); + if ((txn = find_dhcpv6_pending_txn(mcip, xid)) == NULL) { + DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip, + dhcpv6_message_t *, dh6); + goto done; + } + remove_dhcpv6_pending_txn(mcip, txn); + release_dhcpv6_cid(mcip, txn->dt_cid); + + if (get_dhcpv6_status(dh6, end, &status) != 0 || + status != DHCPV6_STAT_SUCCESS) { + DTRACE_PROBE2(error__status, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + if (get_dhcpv6_addrs(dh6, end, txn->dt_cid) != 0) { + DTRACE_PROBE2(no__addrs, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + if (insert_dhcpv6_cid(mcip, txn->dt_cid) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + + txn->dt_cid = NULL; + +done: + if (txn != NULL) + free_dhcpv6_txn(txn); + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Timer for cleaning up stale transactions. + */ +static void +txn_cleanup_timer(void *arg) +{ + mac_client_impl_t *mcip = arg; + + mutex_enter(&mcip->mci_protect_lock); + if (mcip->mci_txn_cleanup_tid == 0) { + /* do nothing if timer got cancelled */ + mutex_exit(&mcip->mci_protect_lock); + return; + } + mcip->mci_txn_cleanup_tid = 0; + + txn_cleanup_v4(mcip); + txn_cleanup_v6(mcip); + + /* + * Restart timer if pending transactions still exist. 
+ */ + if (!avl_is_empty(&mcip->mci_v4_pending_txn) || + !avl_is_empty(&mcip->mci_v6_pending_txn)) { + DTRACE_PROBE1(restarting__timer, mac_client_impl_t *, mcip); + + mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip, + drv_usectohz(txn_cleanup_interval * 1000000)); + } + mutex_exit(&mcip->mci_protect_lock); +} + +static void +start_txn_cleanup_timer(mac_client_impl_t *mcip) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (mcip->mci_txn_cleanup_tid == 0) { + mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip, + drv_usectohz(txn_cleanup_interval * 1000000)); + } +} + +static void +cancel_txn_cleanup_timer(mac_client_impl_t *mcip) +{ + timeout_id_t tid; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + + /* + * This needs to be a while loop because the timer could get + * rearmed during untimeout(). + */ + while ((tid = mcip->mci_txn_cleanup_tid) != 0) { + mcip->mci_txn_cleanup_tid = 0; + mutex_exit(&mcip->mci_protect_lock); + (void) untimeout(tid); + mutex_enter(&mcip->mci_protect_lock); + } +} + +/* + * Get the start/end pointers of an L3 packet and also do pullup if needed. + * pulled-up packet needs to be freed by the caller. + */ +static int +get_l3_info(mblk_t *mp, size_t hdrsize, uchar_t **start, uchar_t **end, + mblk_t **nmp) +{ + uchar_t *s, *e; + mblk_t *newmp = NULL; + + /* + * Pullup if necessary but reject packets that do not have + * a proper mac header. + */ + s = mp->b_rptr + hdrsize; + e = mp->b_wptr; + + if (s > mp->b_wptr) + return (EINVAL); + + if (!OK_32PTR(s) || mp->b_cont != NULL) { /* - * IP header missing. Let the packet through. + * Temporarily adjust mp->b_rptr to ensure proper + * alignment of IP header in newmp. 
*/ - if (mp == NULL) - return (0); + DTRACE_PROBE1(pullup__needed, mblk_t *, mp); + + mp->b_rptr += hdrsize; + newmp = msgpullup(mp, -1); + mp->b_rptr -= hdrsize; + + if (newmp == NULL) + return (ENOMEM); + + s = newmp->b_rptr; + e = newmp->b_wptr; + } + + *start = s; + *end = e; + *nmp = newmp; + return (0); +} + +void +mac_protect_intercept_dhcp_one(mac_client_impl_t *mcip, mblk_t *mp) +{ + mac_impl_t *mip = mcip->mci_mip; + uchar_t *start, *end; + mblk_t *nmp = NULL; + mac_header_info_t mhi; + int err; + + err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi); + if (err != 0) { + DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip, + mblk_t *, mp); + return; + } + + err = get_l3_info(mp, mhi.mhi_hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return; + } + + switch (mhi.mhi_bindsap) { + case ETHERTYPE_IP: { + ipha_t *ipha = (ipha_t *)start; + + if (start + sizeof (ipha_t) > end) + return; + + intercept_dhcpv4_inbound(mcip, ipha, end); + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + return; + + intercept_dhcpv6_inbound(mcip, ip6h, end); + break; + } + } + freemsg(nmp); +} + +void +mac_protect_intercept_dhcp(mac_client_impl_t *mcip, mblk_t *mp) +{ + /* + * Skip checks if we are part of an aggr. + */ + if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0) + return; + + for (; mp != NULL; mp = mp->b_next) + mac_protect_intercept_dhcp_one(mcip, mp); +} + +void +mac_protect_flush_dhcp(mac_client_impl_t *mcip) +{ + mutex_enter(&mcip->mci_protect_lock); + flush_dhcpv4(mcip); + flush_dhcpv6(mcip); + mutex_exit(&mcip->mci_protect_lock); +} + +void +mac_protect_cancel_timer(mac_client_impl_t *mcip) +{ + mutex_enter(&mcip->mci_protect_lock); + cancel_txn_cleanup_timer(mcip); + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Check if addr is in the 'allowed-ips' list. 
+ */ + +/* ARGSUSED */ +static boolean_t +ipnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *protect, + ipaddr_t *addr) +{ + uint_t i; + + /* + * The unspecified address is allowed. + */ + if (*addr == INADDR_ANY) + return (B_TRUE); + + for (i = 0; i < protect->mp_ipaddrcnt; i++) { + mac_ipaddr_t *v4addr = &protect->mp_ipaddrs[i]; + + if (v4addr->ip_version == IPV4_VERSION && + V4_PART_OF_V6(v4addr->ip_addr) == *addr) + return (B_TRUE); + } + return (check_dhcpv4_dyn_ip(mcip, *addr)); +} + +static boolean_t +ipnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *protect, + in6_addr_t *addr) +{ + uint_t i; + + /* + * The unspecified address and the v6 link local address are allowed. + */ + if (IN6_IS_ADDR_UNSPECIFIED(addr) || + ((mcip->mci_protect_flags & MPT_FLAG_V6_LOCAL_ADDR_SET) != 0 && + IN6_ARE_ADDR_EQUAL(&mcip->mci_v6_local_addr, addr))) + return (B_TRUE); + + + for (i = 0; i < protect->mp_ipaddrcnt; i++) { + mac_ipaddr_t *v6addr = &protect->mp_ipaddrs[i]; + + if (v6addr->ip_version == IPV6_VERSION && + IN6_ARE_ADDR_EQUAL(&v6addr->ip_addr, addr)) + return (B_TRUE); + } + return (check_dhcpv6_dyn_ip(mcip, addr)); +} + +/* + * Checks various fields within an IPv6 NDP packet. + */ +static boolean_t +ipnospoof_check_ndp(mac_client_impl_t *mcip, mac_protect_t *protect, + ip6_t *ip6h, uchar_t *end) +{ + icmp6_t *icmp_nd = (icmp6_t *)&ip6h[1]; + int hdrlen, optlen, opttype, len; + uint_t addrlen, maclen; + uint8_t type; + nd_opt_hdr_t *opt; + struct nd_opt_lla *lla = NULL; + + /* + * NDP packets do not have extension headers so the ICMPv6 header + * must immediately follow the IPv6 header. 
+ */ + if (ip6h->ip6_nxt != IPPROTO_ICMPV6) + return (B_TRUE); + + /* ICMPv6 header missing */ + if ((uchar_t *)&icmp_nd[1] > end) + return (B_FALSE); + + len = end - (uchar_t *)icmp_nd; + type = icmp_nd->icmp6_type; + + switch (type) { + case ND_ROUTER_SOLICIT: + hdrlen = sizeof (nd_router_solicit_t); + break; + case ND_ROUTER_ADVERT: + hdrlen = sizeof (nd_router_advert_t); + break; + case ND_NEIGHBOR_SOLICIT: + hdrlen = sizeof (nd_neighbor_solicit_t); + break; + case ND_NEIGHBOR_ADVERT: + hdrlen = sizeof (nd_neighbor_advert_t); + break; + case ND_REDIRECT: + hdrlen = sizeof (nd_redirect_t); + break; + default: + return (B_TRUE); + } + + if (len < hdrlen) + return (B_FALSE); + + /* SLLA option checking is needed for RS/RA/NS */ + opttype = ND_OPT_SOURCE_LINKADDR; + + switch (type) { + case ND_NEIGHBOR_ADVERT: { + nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp_nd; + + if (!ipnospoof_check_v6(mcip, protect, &na->nd_na_target)) { + DTRACE_PROBE2(ndp__na__fail, + mac_client_impl_t *, mcip, ip6_t *, ip6h); + return (B_FALSE); + } + + /* TLLA option for NA */ + opttype = ND_OPT_TARGET_LINKADDR; + break; + } + case ND_REDIRECT: { + /* option checking not needed for RD */ + return (B_TRUE); + } + default: + break; + } - start = mp->b_rptr; + if (len == hdrlen) { + /* no options, we're done */ + return (B_TRUE); } + opt = (nd_opt_hdr_t *)((uchar_t *)icmp_nd + hdrlen); + optlen = len - hdrlen; + + /* find the option header we need */ + while (optlen > sizeof (nd_opt_hdr_t)) { + if (opt->nd_opt_type == opttype) { + lla = (struct nd_opt_lla *)opt; + break; + } + optlen -= 8 * opt->nd_opt_len; + opt = (nd_opt_hdr_t *) + ((uchar_t *)opt + 8 * opt->nd_opt_len); + } + if (lla == NULL) + return (B_TRUE); + + addrlen = lla->nd_opt_lla_len * 8 - sizeof (nd_opt_hdr_t); + maclen = mcip->mci_mip->mi_info.mi_addr_length; + + if (addrlen != maclen || + bcmp(mcip->mci_unicast->ma_addr, + lla->nd_opt_lla_hdw_addr, maclen) != 0) { + DTRACE_PROBE2(ndp__lla__fail, + mac_client_impl_t 
*, mcip, ip6_t *, ip6h); + return (B_FALSE); + } + + DTRACE_PROBE2(ndp__lla__ok, mac_client_impl_t *, mcip, ip6_t *, ip6h); + return (B_TRUE); +} + +/* + * Enforce ip-nospoof protection. + */ +static int +ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, + mblk_t *mp, mac_header_info_t *mhip) +{ + size_t hdrsize = mhip->mhi_hdrsize; + uint32_t sap = mhip->mhi_bindsap; + uchar_t *start, *end; + mblk_t *nmp = NULL; + int err; + + err = get_l3_info(mp, hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return (err); + } + err = EINVAL; switch (sap) { case ETHERTYPE_IP: { ipha_t *ipha = (ipha_t *)start; - if (start + sizeof (ipha_t) > mp->b_wptr || !OK_32PTR(start)) + if (start + sizeof (ipha_t) > end) goto fail; - if (!ipnospoof_check_ips(protect, ipha->ipha_src)) + if (!ipnospoof_check_v4(mcip, protect, &ipha->ipha_src)) goto fail; + intercept_dhcpv4_outbound(mcip, ipha, end); break; } case ETHERTYPE_ARP: { @@ -103,7 +1703,7 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, ipaddr_t spaddr; uchar_t *shaddr; - if (start + sizeof (arh_t) > mp->b_wptr) + if (start + sizeof (arh_t) > end) goto fail; maclen = mcip->mci_mip->mi_info.mi_addr_length; @@ -114,7 +1714,7 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, goto fail; arplen = sizeof (arh_t) + 2 * hlen + 2 * plen; - if (start + arplen > mp->b_wptr) + if (start + arplen > end) goto fail; shaddr = start + sizeof (arh_t); @@ -123,20 +1723,230 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, goto fail; bcopy(shaddr + hlen, &spaddr, sizeof (spaddr)); - if (!ipnospoof_check_ips(protect, spaddr)) + if (!ipnospoof_check_v4(mcip, protect, &spaddr)) goto fail; break; } - default: + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + goto fail; + + if (!ipnospoof_check_v6(mcip, protect, &ip6h->ip6_src)) + goto fail; + + if 
(!ipnospoof_check_ndp(mcip, protect, ip6h, end)) + goto fail; + + intercept_dhcpv6_outbound(mcip, ip6h, end); break; } + } + freemsg(nmp); return (0); fail: - /* increment ipnospoof stat here */ + freemsg(nmp); return (err); } +static boolean_t +dhcpnospoof_check_cid(mac_protect_t *p, uchar_t *cid, uint_t cidlen) +{ + int i; + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *dcid = &p->mp_cids[i]; + + if (dcid->dc_len == cidlen && + bcmp(dcid->dc_id, cid, cidlen) == 0) + return (B_TRUE); + } + return (B_FALSE); +} + +static boolean_t +dhcpnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *p, + ipha_t *ipha, uchar_t *end) +{ + struct dhcp *dh4; + uchar_t *cid; + uint_t maclen, cidlen = 0; + uint8_t optlen; + int err; + + if ((err = get_dhcpv4_info(ipha, end, &dh4)) != 0) + return (err == EINVAL); + + maclen = mcip->mci_mip->mi_info.mi_addr_length; + if (dh4->hlen == maclen && + bcmp(mcip->mci_unicast->ma_addr, dh4->chaddr, maclen) != 0) { + return (B_FALSE); + } + if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &cid, &optlen) == 0) + cidlen = optlen; + + if (cidlen == 0) + return (B_TRUE); + + if (*cid == ARPHRD_ETHER && cidlen - 1 == maclen && + bcmp(mcip->mci_unicast->ma_addr, cid + 1, maclen) == 0) + return (B_TRUE); + + return (dhcpnospoof_check_cid(p, cid, cidlen)); +} + +static boolean_t +dhcpnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *p, + ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_option_t *d6o; + uint8_t mtype; + uchar_t *cid, *lladdr = NULL; + uint_t cidlen, maclen, addrlen = 0; + uint16_t cidtype; + int err; + + if ((err = get_dhcpv6_info(ip6h, end, &dh6)) != 0) + return (err == EINVAL); + + /* + * We only check client-generated messages. 
+ */ + mtype = dh6->d6m_msg_type; + if (mtype == DHCPV6_MSG_ADVERTISE || mtype == DHCPV6_MSG_REPLY || + mtype == DHCPV6_MSG_RECONFIGURE) + return (B_TRUE); + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_CLIENTID, &cidlen); + if (d6o == NULL || (uchar_t *)d6o + cidlen > end) + return (B_TRUE); + + cid = (uchar_t *)&d6o[1]; + cidlen -= sizeof (*d6o); + if (cidlen < sizeof (cidtype)) + return (B_TRUE); + + bcopy(cid, &cidtype, sizeof (cidtype)); + cidtype = ntohs(cidtype); + if (cidtype == DHCPV6_DUID_LLT && cidlen >= sizeof (duid_llt_t)) { + lladdr = cid + sizeof (duid_llt_t); + addrlen = cidlen - sizeof (duid_llt_t); + } + if (cidtype == DHCPV6_DUID_LL && cidlen >= sizeof (duid_ll_t)) { + lladdr = cid + sizeof (duid_ll_t); + addrlen = cidlen - sizeof (duid_ll_t); + } + maclen = mcip->mci_mip->mi_info.mi_addr_length; + if (lladdr != NULL && addrlen == maclen && + bcmp(mcip->mci_unicast->ma_addr, lladdr, maclen) == 0) { + return (B_TRUE); + } + return (dhcpnospoof_check_cid(p, cid, cidlen)); +} + +/* + * Enforce dhcp-nospoof protection. 
+ */ +static int +dhcpnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, + mblk_t *mp, mac_header_info_t *mhip) +{ + size_t hdrsize = mhip->mhi_hdrsize; + uint32_t sap = mhip->mhi_bindsap; + uchar_t *start, *end; + mblk_t *nmp = NULL; + int err; + + err = get_l3_info(mp, hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return (err); + } + err = EINVAL; + + switch (sap) { + case ETHERTYPE_IP: { + ipha_t *ipha = (ipha_t *)start; + + if (start + sizeof (ipha_t) > end) + goto fail; + + if (!dhcpnospoof_check_v4(mcip, protect, ipha, end)) + goto fail; + + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + goto fail; + + if (!dhcpnospoof_check_v6(mcip, protect, ip6h, end)) + goto fail; + + break; + } + } + freemsg(nmp); + return (0); + +fail: + /* increment dhcpnospoof stat here */ + freemsg(nmp); + return (err); +} + +/* + * This needs to be called whenever the mac client's mac address changes. + */ +void +mac_protect_update_v6_local_addr(mac_client_impl_t *mcip) +{ + uint8_t *p, *macaddr = mcip->mci_unicast->ma_addr; + uint_t i, media = mcip->mci_mip->mi_info.mi_media; + in6_addr_t token, *v6addr = &mcip->mci_v6_local_addr; + in6_addr_t ll_template = {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; + + + bzero(&token, sizeof (token)); + p = (uint8_t *)&token.s6_addr32[2]; + + switch (media) { + case DL_ETHER: + bcopy(macaddr, p, 3); + p[0] ^= 0x2; + p[3] = 0xff; + p[4] = 0xfe; + bcopy(macaddr + 3, p + 5, 3); + break; + case DL_IB: + ASSERT(mcip->mci_mip->mi_info.mi_addr_length == 20); + bcopy(macaddr + 12, p, 8); + p[0] |= 2; + break; + default: + /* + * We do not need to generate the local address for link types + * that do not support link protection. Wifi pretends to be + * ethernet so it is covered by the DL_ETHER case (note the + * use of mi_media instead of mi_nativemedia). 
+ */ + return; + } + + for (i = 0; i < 4; i++) { + v6addr->s6_addr32[i] = token.s6_addr32[i] | + ll_template.s6_addr32[i]; + } + mcip->mci_protect_flags |= MPT_FLAG_V6_LOCAL_ADDR_SET; +} + /* * Enforce link protection on one packet. */ @@ -159,7 +1969,6 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) mblk_t *, mp); return (err); } - protect = &mrp->mrp_protect; types = protect->mp_types; @@ -167,12 +1976,12 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) if (mhi.mhi_saddr != NULL && bcmp(mcip->mci_unicast->ma_addr, mhi.mhi_saddr, mip->mi_info.mi_addr_length) != 0) { + BUMP_STAT(mcip, macspoofed); DTRACE_PROBE2(mac__nospoof__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); } } - if ((types & MPT_RESTRICTED) != 0) { uint32_t vid = VLAN_ID(mhi.mhi_tci); uint32_t sap = mhi.mhi_bindsap; @@ -182,6 +1991,7 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) * the vid is not spoofed. */ if (vid != 0 && !mac_client_check_flow_vid(mcip, vid)) { + BUMP_STAT(mcip, restricted); DTRACE_PROBE2(restricted__vid__invalid, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); @@ -189,20 +1999,28 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) if (sap != ETHERTYPE_IP && sap != ETHERTYPE_IPV6 && sap != ETHERTYPE_ARP) { + BUMP_STAT(mcip, restricted); DTRACE_PROBE2(restricted__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); } } - if ((types & MPT_IPNOSPOOF) != 0) { - if ((err = ipnospoof_check(mcip, protect, - mp, &mhi)) != 0) { + if ((err = ipnospoof_check(mcip, protect, mp, &mhi)) != 0) { + BUMP_STAT(mcip, ipspoofed); DTRACE_PROBE2(ip__nospoof__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (err); } } + if ((types & MPT_DHCPNOSPOOF) != 0) { + if ((err = dhcpnospoof_check(mcip, protect, mp, &mhi)) != 0) { + BUMP_STAT(mcip, dhcpspoofed); + DTRACE_PROBE2(dhcp__nospoof__fail, + mac_client_impl_t *, mcip, mblk_t *, mp); + return (err); + } + } return (0); } @@ -242,11 +2060,89 @@ 
mac_protect_check(mac_client_handle_t mch, mblk_t *mp) boolean_t mac_protect_enabled(mac_client_handle_t mch, uint32_t type) { - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + return (MAC_PROTECT_ENABLED((mac_client_impl_t *)mch, type)); +} - ASSERT(mrp != NULL); - return ((mrp->mrp_protect.mp_types & type) != 0); +static int +validate_ips(mac_protect_t *p) +{ + uint_t i, j; + + if (p->mp_ipaddrcnt == MPT_RESET) + return (0); + + if (p->mp_ipaddrcnt > MPT_MAXIPADDR) + return (EINVAL); + + for (i = 0; i < p->mp_ipaddrcnt; i++) { + mac_ipaddr_t *addr = &p->mp_ipaddrs[i]; + + /* + * The unspecified address is implicitly allowed + * so there's no need to add it to the list. + */ + if (addr->ip_version == IPV4_VERSION) { + if (V4_PART_OF_V6(addr->ip_addr) == INADDR_ANY) + return (EINVAL); + } else if (addr->ip_version == IPV6_VERSION) { + if (IN6_IS_ADDR_UNSPECIFIED(&addr->ip_addr)) + return (EINVAL); + } else { + /* invalid ip version */ + return (EINVAL); + } + + for (j = 0; j < p->mp_ipaddrcnt; j++) { + mac_ipaddr_t *addr1 = &p->mp_ipaddrs[j]; + + if (i == j || addr->ip_version != addr1->ip_version) + continue; + + /* found a duplicate */ + if ((addr->ip_version == IPV4_VERSION && + V4_PART_OF_V6(addr->ip_addr) == + V4_PART_OF_V6(addr1->ip_addr)) || + IN6_ARE_ADDR_EQUAL(&addr->ip_addr, + &addr1->ip_addr)) + return (EINVAL); + } + } + return (0); +} + +/* ARGSUSED */ +static int +validate_cids(mac_protect_t *p) +{ + uint_t i, j; + + if (p->mp_cidcnt == MPT_RESET) + return (0); + + if (p->mp_cidcnt > MPT_MAXCID) + return (EINVAL); + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *cid = &p->mp_cids[i]; + + if (cid->dc_len > MPT_MAXCIDLEN || + (cid->dc_form != CIDFORM_TYPED && + cid->dc_form != CIDFORM_HEX && + cid->dc_form != CIDFORM_STR)) + return (EINVAL); + + for (j = 0; j < p->mp_cidcnt; j++) { + mac_dhcpcid_t *cid1 = &p->mp_cids[j]; + + if (i == j || cid->dc_len != cid1->dc_len) + continue; + + 
/* found a duplicate */ + if (bcmp(cid->dc_id, cid1->dc_id, cid->dc_len) == 0) + return (EINVAL); + } + } + return (0); } /* @@ -256,33 +2152,18 @@ int mac_protect_validate(mac_resource_props_t *mrp) { mac_protect_t *p = &mrp->mrp_protect; + int err; /* check for invalid types */ if (p->mp_types != MPT_RESET && (p->mp_types & ~MPT_ALL) != 0) return (EINVAL); - if (p->mp_ipaddrcnt != MPT_RESET) { - uint_t i, j; - - if (p->mp_ipaddrcnt > MPT_MAXIPADDR) - return (EINVAL); + if ((err = validate_ips(p)) != 0) + return (err); - for (i = 0; i < p->mp_ipaddrcnt; i++) { - /* - * The unspecified address is implicitly allowed - * so there's no need to add it to the list. - */ - if (p->mp_ipaddrs[i] == INADDR_ANY) - return (EINVAL); + if ((err = validate_cids(p)) != 0) + return (err); - for (j = 0; j < p->mp_ipaddrcnt; j++) { - /* found a duplicate */ - if (i != j && - p->mp_ipaddrs[i] == p->mp_ipaddrs[j]) - return (EINVAL); - } - } - } return (0); } @@ -326,9 +2207,8 @@ mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr) curr->mrp_mask |= MRP_PROTECT; } } - if (np->mp_ipaddrcnt != 0) { - if (np->mp_ipaddrcnt < MPT_MAXIPADDR) { + if (np->mp_ipaddrcnt <= MPT_MAXIPADDR) { bcopy(np->mp_ipaddrs, cp->mp_ipaddrs, sizeof (cp->mp_ipaddrs)); cp->mp_ipaddrcnt = np->mp_ipaddrcnt; @@ -337,4 +2217,47 @@ mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr) cp->mp_ipaddrcnt = 0; } } + if (np->mp_cidcnt != 0) { + if (np->mp_cidcnt <= MPT_MAXCID) { + bcopy(np->mp_cids, cp->mp_cids, sizeof (cp->mp_cids)); + cp->mp_cidcnt = np->mp_cidcnt; + } else if (np->mp_cidcnt == MPT_RESET) { + bzero(cp->mp_cids, sizeof (cp->mp_cids)); + cp->mp_cidcnt = 0; + } + } +} + +void +mac_protect_init(mac_client_impl_t *mcip) +{ + mutex_init(&mcip->mci_protect_lock, NULL, MUTEX_DRIVER, NULL); + mcip->mci_protect_flags = 0; + mcip->mci_txn_cleanup_tid = 0; + avl_create(&mcip->mci_v4_pending_txn, compare_dhcpv4_xid, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node)); + 
avl_create(&mcip->mci_v4_completed_txn, compare_dhcpv4_cid, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node)); + avl_create(&mcip->mci_v4_dyn_ip, compare_dhcpv4_ip, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_ipnode)); + avl_create(&mcip->mci_v6_pending_txn, compare_dhcpv6_xid, + sizeof (dhcpv6_txn_t), offsetof(dhcpv6_txn_t, dt_node)); + avl_create(&mcip->mci_v6_cid, compare_dhcpv6_cid, + sizeof (dhcpv6_cid_t), offsetof(dhcpv6_cid_t, dc_node)); + avl_create(&mcip->mci_v6_dyn_ip, compare_dhcpv6_ip, + sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node)); +} + +void +mac_protect_fini(mac_client_impl_t *mcip) +{ + avl_destroy(&mcip->mci_v6_dyn_ip); + avl_destroy(&mcip->mci_v6_cid); + avl_destroy(&mcip->mci_v6_pending_txn); + avl_destroy(&mcip->mci_v4_dyn_ip); + avl_destroy(&mcip->mci_v4_completed_txn); + avl_destroy(&mcip->mci_v4_pending_txn); + mcip->mci_txn_cleanup_tid = 0; + mcip->mci_protect_flags = 0; + mutex_destroy(&mcip->mci_protect_lock); } diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 43501e3505..f0fe17ae0a 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -40,6 +40,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/dld.h> #include <sys/modctl.h> #include <sys/fs/dv_node.h> @@ -53,6 +54,8 @@ #include <sys/ddi_intr_impl.h> #include <sys/disp.h> #include <sys/sdt.h> +#include <sys/pattr.h> +#include <sys/strsun.h> /* * MAC Provider Interface. @@ -298,8 +301,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) /* * Register the private properties. 
*/ - mac_register_priv_prop(mip, mregp->m_priv_props, - mregp->m_priv_prop_count); + mac_register_priv_prop(mip, mregp->m_priv_props); /* * Stash the driver callbacks into the mac_impl_t, but first sanity @@ -334,6 +336,9 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) * Initialize the capabilities */ + bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t)); + bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t)); + if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL)) mip->mi_state_flags |= MIS_IS_VNIC; @@ -371,18 +376,6 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) } /* - * The driver must set mc_tx entry point to NULL when it advertises - * CAP_RINGS for tx rings. - */ - if (mip->mi_tx_groups != NULL) { - if (mregp->m_callbacks->mc_tx != NULL) - goto fail; - } else { - if (mregp->m_callbacks->mc_tx == NULL) - goto fail; - } - - /* * Initialize MAC addresses. Must be called after mac_init_rings(). */ mac_init_macaddr(mip); @@ -396,7 +389,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) /* * Initialize the kstats for this device. */ - mac_stat_create(mip); + mac_driver_stat_create(mip); /* Zero out any properties. 
*/ bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t)); @@ -466,7 +459,7 @@ fail: mip->mi_info.mi_unicst_addr = NULL; } - mac_stat_destroy(mip); + mac_driver_stat_delete(mip); if (mip->mi_type != NULL) { atomic_dec_32(&mip->mi_type->mt_ref); @@ -484,6 +477,7 @@ fail: mac_minor_rele(minor); } + mip->mi_state_flags = 0; mac_unregister_priv_prop(mip); /* @@ -532,7 +526,7 @@ mac_unregister(mac_handle_t mh) ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags & MIS_EXCLUSIVE)); - mac_stat_destroy(mip); + mac_driver_stat_delete(mip); (void) mod_hash_remove(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name, &val); @@ -772,11 +766,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) void mac_tx_update(mac_handle_t mh) { - /* - * Walk the list of MAC clients (mac_client_handle) - * and update - */ - i_mac_tx_srs_notify((mac_impl_t *)mh, NULL); + mac_tx_ring_update(mh, NULL); } /* @@ -959,6 +949,151 @@ mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max) return (0); } +static void +mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring) +{ + mac_client_impl_t *mcip; + flow_entry_t *flent; + mac_soft_ring_set_t *mac_rx_srs; + mac_cpus_t *srs_cpu; + int i; + + if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) && + (!ring->mr_info.mri_intr.mi_ddi_shared)) { + /* interrupt can be re-targeted */ + ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); + flent = mcip->mci_flent; + if (ring->mr_type == MAC_RING_TYPE_RX) { + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_rx_srs = flent->fe_rx_srs[i]; + if (mac_rx_srs->srs_ring != ring) + continue; + srs_cpu = &mac_rx_srs->srs_cpu; + mutex_enter(&cpu_lock); + mac_rx_srs_retarget_intr(mac_rx_srs, + srs_cpu->mc_rx_intr_cpu); + mutex_exit(&cpu_lock); + break; + } + } else { + if (flent->fe_tx_srs != NULL) { + mutex_enter(&cpu_lock); + mac_tx_srs_retarget_intr( + flent->fe_tx_srs); + mutex_exit(&cpu_lock); + } + } + } +} + +/* + * Clients like aggr create pseudo rings (mac_ring_t) and 
expose them to + * their clients. There is a 1-1 mapping pseudo ring and the hardware + * ring. ddi interrupt handles are exported from the hardware ring to + * the pseudo ring. Thus when the interrupt handle changes, clients of + * aggr that are using the handle need to use the new handle and + * re-target their interrupts. + */ +static void +mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring, + ddi_intr_handle_t ddh) +{ + mac_ring_t *pring; + mac_group_t *pgroup; + mac_impl_t *pmip; + char macname[MAXNAMELEN]; + mac_perim_handle_t p_mph; + uint64_t saved_gen_num; + +again: + pring = (mac_ring_t *)ring->mr_prh; + pgroup = (mac_group_t *)pring->mr_gh; + pmip = (mac_impl_t *)pgroup->mrg_mh; + saved_gen_num = ring->mr_gen_num; + (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN); + /* + * We need to enter aggr's perimeter. The locking hierarchy + * dictates that aggr's perimeter should be entered first + * and then the port's perimeter. So drop the port's + * perimeter, enter aggr's and then re-enter port's + * perimeter. + */ + i_mac_perim_exit(mip); + /* + * While we know pmip is the aggr's mip, there is a + * possibility that aggr could have unregistered by + * the time we exit port's perimeter (mip) and + * enter aggr's perimeter (pmip). To avoid that + * scenario, enter aggr's perimeter using its name. + */ + if (mac_perim_enter_by_macname(macname, &p_mph) != 0) + return; + i_mac_perim_enter(mip); + /* + * Check if the ring got assigned to another aggregation before + * be could enter aggr's and the port's perimeter. When a ring + * gets deleted from an aggregation, it calls mac_stop_ring() + * which increments the generation number. So checking + * generation number will be enough. 
+ */ + if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) { + i_mac_perim_exit(mip); + mac_perim_exit(p_mph); + i_mac_perim_enter(mip); + goto again; + } + + /* Check if pseudo ring is still present */ + if (ring->mr_prh != NULL) { + pring->mr_info.mri_intr.mi_ddi_handle = ddh; + pring->mr_info.mri_intr.mi_ddi_shared = + ring->mr_info.mri_intr.mi_ddi_shared; + if (ddh != NULL) + mac_ring_intr_retarget(pgroup, pring); + } + i_mac_perim_exit(mip); + mac_perim_exit(p_mph); +} +/* + * API called by driver to provide new interrupt handle for TX/RX rings. + * This usually happens when IRM (Interrupt Resource Manangement) + * framework either gives the driver more MSI-x interrupts or takes + * away MSI-x interrupts from the driver. + */ +void +mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh) +{ + mac_ring_t *ring = (mac_ring_t *)mrh; + mac_group_t *group = (mac_group_t *)ring->mr_gh; + mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; + + i_mac_perim_enter(mip); + ring->mr_info.mri_intr.mi_ddi_handle = ddh; + if (ddh == NULL) { + /* Interrupts being reset */ + ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE; + if (ring->mr_prh != NULL) { + mac_pseudo_ring_intr_retarget(mip, ring, ddh); + return; + } + } else { + /* New interrupt handle */ + mac_compare_ddi_handle(mip->mi_rx_groups, + mip->mi_rx_group_count, ring); + if (!ring->mr_info.mri_intr.mi_ddi_shared) { + mac_compare_ddi_handle(mip->mi_tx_groups, + mip->mi_tx_group_count, ring); + } + if (ring->mr_prh != NULL) { + mac_pseudo_ring_intr_retarget(mip, ring, ddh); + return; + } else { + mac_ring_intr_retarget(group, ring); + } + } + i_mac_perim_exit(mip); +} + /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */ /* @@ -1141,16 +1276,8 @@ mac_group_add_ring(mac_group_handle_t gh, int index) int ret; i_mac_perim_enter(mip); - - /* - * Only RX rings can be added or removed by drivers currently. 
- */ - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ret = i_mac_group_add_ring(group, NULL, index); - i_mac_perim_exit(mip); - return (ret); } @@ -1166,13 +1293,167 @@ mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh) mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; i_mac_perim_enter(mip); + i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); + i_mac_perim_exit(mip); +} - /* - * Only RX rings can be added or removed by drivers currently. - */ - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); +/* + * mac_prop_info_*() callbacks called from the driver's prefix_propinfo() + * entry points. + */ - i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); +void +mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; - i_mac_perim_exit(mip); + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint8_t)); + + *(uint8_t *)(pr->pr_default) = val; + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint64_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint32_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str) +{ + mac_prop_info_state_t *pr = 
(mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + if (strlen(str) > pr->pr_default_size) + pr->pr_default_status = ENOBUFS; + else + (void) strlcpy(pr->pr_default, str, strlen(str)); + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph, + link_flowctrl_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min, + uint32_t max) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + mac_propval_range_t *range = pr->pr_range; + + /* nothing to do if the caller doesn't want the range info */ + if (range == NULL) + return; + + range->mpr_count = 1; + range->mpr_type = MAC_PROPVAL_UINT32; + range->mpr_range_uint32[0].mpur_min = min; + range->mpr_range_uint32[0].mpur_max = max; + pr->pr_flags |= MAC_PROP_INFO_RANGE; +} + +void +mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + pr->pr_perm = perm; + pr->pr_flags |= MAC_PROP_INFO_PERM; +} + +void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff, + uint32_t *end, uint32_t *value, uint32_t *flags_ptr) +{ + uint32_t flags; + + ASSERT(DB_TYPE(mp) == M_DATA); + + flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS; + if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) { + if (value != NULL) + *value = (uint32_t)DB_CKSUM16(mp); + if ((flags & HCK_PARTIALCKSUM) != 0) { + if (start != NULL) + *start = (uint32_t)DB_CKSUMSTART(mp); + if (stuff != NULL) + *stuff = (uint32_t)DB_CKSUMSTUFF(mp); + if (end != NULL) + *end = (uint32_t)DB_CKSUMEND(mp); 
+ } + } + + if (flags_ptr != NULL) + *flags_ptr = flags; +} + +void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, + uint32_t end, uint32_t value, uint32_t flags) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + DB_CKSUMSTART(mp) = (intptr_t)start; + DB_CKSUMSTUFF(mp) = (intptr_t)stuff; + DB_CKSUMEND(mp) = (intptr_t)end; + DB_CKSUMFLAGS(mp) = (uint16_t)flags; + DB_CKSUM16(mp) = (uint16_t)value; +} + +void +mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + if (flags != NULL) { + *flags = DB_CKSUMFLAGS(mp) & HW_LSO; + if ((*flags != 0) && (mss != NULL)) + *mss = (uint32_t)DB_LSOMSS(mp); + } } diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index 8b7f718497..9e1b2b0a55 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -50,6 +50,8 @@ static mac_tx_cookie_t mac_tx_fanout_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); static mac_tx_cookie_t mac_tx_bw_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); +static mac_tx_cookie_t mac_tx_aggr_mode(mac_soft_ring_set_t *, mblk_t *, + uintptr_t, uint16_t, mblk_t **); typedef struct mac_tx_mode_s { mac_tx_srs_mode_t mac_tx_mode; @@ -57,18 +59,34 @@ typedef struct mac_tx_mode_s { } mac_tx_mode_t; /* - * There are five modes of operation on the Tx side. These modes get set + * There are seven modes of operation on the Tx side. These modes get set * in mac_tx_srs_setup(). Except for the experimental TX_SERIALIZE mode, * none of the other modes are user configurable. They get selected by * the system depending upon whether the link (or flow) has multiple Tx - * rings or a bandwidth configured, etc. + * rings or a bandwidth configured, or if the link is an aggr, etc. 
+ * + * When the Tx SRS is operating in aggr mode (st_mode) or if there are + * multiple Tx rings owned by Tx SRS, then each Tx ring (pseudo or + * otherwise) will have a soft ring associated with it. These soft rings + * are stored in srs_tx_soft_rings[] array. + * + * Additionally in the case of aggr, there is the st_soft_rings[] array + * in the mac_srs_tx_t structure. This array is used to store the same + * set of soft rings that are present in srs_tx_soft_rings[] array but + * in a different manner. The soft ring associated with the pseudo Tx + * ring is saved at mr_index (of the pseudo ring) in st_soft_rings[] + * array. This helps in quickly getting the soft ring associated with the + * Tx ring when aggr_find_tx_ring() returns the pseudo Tx ring that is to + * be used for transmit. */ mac_tx_mode_t mac_tx_mode_list[] = { {SRS_TX_DEFAULT, mac_tx_single_ring_mode}, {SRS_TX_SERIALIZE, mac_tx_serializer_mode}, {SRS_TX_FANOUT, mac_tx_fanout_mode}, {SRS_TX_BW, mac_tx_bw_mode}, - {SRS_TX_BW_FANOUT, mac_tx_bw_mode} + {SRS_TX_BW_FANOUT, mac_tx_bw_mode}, + {SRS_TX_AGGR, mac_tx_aggr_mode}, + {SRS_TX_BW_AGGR, mac_tx_bw_mode} }; /* @@ -307,21 +325,16 @@ int mac_srs_worker_wakeup_ticks = 0; } \ } -#define TX_SINGLE_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_DEFAULT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_SERIALIZE || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW) - #define TX_BANDWIDTH_MODE(mac_srs) \ ((mac_srs)->srs_tx.st_mode == SRS_TX_BW || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT || \ + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_AGGR) #define TX_SRS_TO_SOFT_RING(mac_srs, head, hint) { \ - uint_t hash, indx; \ - hash = HASH_HINT(hint); \ - indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count); \ - softring = mac_srs->srs_oth_soft_rings[indx]; \ - (void) (mac_tx_soft_ring_process(softring, head, 0, NULL)); \ + if (tx_mode == SRS_TX_BW_FANOUT) \ + (void) mac_tx_fanout_mode(mac_srs, head, hint, 0, 
NULL);\ + else \ + (void) mac_tx_aggr_mode(mac_srs, head, hint, 0, NULL); \ } /* @@ -341,7 +354,7 @@ int mac_srs_worker_wakeup_ticks = 0; } else { \ ASSERT(!((srs)->srs_state & SRS_TX_BLOCKED)); \ (srs)->srs_state |= SRS_TX_BLOCKED; \ - (srs)->srs_tx.st_blocked_cnt++; \ + (srs)->srs_tx.st_stat.mts_blockcnt++; \ } \ } @@ -364,7 +377,7 @@ int mac_srs_worker_wakeup_ticks = 0; (srs)->srs_tx.st_hiwat_cnt++; \ if ((srs)->srs_count > (srs)->srs_tx.st_max_q_cnt) { \ /* increment freed stats */ \ - (srs)->srs_tx.st_drop_count += cnt; \ + (srs)->srs_tx.st_stat.mts_sdrops += cnt; \ /* \ * b_prev may be set to the fanout hint \ * hence can't use freemsg directly \ @@ -391,7 +404,7 @@ int mac_srs_worker_wakeup_ticks = 0; #define MAC_TX_SRS_DROP_MESSAGE(srs, mp, cookie) { \ mac_pkt_drop(NULL, NULL, mp, B_FALSE); \ /* increment freed stats */ \ - mac_srs->srs_tx.st_drop_count++; \ + mac_srs->srs_tx.st_stat.mts_sdrops++; \ cookie = (mac_tx_cookie_t)srs; \ } @@ -415,7 +428,7 @@ mac_rx_drop_pkt(mac_soft_ring_set_t *srs, mblk_t *mp) MAC_UPDATE_SRS_SIZE_LOCKED(srs, msgdsize(mp)); mutex_exit(&srs->srs_lock); - srs_rx->sr_drop_count++; + srs_rx->sr_stat.mrs_sdrops++; freemsg(mp); } @@ -448,7 +461,7 @@ mac_srs_fire(void *arg) * 'hint' is fanout_hint (type of uint64_t) which is given by the TCP/IP stack, * and it is used on the TX path. */ -#define HASH_HINT(hint) \ +#define HASH_HINT(hint) \ ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) @@ -797,8 +810,8 @@ mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp, * packets or because mblk's need to be concatenated using * pullupmsg(). 
*/ - if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(mp, ip6h, - &hdr_len, &nexthdr, NULL, NULL)) { + if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h, + mp->b_wptr, &hdr_len, &nexthdr, NULL)) { goto src_based_fanout; } whereptr = (uint8_t *)ip6h + hdr_len; @@ -1302,13 +1315,8 @@ check_again: tail->b_next = NULL; smcip = mac_srs->srs_mcip; - if ((mac_srs->srs_type & SRST_FLOW) || - (smcip == NULL)) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, - rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, - ipackets, count); - } + SRS_RX_STAT_UPDATE(mac_srs, pollbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, pollcnt, count); /* * If there are any promiscuous mode callbacks @@ -1316,9 +1324,6 @@ check_again: * if appropriate and also update the counters. */ if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; - if (smcip->mci_mip->mi_promisc_list != NULL) { mutex_exit(lock); mac_promisc_dispatch(smcip->mci_mip, @@ -1331,15 +1336,14 @@ check_again: mac_srs->srs_bw->mac_bw_polled += sz; mutex_exit(&mac_srs->srs_bw->mac_bw_lock); } - srs_rx->sr_poll_count += count; MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, head, tail, count, sz); if (count <= 10) - srs_rx->sr_chain_cnt_undr10++; + srs_rx->sr_stat.mrs_chaincntundr10++; else if (count > 10 && count <= 50) - srs_rx->sr_chain_cnt_10to50++; + srs_rx->sr_stat.mrs_chaincnt10to50++; else - srs_rx->sr_chain_cnt_over50++; + srs_rx->sr_stat.mrs_chaincntover50++; } /* @@ -1637,10 +1641,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, 
MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -1886,7 +1897,7 @@ again: /* zero bandwidth: drop all and return to interrupt mode */ mutex_enter(&mac_srs->srs_bw->mac_bw_lock); if (mac_srs->srs_bw->mac_bw_limit == 0) { - srs_rx->sr_drop_count += cnt; + srs_rx->sr_stat.mrs_sdrops += cnt; ASSERT(mac_srs->srs_bw->mac_bw_sz >= sz); mac_srs->srs_bw->mac_bw_sz -= sz; mac_srs->srs_bw->mac_bw_drop_bytes += sz; @@ -1908,10 +1919,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -2285,7 +2303,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, size_t sz = 0; size_t chain_sz, sz1; mac_bw_ctl_t *mac_bw; - mac_client_impl_t *smcip; mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; /* @@ -2302,15 +2319,14 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } mutex_enter(&mac_srs->srs_lock); - smcip = mac_srs->srs_mcip; - if (mac_srs->srs_type & SRST_FLOW || smcip == NULL) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, ipackets, count); - } - if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; + if (loopback) { + SRS_RX_STAT_UPDATE(mac_srs, lclbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, lclcnt, count); + + } else { + SRS_RX_STAT_UPDATE(mac_srs, intrbytes, 
sz); + SRS_RX_STAT_UPDATE(mac_srs, intrcnt, count); } /* @@ -2323,12 +2339,10 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, mac_bw = mac_srs->srs_bw; ASSERT(mac_bw != NULL); mutex_enter(&mac_bw->mac_bw_lock); - /* Count the packets and bytes via interrupt */ - srs_rx->sr_intr_count += count; mac_bw->mac_bw_intr += sz; if (mac_bw->mac_bw_limit == 0) { /* zero bandwidth: drop all */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); mutex_exit(&mac_srs->srs_lock); @@ -2370,7 +2384,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } if (head != NULL) { /* Drop any packet over the threshold */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2392,7 +2406,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, if (!(mac_srs->srs_type & SRST_BW_CONTROL) && (srs_rx->sr_poll_pkt_cnt > srs_rx->sr_hiwat)) { mac_bw = mac_srs->srs_bw; - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2402,8 +2416,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, mp_chain, tail, count, sz); - /* Count the packets entering via interrupt path */ - srs_rx->sr_intr_count += count; if (!(mac_srs->srs_state & SRS_PROC)) { /* @@ -2510,7 +2522,7 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, /* * Ignore fanout hint if we don't have multiple tx rings. 
*/ - if (!TX_MULTI_RING_MODE(mac_srs)) + if (!MAC_TX_SOFT_RINGS(mac_srs)) fanout_hint = 0; if (mac_srs->srs_first != NULL) @@ -2550,25 +2562,30 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } /* - * There are five tx modes: + * There are seven tx modes: * * 1) Default mode (SRS_TX_DEFAULT) * 2) Serialization mode (SRS_TX_SERIALIZE) * 3) Fanout mode (SRS_TX_FANOUT) * 4) Bandwdith mode (SRS_TX_BW) * 5) Fanout and Bandwidth mode (SRS_TX_BW_FANOUT) + * 6) aggr Tx mode (SRS_TX_AGGR) + * 7) aggr Tx bw mode (SRS_TX_BW_AGGR) * * The tx mode in which an SRS operates is decided in mac_tx_srs_setup() * based on the number of Tx rings requested for an SRS and whether * bandwidth control is requested or not. * - * In the default mode (i.e., no fanout/no bandwidth), the SRS acts as a - * pass-thru. Packets will go directly to mac_tx_send(). When the underlying - * Tx ring runs out of Tx descs, it starts queueing up packets in SRS. - * When flow-control is relieved, the srs_worker drains the queued - * packets and informs blocked clients to restart sending packets. + * The default mode (i.e., no fanout/no bandwidth) is used when the + * underlying NIC does not have Tx rings or just one Tx ring. In this mode, + * the SRS acts as a pass-thru. Packets will go directly to mac_tx_send(). + * When the underlying Tx ring runs out of Tx descs, it starts queueing up + * packets in SRS. When flow-control is relieved, the srs_worker drains + * the queued packets and informs blocked clients to restart sending + * packets. * - * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. + * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. This + * mode is used when the link has no Tx rings or only one Tx ring. * * In the SRS_TX_FANOUT mode, packets will be fanned out to multiple * Tx rings. Each Tx ring will have a soft ring associated with it. @@ -2581,6 +2598,19 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, * SRS. 
If fanout to multiple Tx rings is configured, the packets will * be fanned out among the soft rings associated with the Tx rings. * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a pseudo Tx ring + * belonging to a port on which the packet has to be sent. Aggr will + * always have a pseudo Tx ring associated with it even when it is an + * aggregation over a single NIC that has no Tx rings. Even in such a + * case, the single pseudo Tx ring will have a soft ring associated with + * it and the soft ring will hang off the SRS. + * + * If a bandwidth is specified for an aggr, SRS_TX_BW_AGGR mode is used. + * In this mode, the bandwidth is first applied on the outgoing packets + * and later mac_tx_aggr_mode() function is called to send the packet out + * of one of the pseudo Tx rings. + * * Four flags are used in srs_state for indicating flow control * conditions : SRS_TX_BLOCKED, SRS_TX_HIWAT, SRS_TX_WAKEUP_CLIENT. * SRS_TX_BLOCKED indicates out of Tx descs. SRS expects a wakeup from the @@ -2625,7 +2655,6 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; @@ -2656,10 +2685,8 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); /* * Multiple threads could be here sending packets.
@@ -2676,9 +2703,7 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } @@ -2696,7 +2721,6 @@ static mac_tx_cookie_t mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; @@ -2726,10 +2750,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mac_srs->srs_state |= SRS_PROC; mutex_exit(&mac_srs->srs_lock); - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); mutex_enter(&mac_srs->srs_lock); mac_srs->srs_state &= ~SRS_PROC; @@ -2747,8 +2769,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } mutex_exit(&mac_srs->srs_lock); - if (is_subflow && cookie == NULL) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + if (cookie == NULL) + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (cookie); } @@ -2766,8 +2788,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, */ #define MAC_TX_SOFT_RING_PROCESS(chain) { \ - index = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count), \ - softring = mac_srs->srs_oth_soft_rings[index]; \ + index = COMPUTE_INDEX(hash, mac_srs->srs_tx_ring_count), \ + softring = mac_srs->srs_tx_soft_rings[index]; \ cookie = mac_tx_soft_ring_process(softring, chain, flag, ret_mp); \ DTRACE_PROBE2(tx__fanout, uint64_t, hash, uint_t, index); \ } @@ -2781,7 +2803,8 @@ mac_tx_fanout_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uint_t index; mac_tx_cookie_t cookie = NULL; - ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + 
mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); if (fanout_hint != 0) { /* * The hint is specified by the caller, simply pass the @@ -2926,18 +2949,18 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, hash = HASH_HINT(fanout_hint); indx = COMPUTE_INDEX(hash, - mac_srs->srs_oth_ring_count); - softring = mac_srs->srs_oth_soft_rings[indx]; + mac_srs->srs_tx_ring_count); + softring = mac_srs->srs_tx_soft_rings[indx]; return (mac_tx_soft_ring_process(softring, mp_chain, flag, ret_mp)); + } else if (srs_tx->st_mode == SRS_TX_BW_AGGR) { + return (mac_tx_aggr_mode(mac_srs, mp_chain, + fanout_hint, flag, ret_mp)); } else { - boolean_t is_subflow; mac_tx_stats_t stats; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); if (mp_chain != NULL) { mutex_enter(&mac_srs->srs_lock); @@ -2951,13 +2974,68 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } } +/* + * mac_tx_aggr_mode + * + * This routine invokes an aggr function, aggr_find_tx_ring(), to find + * a (pseudo) Tx ring belonging to a port on which the packet has to + * be sent. aggr_find_tx_ring() first finds the outgoing port based on + * L2/L3/L4 policy and then uses the fanout_hint passed to it to pick + * a Tx ring from the selected port. + * + * Note that a port can be deleted from the aggregation. In such a case, + * the aggregation layer first separates the port from the rest of the + * ports making sure that port (and thus any Tx rings associated with + * it) won't get selected in the call to aggr_find_tx_ring() function. 
+ * Later calls are made to mac_group_rem_ring() passing pseudo Tx ring + * handles one by one which in turn will quiesce the Tx SRS and remove + * the soft ring associated with the pseudo Tx ring. Unlike Rx side + * where a cookie is used to protect against mac_rx_ring() calls on + * rings that have been removed, no such cookie is needed on the Tx + * side as the pseudo Tx ring won't be available anymore to + * aggr_find_tx_ring() once the port has been removed. + */ +static mac_tx_cookie_t +mac_tx_aggr_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, + uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) +{ + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_tx_ring_fn_t find_tx_ring_fn; + mac_ring_handle_t ring = NULL; + void *arg; + mac_soft_ring_t *sringp; + + find_tx_ring_fn = srs_tx->st_capab_aggr.mca_find_tx_ring_fn; + arg = srs_tx->st_capab_aggr.mca_arg; + if (find_tx_ring_fn(arg, mp_chain, fanout_hint, &ring) == NULL) + return (NULL); + sringp = srs_tx->st_soft_rings[((mac_ring_t *)ring)->mr_index]; + return (mac_tx_soft_ring_process(sringp, mp_chain, flag, ret_mp)); +} + +void +mac_tx_invoke_callbacks(mac_client_impl_t *mcip, mac_tx_cookie_t cookie) +{ + mac_cb_t *mcb; + mac_tx_notify_cb_t *mtnfp; + + /* Wakeup callback registered clients */ + MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); + for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; + mcb = mcb->mcb_nextp) { + mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; + mtnfp->mtnf_fn(mtnfp->mtnf_arg, cookie); + } + MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, + &mcip->mci_tx_notify_cb_list); +} + /* ARGSUSED */ void mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) @@ -2966,7 +3044,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) size_t sz; uint32_t tx_mode; uint_t saved_pkt_count; - boolean_t is_subflow; mac_tx_stats_t stats; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; clock_t now; @@ -2977,7 +3054,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, 
uint_t proc_type) mac_srs->srs_state |= SRS_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); tx_mode = srs_tx->st_mode; if (tx_mode == SRS_TX_DEFAULT || tx_mode == SRS_TX_SERIALIZE) { if (mac_srs->srs_first != NULL) { @@ -3000,16 +3076,13 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } } else if (tx_mode == SRS_TX_BW) { @@ -3065,10 +3138,10 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; - size_sent = sz - stats.ts_obytes; + size_sent = sz - stats.mts_obytes; mac_srs->srs_size += size_sent; mac_srs->srs_bw->mac_bw_sz += size_sent; if (mac_srs->srs_bw->mac_bw_used > size_sent) { @@ -3080,15 +3153,11 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } - } else if (tx_mode == SRS_TX_BW_FANOUT) { + } else if (tx_mode == SRS_TX_BW_FANOUT || tx_mode == SRS_TX_BW_AGGR) { mblk_t *prev; - mac_soft_ring_t *softring; uint64_t hint; /* @@ -3155,8 +3224,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) */ if (mac_srs->srs_count == 0 && (mac_srs->srs_state & (SRS_TX_HIWAT | SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED))) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; 
mac_client_impl_t *mcip = mac_srs->srs_mcip; boolean_t wakeup_required = B_FALSE; @@ -3168,16 +3235,7 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED); mutex_exit(&mac_srs->srs_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)mac_srs); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)mac_srs); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper @@ -3276,11 +3334,10 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } /* - * Fastpath: if there's only one client, and there's no - * multicast listeners, we simply send the packet down to the - * underlying NIC. + * Fastpath: if there's only one client, we simply send + * the packet down to the underlying NIC. */ - if (mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL) { + if (mip->mi_nactiveclients == 1) { DTRACE_PROBE2(fastpath, mac_client_impl_t *, src_mcip, mblk_t *, mp_chain); @@ -3293,9 +3350,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, msgdsize(mp)); CHECK_VID_AND_ADD_TAG(mp); - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); /* * If the driver is out of descriptors and does a @@ -3336,12 +3391,6 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, CHECK_VID_AND_ADD_TAG(mp); /* - * Check if there are promiscuous mode callbacks defined. - */ - if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp, src_mcip); - - /* * Find the destination. 
*/ dst_flow_ent = mac_tx_classify(mip, mp); @@ -3395,16 +3444,31 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, B_TRUE); } else { /* - * loopback the packet to a - * local MAC client. We force a context - * switch if both source and destination - * MAC clients are used by IP, i.e. bypass - * is set. + * loopback the packet to a local MAC + * client. We force a context switch + * if both source and destination MAC + * clients are used by IP, i.e. + * bypass is set. */ boolean_t do_switch; mac_client_impl_t *dst_mcip = dst_flow_ent->fe_mcip; + /* + * Check if there are promiscuous mode + * callbacks defined. This check is + * done here in the 'else' case and + * not in other cases because this + * path is for local loopback + * communication which does not go + * through MAC_TX(). For paths that go + * through MAC_TX(), the promisc_list + * check is done inside the MAC_TX() + * macro. + */ + if (mip->mi_promisc_list != NULL) + mac_promisc_dispatch(mip, mp, src_mcip); + do_switch = ((src_mcip->mci_state_flags & dst_mcip->mci_state_flags & MCIS_CLIENT_POLL_CAPABLE) != 0); @@ -3422,9 +3486,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * Unknown destination, send via the underlying * NIC. 
*/ - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); if (mp != NULL) { /* * Adjust for the last packet that @@ -3440,15 +3502,9 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } done: - src_mcip->mci_stat_obytes += obytes; - src_mcip->mci_stat_opackets += opackets; - src_mcip->mci_stat_oerrors += oerrors; - - if (stats != NULL) { - stats->ts_opackets = opackets; - stats->ts_obytes = obytes; - stats->ts_oerrors = oerrors; - } + stats->mts_obytes = obytes; + stats->mts_opackets = opackets; + stats->mts_oerrors = oerrors; return (mp); } @@ -3466,8 +3522,8 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) if (srs->srs_tx.st_arg2 == tx_ring) return (B_TRUE); - for (i = 0; i < srs->srs_oth_ring_count; i++) { - soft_ring = srs->srs_oth_soft_rings[i]; + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) return (B_TRUE); } @@ -3476,6 +3532,29 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) } /* + * mac_tx_srs_get_soft_ring + * + * Returns the TX soft ring associated with the given ring, if present. + */ +mac_soft_ring_t * +mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) +{ + int i; + mac_soft_ring_t *soft_ring; + + if (srs->srs_tx.st_arg2 == tx_ring) + return (NULL); + + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; + if (soft_ring->s_ring_tx_arg2 == tx_ring) + return (soft_ring); + } + + return (NULL); +} + +/* * mac_tx_srs_wakeup * * Called when Tx desc become available. Wakeup the appropriate worker @@ -3490,11 +3569,16 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; mutex_enter(&mac_srs->srs_lock); - if (TX_SINGLE_RING_MODE(mac_srs)) { + /* + * srs_tx_ring_count == 0 is the single ring mode case. 
In + * this mode, there will not be Tx soft rings associated + * with the SRS. + */ + if (!MAC_TX_SOFT_RINGS(mac_srs)) { if (srs_tx->st_arg2 == ring && mac_srs->srs_state & SRS_TX_BLOCKED) { mac_srs->srs_state &= ~SRS_TX_BLOCKED; - srs_tx->st_unblocked_cnt++; + srs_tx->st_stat.mts_unblockcnt++; cv_signal(&mac_srs->srs_async); } /* @@ -3507,15 +3591,17 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) return; } - /* If you are here, it is for FANOUT or BW_FANOUT case */ - ASSERT(TX_MULTI_RING_MODE(mac_srs)); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + /* + * If you are here, it is for FANOUT, BW_FANOUT, + * AGGR_MODE or AGGR_BW_MODE case + */ + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_tx_arg2 == ring) { if (sringp->s_ring_state & S_RING_BLOCK) { sringp->s_ring_state &= ~S_RING_BLOCK; - sringp->s_ring_unblocked_cnt++; + sringp->s_st_stat.mts_unblockcnt++; cv_signal(&sringp->s_ring_async); } sringp->s_ring_tx_woken_up = B_TRUE; @@ -3619,6 +3705,7 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, mutex_enter(&ringp->s_ring_lock); ringp->s_ring_total_inpkt += cnt; + ringp->s_ring_total_rbytes += sz; if ((mac_srs->srs_rx.sr_poll_pkt_cnt <= 1) && !(ringp->s_ring_type & ST_RING_WORKER_ONLY)) { /* If on processor or blanking on, then enqueue and return */ @@ -3831,11 +3918,14 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, ASSERT(mp_chain != NULL); ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock)); /* - * Only two modes can come here; either it can be - * SRS_TX_BW_FANOUT or SRS_TX_FANOUT + * The following modes can come here: SRS_TX_BW_FANOUT, + * SRS_TX_FANOUT, SRS_TX_AGGR, SRS_TX_BW_AGGR. 
*/ + ASSERT(MAC_TX_SOFT_RINGS(mac_srs)); ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || - mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); + mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); if (ringp->s_ring_type & ST_RING_WORKER_ONLY) { /* Serialization mode */ @@ -3871,7 +3961,6 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, * tx_srs_drain() completely drains out the * messages. */ - boolean_t is_subflow; mac_tx_stats_t stats; if (ringp->s_ring_state & S_RING_ENQUEUED) { @@ -3890,11 +3979,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, */ mutex_exit(&ringp->s_ring_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); mp_chain = mac_tx_send(ringp->s_ring_tx_arg1, - ringp->s_ring_tx_arg2, mp_chain, - (is_subflow ? &stats : NULL)); + ringp->s_ring_tx_arg2, mp_chain, &stats); /* * Multiple threads could be here sending packets. @@ -3912,9 +3999,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, mutex_exit(&ringp->s_ring_lock); return (cookie); } - if (is_subflow) { - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); + return (NULL); } } diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c index 25cc66ed52..151c99893b 100644 --- a/usr/src/uts/common/io/mac/mac_soft_ring.c +++ b/usr/src/uts/common/io/mac/mac_soft_ring.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,6 +88,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> #include <sys/mac_flow_impl.h> +#include <sys/mac_stat.h> static void mac_rx_soft_ring_drain(mac_soft_ring_t *); static void mac_soft_ring_fire(void *); @@ -145,7 +146,7 @@ mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp) * thread to the assigned CPU. */ mac_soft_ring_t * -mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, +mac_soft_ring_create(int id, clock_t wait, uint16_t type, pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2) @@ -162,9 +163,13 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, } else if (type & ST_RING_UDP) { (void) snprintf(name, sizeof (name), "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs); - } else { + } else if (type & ST_RING_OTH) { (void) snprintf(name, sizeof (name), "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs); + } else { + ASSERT(type & ST_RING_TX); + (void) snprintf(name, sizeof (name), + "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs); } bzero(ringp, sizeof (mac_soft_ring_t)); @@ -177,7 +182,6 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, ringp->s_ring_wait = MSEC_TO_TICK(wait); ringp->s_ring_mcip = mcip; ringp->s_ring_set = mac_srs; - ringp->s_ring_flent = flent; /* * Protect against access from DR callbacks (mac_walk_srs_bind/unbind) @@ -202,6 +206,14 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, ringp->s_ring_tx_hiwat = (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ? 
mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat; + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + ASSERT(tx->st_soft_rings[ + ((mac_ring_t *)x_arg2)->mr_index] == NULL); + tx->st_soft_rings[((mac_ring_t *)x_arg2)->mr_index] = + ringp; + } } else { ringp->s_ring_drain_func = mac_rx_soft_ring_drain; ringp->s_ring_rx_func = rx_func; @@ -213,6 +225,8 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, if (cpuid != -1) (void) mac_soft_ring_bind(ringp, cpuid); + mac_soft_ring_stat_create(ringp); + return (ringp); } @@ -222,18 +236,14 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, * Free the soft ring once we are done with it. */ void -mac_soft_ring_free(mac_soft_ring_t *softring, boolean_t release_tx_ring) +mac_soft_ring_free(mac_soft_ring_t *softring) { ASSERT((softring->s_ring_state & (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) == (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE)); mac_pkt_drop(NULL, NULL, softring->s_ring_first, B_FALSE); - if (release_tx_ring && softring->s_ring_tx_arg2 != NULL) { - ASSERT(softring->s_ring_type & ST_RING_TX); - mac_release_tx_ring(softring->s_ring_tx_arg2); - } - if (softring->s_ring_ksp) - kstat_delete(softring->s_ring_ksp); + softring->s_ring_tx_arg2 = NULL; + mac_soft_ring_stat_delete(softring); mac_callback_free(softring->s_ring_notify_cb_list); kmem_cache_free(mac_soft_ring_cache, softring); } @@ -642,7 +652,6 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) void *arg2; mblk_t *tail; uint_t saved_pkt_count, saved_size; - boolean_t is_subflow; mac_tx_stats_t stats; mac_soft_ring_set_t *mac_srs = ringp->s_ring_set; @@ -652,7 +661,6 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ASSERT(!(ringp->s_ring_state & S_RING_PROC)); ringp->s_ring_state |= S_RING_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); arg1 = ringp->s_ring_tx_arg1; arg2 = ringp->s_ring_tx_arg2; @@ -675,8 +683,8 @@ mac_tx_soft_ring_drain(mac_soft_ring_t 
*ringp) tail->b_next = ringp->s_ring_first; ringp->s_ring_first = mp; ringp->s_ring_count += - (saved_pkt_count - stats.ts_opackets); - ringp->s_ring_size += (saved_size - stats.ts_obytes); + (saved_pkt_count - stats.mts_opackets); + ringp->s_ring_size += (saved_size - stats.mts_obytes); if (ringp->s_ring_last == NULL) ringp->s_ring_last = tail; @@ -684,7 +692,7 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ringp->s_ring_tx_woken_up = B_FALSE; } else { ringp->s_ring_state |= S_RING_BLOCK; - ringp->s_ring_blocked_cnt++; + ringp->s_st_stat.mts_blockcnt++; } ringp->s_ring_state &= ~S_RING_PROC; @@ -692,17 +700,13 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) return; } else { ringp->s_ring_tx_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); } } if (ringp->s_ring_count == 0 && ringp->s_ring_state & (S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; mac_client_impl_t *mcip = ringp->s_ring_mcip; boolean_t wakeup_required = B_FALSE; @@ -714,16 +718,7 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED); mutex_exit(&ringp->s_ring_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)ringp); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c index 87f2f914ff..31972f94d8 100644 
--- a/usr/src/uts/common/io/mac/mac_stat.c +++ b/usr/src/uts/common/io/mac/mac_stat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,10 +33,40 @@ #include <sys/kstat.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_client_impl.h> +#include <sys/mac_stat.h> +#include <sys/mac_soft_ring.h> +#include <sys/vlan.h> #define MAC_KSTAT_NAME "mac" #define MAC_KSTAT_CLASS "net" +enum mac_stat { + MAC_STAT_LCL, + MAC_STAT_LCLBYTES, + MAC_STAT_INTRS, + MAC_STAT_INTRBYTES, + MAC_STAT_POLLS, + MAC_STAT_POLLBYTES, + MAC_STAT_RXSDROPS, + MAC_STAT_CHU10, + MAC_STAT_CH10T50, + MAC_STAT_CHO50, + MAC_STAT_BLOCK, + MAC_STAT_UNBLOCK, + MAC_STAT_TXSDROPS, + MAC_STAT_TX_ERRORS, + MAC_STAT_MACSPOOFED, + MAC_STAT_IPSPOOFED, + MAC_STAT_DHCPSPOOFED, + MAC_STAT_RESTRICTED, + MAC_STAT_DHCPDROPPED, + MAC_STAT_MULTIRCVBYTES, + MAC_STAT_BRDCSTRCVBYTES, + MAC_STAT_MULTIXMTBYTES, + MAC_STAT_BRDCSTXMTBYTES +}; + static mac_stat_info_t i_mac_si[] = { { MAC_STAT_IFSPEED, "ifspeed", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT32, 0 }, @@ -60,7 +90,6 @@ static mac_stat_info_t i_mac_si[] = { { MAC_STAT_OBYTES, "obytes64", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 } }; - #define MAC_NKSTAT \ (sizeof (i_mac_si) / sizeof (mac_stat_info_t)) @@ -70,7 +99,6 @@ static mac_stat_info_t i_mac_mod_si[] = { { MAC_STAT_LINK_UP, "link_up", KSTAT_DATA_UINT32, 0 }, { MAC_STAT_PROMISC, "promisc", KSTAT_DATA_UINT32, 0 } }; - #define MAC_MOD_NKSTAT \ (sizeof (i_mac_mod_si) / sizeof (mac_stat_info_t)) @@ -79,11 +107,195 @@ static mac_stat_info_t i_mac_mod_si[] = { #define MAC_TYPE_KSTAT_OFFSET MAC_KSTAT_OFFSET + MAC_NKSTAT /* + * Definitions for per rx ring statistics + */ +static mac_stat_info_t i_mac_rx_ring_si[] = { + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { 
MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_HDROPS, "hdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_RING_NKSTAT \ + (sizeof (i_mac_rx_ring_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per tx ring statistics + */ +static mac_stat_info_t i_mac_tx_ring_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_RING_NKSTAT \ + (sizeof (i_mac_tx_ring_si) / sizeof (mac_stat_info_t)) + + +/* + * Definitions for per software lane tx statistics + */ +static mac_stat_info_t i_mac_tx_swlane_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_SWLANE_NKSTAT \ + (sizeof (i_mac_tx_swlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per software lane rx statistics + */ +static mac_stat_info_t i_mac_rx_swlane_si[] = { + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCL, "local", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCLBYTES, "localbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_SWLANE_NKSTAT \ + (sizeof (i_mac_rx_swlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per hardware lane rx statistics + */ +static mac_stat_info_t i_mac_rx_hwlane_si[] = { + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { 
MAC_STAT_POLLS, "polls", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLBYTES, "pollbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHU10, "chainunder10", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CH10T50, "chain10to50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHO50, "chainover50", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_HWLANE_NKSTAT \ + (sizeof (i_mac_rx_hwlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for misc statistics + */ +static mac_stat_info_t i_mac_misc_si[] = { + { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTRCV, "brdcstrcv", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIXMT, "multixmt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTXMT, "brdcstxmt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIRCVBYTES, "multircvbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTRCVBYTES, "brdcstrcvbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIXMTBYTES, "multixmtbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTXMTBYTES, "brdcstxmtbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TX_ERRORS, "txerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MACSPOOFED, "macspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPSPOOFED, "ipspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_DHCPSPOOFED, "dhcpspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RESTRICTED, "restricted", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_DHCPDROPPED, "dhcpdropped", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCL, "local", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCLBYTES, "localbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLS, "polls", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLBYTES, "pollbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHU10, "chainunder10", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CH10T50, 
"chain10to50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHO50, "chainover50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_SUMMARY_NKSTAT \ + (sizeof (i_mac_misc_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per hardware lane tx statistics + */ +static mac_stat_info_t i_mac_tx_hwlane_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_HWLANE_NKSTAT \ + (sizeof (i_mac_tx_hwlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per fanout rx statistics + */ +static mac_stat_info_t i_mac_rx_fanout_si[] = { + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, +}; +#define MAC_RX_FANOUT_NKSTAT \ + (sizeof (i_mac_rx_fanout_si) / sizeof (mac_stat_info_t)) + +/* * Private functions. 
*/ +typedef struct { + uint_t si_offset; +} stat_info_t; + +#define RX_SRS_STAT_OFF(f) (offsetof(mac_rx_stats_t, f)) +static stat_info_t rx_srs_stats_list[] = { + {RX_SRS_STAT_OFF(mrs_lclbytes)}, + {RX_SRS_STAT_OFF(mrs_lclcnt)}, + {RX_SRS_STAT_OFF(mrs_pollcnt)}, + {RX_SRS_STAT_OFF(mrs_pollbytes)}, + {RX_SRS_STAT_OFF(mrs_intrcnt)}, + {RX_SRS_STAT_OFF(mrs_intrbytes)}, + {RX_SRS_STAT_OFF(mrs_sdrops)}, + {RX_SRS_STAT_OFF(mrs_chaincntundr10)}, + {RX_SRS_STAT_OFF(mrs_chaincnt10to50)}, + {RX_SRS_STAT_OFF(mrs_chaincntover50)}, + {RX_SRS_STAT_OFF(mrs_ierrors)} +}; +#define RX_SRS_STAT_SIZE \ + (sizeof (rx_srs_stats_list) / sizeof (stat_info_t)) + +#define TX_SOFTRING_STAT_OFF(f) (offsetof(mac_tx_stats_t, f)) +static stat_info_t tx_softring_stats_list[] = { + {TX_SOFTRING_STAT_OFF(mts_obytes)}, + {TX_SOFTRING_STAT_OFF(mts_opackets)}, + {TX_SOFTRING_STAT_OFF(mts_oerrors)}, + {TX_SOFTRING_STAT_OFF(mts_blockcnt)}, + {TX_SOFTRING_STAT_OFF(mts_unblockcnt)}, + {TX_SOFTRING_STAT_OFF(mts_sdrops)}, +}; +#define TX_SOFTRING_STAT_SIZE \ + (sizeof (tx_softring_stats_list) / sizeof (stat_info_t)) + +static void +i_mac_add_stats(void *sum, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (uint64_t *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (uint64_t *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *sum_val = (uint64_t *) + ((uchar_t *)sum + stats_list[i].si_offset); + + *sum_val = *op1_val + *op2_val; + } +} + static int -i_mac_stat_update(kstat_t *ksp, int rw) +i_mac_driver_stat_update(kstat_t *ksp, int rw) { mac_impl_t *mip = ksp->ks_private; kstat_named_t *knp = ksp->ks_data; @@ -136,6 +348,587 @@ i_mac_kstat_init(kstat_named_t *knp, mac_stat_info_t *si, uint_t count) } } +static int +i_mac_stat_update(kstat_t *ksp, int rw, uint64_t (*fn)(void *, uint_t), + mac_stat_info_t *msi, uint_t count) +{ + kstat_named_t *knp = ksp->ks_data; + uint_t i; + uint64_t val; + + if 
(rw != KSTAT_READ) + return (EACCES); + + for (i = 0; i < count; i++) { + val = fn(ksp->ks_private, msi[i].msi_stat); + + switch (msi[i].msi_type) { + case KSTAT_DATA_UINT64: + knp->value.ui64 = val; + break; + case KSTAT_DATA_UINT32: + knp->value.ui32 = (uint32_t)val; + break; + default: + ASSERT(B_FALSE); + break; + } + knp++; + } + return (0); +} + +/* + * Create kstat with given name - statname, update function - fn + * and initialize it with given names - init_stat_info + */ +static kstat_t * +i_mac_stat_create(void *handle, const char *modname, const char *statname, + int (*fn) (kstat_t *, int), + mac_stat_info_t *init_stat_info, uint_t count) +{ + kstat_t *ksp; + kstat_named_t *knp; + + ksp = kstat_create(modname, 0, statname, "net", + KSTAT_TYPE_NAMED, count, 0); + + if (ksp == NULL) + return (NULL); + + ksp->ks_update = fn; + ksp->ks_private = handle; + + knp = (kstat_named_t *)ksp->ks_data; + i_mac_kstat_init(knp, init_stat_info, count); + kstat_install(ksp); + + return (ksp); +} + +/* + * Per rx ring statistics + */ +uint64_t +mac_rx_ring_stat_get(void *handle, uint_t stat) +{ + mac_ring_t *ring = (mac_ring_t *)handle; + uint64_t val = 0; + + /* + * XXX Every ring-capable driver must implement an entry point to + * query per ring statistics. CR 6893122 tracks this work item. + * Once this bug is fixed, the framework should fail registration + * for a driver that does not implement this entry point and + * assert ring->mr_stat != NULL here. 
+ */ + if (ring->mr_stat != NULL) + ring->mr_stat(ring->mr_driver, stat, &val); + + return (val); +} + +static int +i_mac_rx_ring_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, mac_rx_ring_stat_get, + i_mac_rx_ring_si, MAC_RX_RING_NKSTAT)); +} + +static void +i_mac_rx_ring_stat_create(mac_ring_t *ring, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ring, modname, statname, + i_mac_rx_ring_stat_update, i_mac_rx_ring_si, MAC_RX_RING_NKSTAT); + + ring->mr_ksp = ksp; +} + +/* + * Per tx ring statistics + */ +uint64_t +mac_tx_ring_stat_get(void *handle, uint_t stat) +{ + mac_ring_t *ring = (mac_ring_t *)handle; + uint64_t val = 0; + + /* + * XXX Every ring-capable driver must implement an entry point to + * query per ring statistics. CR 6893122 tracks this work item. + * Once this bug is fixed, the framework should fail registration + * for a driver that does not implement this entry point and + * assert ring->mr_stat != NULL here. 
+ */ + if (ring->mr_stat != NULL) + ring->mr_stat(ring->mr_driver, stat, &val); + + return (val); +} + +static int +i_mac_tx_ring_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, mac_tx_ring_stat_get, + i_mac_tx_ring_si, MAC_TX_RING_NKSTAT)); +} + +static void +i_mac_tx_ring_stat_create(mac_ring_t *ring, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ring, modname, statname, + i_mac_tx_ring_stat_update, i_mac_tx_ring_si, MAC_TX_RING_NKSTAT); + + ring->mr_ksp = ksp; +} + +/* + * Per software lane tx statistics + */ +static uint64_t +i_mac_tx_swlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_tx_stats_t *mac_tx_stat = &mac_srs->srs_tx.st_stat; + + switch (stat) { + case MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_tx_swlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_tx_swlane_stat_get, + i_mac_tx_swlane_si, MAC_TX_SWLANE_NKSTAT)); +} + +static void +i_mac_tx_swlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_tx_swlane_stat_update, i_mac_tx_swlane_si, + MAC_TX_SWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + +/* + * Per software lane rx statistics + */ +static uint64_t +i_mac_rx_swlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + switch (stat) { + case MAC_STAT_IPACKETS: + 
return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_LCL: + return (mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_LCLBYTES: + return (mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + default: + return (0); + } +} + +static int +i_mac_rx_swlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_swlane_stat_get, + i_mac_rx_swlane_si, MAC_RX_SWLANE_NKSTAT)); +} + +static void +i_mac_rx_swlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_rx_swlane_stat_update, i_mac_rx_swlane_si, + MAC_RX_SWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + + +/* + * Per hardware lane rx statistics + */ +static uint64_t +i_mac_rx_hwlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + switch (stat) { + case MAC_STAT_IPACKETS: + return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_POLLS: + return (mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_POLLBYTES: + return (mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + case MAC_STAT_CHU10: + return (mac_rx_stat->mrs_chaincntundr10); + + case MAC_STAT_CH10T50: + return (mac_rx_stat->mrs_chaincnt10to50); + + case MAC_STAT_CHO50: + return (mac_rx_stat->mrs_chaincntover50); + + default: + return 
(0); + } +} + +static int +i_mac_rx_hwlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_hwlane_stat_get, + i_mac_rx_hwlane_si, MAC_RX_HWLANE_NKSTAT)); +} + +static void +i_mac_rx_hwlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_rx_hwlane_stat_update, i_mac_rx_hwlane_si, + MAC_RX_HWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + + +/* + * Misc statistics + * + * Counts for + * - Multicast/broadcast Rx/Tx counts + * - Tx errors + */ +static uint64_t +i_mac_misc_stat_get(void *handle, uint_t stat) +{ + flow_entry_t *flent = handle; + mac_client_impl_t *mcip = flent->fe_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + + mac_rx_stat = &mac_misc_stat->mms_defunctrxlanestats; + mac_tx_stat = &mac_misc_stat->mms_defuncttxlanestats; + + switch (stat) { + case MAC_STAT_MULTIRCV: + return (mac_misc_stat->mms_multircv); + + case MAC_STAT_BRDCSTRCV: + return (mac_misc_stat->mms_brdcstrcv); + + case MAC_STAT_MULTIXMT: + return (mac_misc_stat->mms_multixmt); + + case MAC_STAT_BRDCSTXMT: + return (mac_misc_stat->mms_brdcstxmt); + + case MAC_STAT_MULTIRCVBYTES: + return (mac_misc_stat->mms_multircvbytes); + + case MAC_STAT_BRDCSTRCVBYTES: + return (mac_misc_stat->mms_brdcstrcvbytes); + + case MAC_STAT_MULTIXMTBYTES: + return (mac_misc_stat->mms_multixmtbytes); + + case MAC_STAT_BRDCSTXMTBYTES: + return (mac_misc_stat->mms_brdcstxmtbytes); + + case MAC_STAT_TX_ERRORS: + return (mac_misc_stat->mms_txerrors); + + case MAC_STAT_MACSPOOFED: + return (mac_misc_stat->mms_macspoofed); + + case MAC_STAT_IPSPOOFED: + return (mac_misc_stat->mms_ipspoofed); + + case MAC_STAT_DHCPSPOOFED: + return (mac_misc_stat->mms_dhcpspoofed); + + case MAC_STAT_RESTRICTED: + return (mac_misc_stat->mms_restricted); + + case MAC_STAT_DHCPDROPPED: + return 
(mac_misc_stat->mms_dhcpdropped); + + case MAC_STAT_IPACKETS: + return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_LCL: + return (mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_LCLBYTES: + return (mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_POLLS: + return (mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_POLLBYTES: + return (mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + case MAC_STAT_CHU10: + return (mac_rx_stat->mrs_chaincntundr10); + + case MAC_STAT_CH10T50: + return (mac_rx_stat->mrs_chaincnt10to50); + + case MAC_STAT_CHO50: + return (mac_rx_stat->mrs_chaincntover50); + + case MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_misc_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_misc_stat_get, + i_mac_misc_si, MAC_SUMMARY_NKSTAT)); +} + +static void +i_mac_misc_stat_create(flow_entry_t *flent, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(flent, modname, statname, + i_mac_misc_stat_update, i_mac_misc_si, + MAC_SUMMARY_NKSTAT); + + flent->fe_misc_stat_ksp = ksp; +} + +/* + * Per hardware lane tx statistics + */ +static uint64_t +i_mac_tx_hwlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_t *ringp = (mac_soft_ring_t *)handle; + mac_tx_stats_t *mac_tx_stat = &ringp->s_st_stat; + + switch (stat) { + case 
MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_tx_hwlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_tx_hwlane_stat_get, + i_mac_tx_hwlane_si, MAC_TX_HWLANE_NKSTAT)); +} + +static void +i_mac_tx_hwlane_stat_create(mac_soft_ring_t *ringp, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ringp, modname, statname, + i_mac_tx_hwlane_stat_update, i_mac_tx_hwlane_si, + MAC_TX_HWLANE_NKSTAT); + + ringp->s_ring_ksp = ksp; +} + +/* + * Per fanout rx statistics + */ +static uint64_t +i_mac_rx_fanout_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle; + mac_soft_ring_t *udp_ringp = NULL, *oth_ringp = NULL; + mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set; + int index; + uint64_t val; + + mutex_enter(&mac_srs->srs_lock); + /* Extract corresponding udp and oth ring pointers */ + for (index = 0; mac_srs->srs_tcp_soft_rings[index] != NULL; index++) { + if (mac_srs->srs_tcp_soft_rings[index] == tcp_ringp) { + udp_ringp = mac_srs->srs_udp_soft_rings[index]; + oth_ringp = mac_srs->srs_oth_soft_rings[index]; + break; + } + } + + ASSERT((udp_ringp != NULL) && (oth_ringp != NULL)); + + switch (stat) { + case MAC_STAT_RBYTES: + val = (tcp_ringp->s_ring_total_rbytes) + + (udp_ringp->s_ring_total_rbytes) + + (oth_ringp->s_ring_total_rbytes); + break; + + case MAC_STAT_IPACKETS: + val = (tcp_ringp->s_ring_total_inpkt) + + (udp_ringp->s_ring_total_inpkt) + + (oth_ringp->s_ring_total_inpkt); + break; + + default: + val = 0; + break; + } + mutex_exit(&mac_srs->srs_lock); + return (val); 
+} + +static int +i_mac_rx_fanout_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_fanout_stat_get, + i_mac_rx_fanout_si, MAC_RX_FANOUT_NKSTAT)); +} + +static void +i_mac_rx_fanout_stat_create(mac_soft_ring_t *ringp, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ringp, modname, statname, + i_mac_rx_fanout_stat_update, i_mac_rx_fanout_si, + MAC_RX_FANOUT_NKSTAT); + + ringp->s_ring_ksp = ksp; +} + /* * Exported functions. */ @@ -147,7 +940,7 @@ i_mac_kstat_init(kstat_named_t *knp, mac_stat_info_t *si, uint_t count) * also maintained by the driver. */ void -mac_stat_create(mac_impl_t *mip) +mac_driver_stat_create(mac_impl_t *mip) { kstat_t *ksp; kstat_named_t *knp; @@ -161,7 +954,7 @@ mac_stat_create(mac_impl_t *mip) if (ksp == NULL) return; - ksp->ks_update = i_mac_stat_update; + ksp->ks_update = i_mac_driver_stat_update; ksp->ks_private = mip; mip->mi_ksp = ksp; mip->mi_kstat_count = count; @@ -181,7 +974,7 @@ mac_stat_create(mac_impl_t *mip) /*ARGSUSED*/ void -mac_stat_destroy(mac_impl_t *mip) +mac_driver_stat_delete(mac_impl_t *mip) { if (mip->mi_ksp != NULL) { kstat_delete(mip->mi_ksp); @@ -191,15 +984,311 @@ mac_stat_destroy(mac_impl_t *mip) } uint64_t -mac_stat_default(mac_impl_t *mip, uint_t stat) +mac_driver_stat_default(mac_impl_t *mip, uint_t stat) { uint_t stat_index; if (IS_MAC_STAT(stat)) { stat_index = stat - MAC_STAT_MIN; + ASSERT(stat_index < MAC_NKSTAT); return (i_mac_si[stat_index].msi_default); } ASSERT(IS_MACTYPE_STAT(stat)); stat_index = stat - MACTYPE_STAT_MIN; + ASSERT(stat_index < mip->mi_type->mt_statcount); return (mip->mi_type->mt_stats[stat_index].msi_default); } + +void +mac_ring_stat_create(mac_ring_t *ring) +{ + mac_impl_t *mip = ring->mr_mip; + char statname[MAXNAMELEN]; + char modname[MAXNAMELEN]; + + if (mip->mi_state_flags & MIS_IS_AGGR) { + (void) strlcpy(modname, mip->mi_clients_list->mci_name, + MAXNAMELEN); + } else + (void) strlcpy(modname, 
mip->mi_name, MAXNAMELEN); + + switch (ring->mr_type) { + case MAC_RING_TYPE_RX: + (void) snprintf(statname, sizeof (statname), "mac_rx_ring%d", + ring->mr_index); + i_mac_rx_ring_stat_create(ring, modname, statname); + break; + + case MAC_RING_TYPE_TX: + (void) snprintf(statname, sizeof (statname), "mac_tx_ring%d", + ring->mr_index); + i_mac_tx_ring_stat_create(ring, modname, statname); + break; + + default: + ASSERT(B_FALSE); + break; + } +} + +void +mac_srs_stat_create(mac_soft_ring_set_t *mac_srs) +{ + flow_entry_t *flent = mac_srs->srs_flent; + char statname[MAXNAMELEN]; + boolean_t is_tx_srs; + + /* No hardware/software lanes for user defined flows */ + if ((flent->fe_type & FLOW_USER) != 0) + return; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + + if (is_tx_srs) { + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_ring_t *ring = srs_tx->st_arg2; + + if (ring != NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_tx_hwlane%d", ring->mr_index); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_tx_swlane0"); + } + i_mac_tx_swlane_stat_create(mac_srs, flent->fe_flow_name, + statname); + } else { + mac_ring_t *ring = mac_srs->srs_ring; + + if (ring == NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_rx_swlane0"); + i_mac_rx_swlane_stat_create(mac_srs, + flent->fe_flow_name, statname); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_rx_hwlane%d", ring->mr_index); + i_mac_rx_hwlane_stat_create(mac_srs, + flent->fe_flow_name, statname); + } + } +} + +void +mac_misc_stat_create(flow_entry_t *flent) +{ + char statname[MAXNAMELEN]; + + /* No misc stats for user defined or mcast/bcast flows */ + if (((flent->fe_type & FLOW_USER) != 0) || + ((flent->fe_type & FLOW_MCAST) != 0)) + return; + + (void) snprintf(statname, sizeof (statname), "mac_misc_stat"); + i_mac_misc_stat_create(flent, flent->fe_flow_name, statname); +} + +void +mac_soft_ring_stat_create(mac_soft_ring_t *ringp) +{ + mac_soft_ring_set_t 
*mac_srs = ringp->s_ring_set; + flow_entry_t *flent = ringp->s_ring_mcip->mci_flent; + mac_ring_t *ring = (mac_ring_t *)ringp->s_ring_tx_arg2; + boolean_t is_tx_srs; + char statname[MAXNAMELEN]; + + /* No hardware/software lanes for user defined flows */ + if ((flent->fe_type & FLOW_USER) != 0) + return; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (is_tx_srs) { /* tx side hardware lane */ + ASSERT(ring != NULL); + (void) snprintf(statname, sizeof (statname), "mac_tx_hwlane%d", + ring->mr_index); + i_mac_tx_hwlane_stat_create(ringp, flent->fe_flow_name, + statname); + } else { /* rx side fanout */ + /* Maintain single stat for (tcp, udp, oth) */ + if (ringp->s_ring_type & ST_RING_TCP) { + int index; + mac_soft_ring_t *softring; + + for (index = 0, softring = mac_srs->srs_soft_ring_head; + softring != NULL; + index++, softring = softring->s_ring_next) { + if (softring == ringp) + break; + } + + if (mac_srs->srs_ring == NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_rx_swlane0_fanout%d", index/3); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_rx_hwlane%d_fanout%d", + mac_srs->srs_ring->mr_index, index/3); + } + i_mac_rx_fanout_stat_create(ringp, flent->fe_flow_name, + statname); + } + } +} + +void +mac_ring_stat_delete(mac_ring_t *ring) +{ + if (ring->mr_ksp != NULL) { + kstat_delete(ring->mr_ksp); + ring->mr_ksp = NULL; + } +} + +void +mac_srs_stat_delete(mac_soft_ring_set_t *mac_srs) +{ + boolean_t is_tx_srs; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (!is_tx_srs) { + /* + * Rx ring has been taken away. Before destroying corresponding + * SRS, save the stats recorded by that SRS. 
+ */ + mac_client_impl_t *mcip = mac_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + i_mac_add_stats(&mac_misc_stat->mms_defunctrxlanestats, + mac_rx_stat, &mac_misc_stat->mms_defunctrxlanestats, + rx_srs_stats_list, RX_SRS_STAT_SIZE); + } + + if (mac_srs->srs_ksp != NULL) { + kstat_delete(mac_srs->srs_ksp); + mac_srs->srs_ksp = NULL; + } +} + +void +mac_misc_stat_delete(flow_entry_t *flent) +{ + if (flent->fe_misc_stat_ksp != NULL) { + kstat_delete(flent->fe_misc_stat_ksp); + flent->fe_misc_stat_ksp = NULL; + } +} + +void +mac_soft_ring_stat_delete(mac_soft_ring_t *ringp) +{ + mac_soft_ring_set_t *mac_srs = ringp->s_ring_set; + boolean_t is_tx_srs; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (is_tx_srs) { + /* + * Tx ring has been taken away. Before destroying corresponding + * soft ring, save the stats recorded by that soft ring. + */ + mac_client_impl_t *mcip = mac_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_tx_stats_t *mac_tx_stat = &ringp->s_st_stat; + + i_mac_add_stats(&mac_misc_stat->mms_defuncttxlanestats, + mac_tx_stat, &mac_misc_stat->mms_defuncttxlanestats, + tx_softring_stats_list, TX_SOFTRING_STAT_SIZE); + } + + if (ringp->s_ring_ksp) { + kstat_delete(ringp->s_ring_ksp); + ringp->s_ring_ksp = NULL; + } +} + +void +mac_pseudo_ring_stat_rename(mac_impl_t *mip) +{ + mac_group_t *group; + mac_ring_t *ring; + + /* Recreate pseudo rx ring kstats */ + for (group = mip->mi_rx_groups; group != NULL; + group = group->mrg_next) { + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + mac_ring_stat_delete(ring); + mac_ring_stat_create(ring); + } + } + + /* Recreate pseudo tx ring kstats */ + for (group = mip->mi_tx_groups; group != NULL; + group = group->mrg_next) { + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + mac_ring_stat_delete(ring); + mac_ring_stat_create(ring); + } + } +} + 
+void +mac_stat_rename(mac_client_impl_t *mcip) +{ + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *mac_srs; + mac_soft_ring_t *ringp; + int i, j; + + ASSERT(flent != NULL); + + /* Recreate rx SRSes kstats */ + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_srs_stat_delete(mac_srs); + mac_srs_stat_create(mac_srs); + + /* Recreate rx fanout kstats */ + for (j = 0; j < mac_srs->srs_tcp_ring_count; j++) { + ringp = mac_srs->srs_tcp_soft_rings[j]; + mac_soft_ring_stat_delete(ringp); + mac_soft_ring_stat_create(ringp); + } + } + + /* Recreate tx SRS kstats */ + mac_srs = (mac_soft_ring_set_t *)flent->fe_tx_srs; + mac_srs_stat_delete(mac_srs); + mac_srs_stat_create(mac_srs); + + /* Recreate tx sofring kstats */ + for (ringp = mac_srs->srs_soft_ring_head; ringp; + ringp = ringp->s_ring_next) { + mac_soft_ring_stat_delete(ringp); + mac_soft_ring_stat_create(ringp); + } + + /* Recreate misc kstats */ + mac_misc_stat_delete(flent); + mac_misc_stat_create(flent); +} + +void +mac_tx_srs_stat_recreate(mac_soft_ring_set_t *tx_srs, boolean_t add_stats) +{ + mac_client_impl_t *mcip = tx_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_tx_stats_t *mac_tx_stat = &tx_srs->srs_tx.st_stat; + + if (add_stats) { + /* Add the stats to cumulative stats */ + i_mac_add_stats(&mac_misc_stat->mms_defuncttxlanestats, + mac_tx_stat, &mac_misc_stat->mms_defuncttxlanestats, + tx_softring_stats_list, TX_SOFTRING_STAT_SIZE); + } + + bzero(mac_tx_stat, sizeof (mac_tx_stats_t)); + mac_srs_stat_delete(tx_srs); + mac_srs_stat_create(tx_srs); +} diff --git a/usr/src/uts/common/io/mac/mac_util.c b/usr/src/uts/common/io/mac/mac_util.c index 371145e68c..3d9d2f9b39 100644 --- a/usr/src/uts/common/io/mac/mac_util.c +++ b/usr/src/uts/common/io/mac/mac_util.c @@ -244,14 +244,23 @@ mac_fix_cksum(mblk_t *mp_chain) offset, cksum); *(up) = (uint16_t)(cksum ? 
cksum : ~cksum); + /* + * Flag the packet so that it appears + * that the checksum has already been + * verified by the hardware. + */ + flags &= ~HCK_FULLCKSUM; flags |= HCK_FULLCKSUM_OK; - value = 0xffff; + value = 0; } if (flags & HCK_IPV4_HDRCKSUM) { ASSERT(ipha != NULL); ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); + flags &= ~HCK_IPV4_HDRCKSUM; + flags |= HCK_IPV4_HDRCKSUM_OK; + } } @@ -292,8 +301,8 @@ mac_fix_cksum(mblk_t *mp_chain) * been verified by the hardware. */ flags &= ~HCK_PARTIALCKSUM; - flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); - value = 0xffff; + flags |= HCK_FULLCKSUM_OK; + value = 0; } (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, @@ -470,27 +479,25 @@ mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, * returns B_TRUE. */ boolean_t -mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, - uint8_t *next_hdr, boolean_t *ip_fragmented, uint32_t *ip_frag_ident) +mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length, + uint8_t *next_hdr, ip6_frag_t **fragp) { uint16_t length; uint_t ehdrlen; uint8_t *whereptr; - uint8_t *endptr; uint8_t *nexthdrp; ip6_dest_t *desthdr; ip6_rthdr_t *rthdr; ip6_frag_t *fraghdr; - endptr = mp->b_wptr; if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) return (B_FALSE); ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); length = IPV6_HDR_LEN; whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ - if (ip_fragmented != NULL) - *ip_fragmented = B_FALSE; + if (fragp != NULL) + *fragp = NULL; nexthdrp = &ip6h->ip6_nxt; while (whereptr < endptr) { @@ -521,10 +528,8 @@ mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, if ((uchar_t *)&fraghdr[1] > endptr) return (B_FALSE); nexthdrp = &fraghdr->ip6f_nxt; - if (ip_fragmented != NULL) - *ip_fragmented = B_TRUE; - if (ip_frag_ident != NULL) - *ip_frag_ident = fraghdr->ip6f_ident; + if (fragp != NULL) + *fragp = fraghdr; break; case IPPROTO_NONE: /* No next header means we're finished */ @@ -561,6 
+566,13 @@ mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, } } +/* + * The following set of routines are there to take care of interrupt + * re-targeting for legacy (fixed) interrupts. Some older versions + * of the popular NICs like e1000g do not support MSI-X interrupts + * and they reserve fixed interrupts for RX/TX rings. To re-target + * these interrupts, PCITOOL ioctls need to be used. + */ typedef struct mac_dladm_intr { int ino; int cpu_id; @@ -807,13 +819,20 @@ mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_resource_props_t *mrp; mac_perim_handle_t mph; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *rx_srs; + mac_cpus_t *srs_cpu; - if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid)) - return; - + if (!mac_check_interrupt_binding(mdip, cpuid)) + cpuid = -1; mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); mrp = MCIP_RESOURCE_PROPS(mcip); - mrp->mrp_intr_cpu = cpuid; + mrp->mrp_rx_intr_cpu = cpuid; + if (flent != NULL && flent->fe_rx_srs_cnt == 2) { + rx_srs = flent->fe_rx_srs[1]; + srs_cpu = &rx_srs->srs_cpu; + srs_cpu->mc_rx_intr_cpu = cpuid; + } mac_perim_exit(mph); } @@ -825,18 +844,29 @@ mac_client_intr_cpu(mac_client_handle_t mch) mac_soft_ring_set_t *rx_srs; flow_entry_t *flent = mcip->mci_flent; mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_ring_t *ring; + mac_intr_t *mintr; /* * Check if we need to retarget the interrupt. We do this only * for the primary MAC client. We do this if we have the only - * exclusive ring in the group. + * exclusive ring in the group. */ if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { rx_srs = flent->fe_rx_srs[1]; srs_cpu = &rx_srs->srs_cpu; - if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid) + ring = rx_srs->srs_ring; + mintr = &ring->mr_info.mri_intr; + /* + * If ddi_handle is present or the poll CPU is + * already bound to the interrupt CPU, return -1. 
+ */ + if (mintr->mi_ddi_handle != NULL || + ((mrp->mrp_ncpus != 0) && + (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) { return (-1); - return (srs_cpu->mc_pollid); + } + return (srs_cpu->mc_rx_pollid); } return (-1); } @@ -970,8 +1000,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) } case ETHERTYPE_IPV6: { ip6_t *ip6hp; + ip6_frag_t *frag = NULL; uint16_t hdr_length; - uint32_t ip_frag_ident; /* * If the header is not aligned or the header doesn't fit @@ -984,8 +1014,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) !OK_32PTR((char *)ip6hp)) goto done; - if (!mac_ip_hdr_length_v6(mp, ip6hp, &hdr_length, &proto, - &ip_fragmented, &ip_frag_ident)) + if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length, + &proto, &frag)) goto done; skip_len += hdr_length; @@ -994,7 +1024,7 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) * the frag_id to generate the hash inorder to get * better distribution. 
*/ - if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { + if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) { uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); @@ -1003,8 +1033,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) policy &= ~MAC_PKT_HASH_L3; } - if (ip_fragmented) { - uint8_t *identp = (uint8_t *)&ip_frag_ident; + if (frag != NULL) { + uint8_t *identp = (uint8_t *)&frag->ip6f_ident; hash ^= PKT_HASH_4BYTES(identp); goto done; } diff --git a/usr/src/uts/common/io/mii/mii.c b/usr/src/uts/common/io/mii/mii.c index 2187553b40..bfff2a52e8 100644 --- a/usr/src/uts/common/io/mii/mii.c +++ b/usr/src/uts/common/io/mii/mii.c @@ -650,12 +650,10 @@ mii_m_loop_ioctl(mii_handle_t mh, queue_t *wq, mblk_t *mp) int mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, - uint_t flags, uint_t sz, void *val, uint_t *permp) + uint_t sz, void *val) { phy_handle_t *ph; int err = 0; - uint_t perm; - boolean_t dfl = flags & MAC_PROP_DEFAULT; _NOTE(ARGUNUSED(name)); @@ -665,54 +663,36 @@ mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, mutex_enter(&mh->m_lock); ph = mh->m_phy; - perm = MAC_PROP_PERM_RW; #define CASE_PROP_ABILITY(PROP, VAR) \ case MAC_PROP_ADV_##PROP: \ - perm = MAC_PROP_PERM_READ; \ - *(uint8_t *)val = \ - dfl ? ph->phy_cap_##VAR : ph->phy_adv_##VAR; \ + *(uint8_t *)val = ph->phy_adv_##VAR; \ break; \ \ case MAC_PROP_EN_##PROP: \ - if (!ph->phy_cap_##VAR) \ - perm = MAC_PROP_PERM_READ; \ - *(uint8_t *)val = \ - dfl ? 
ph->phy_cap_##VAR : ph->phy_en_##VAR; \ + *(uint8_t *)val = ph->phy_en_##VAR; \ break; switch (num) { case MAC_PROP_DUPLEX: - perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&ph->phy_duplex, val, sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&ph->phy_duplex, val, sizeof (link_duplex_t)); break; - case MAC_PROP_SPEED: - perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - uint64_t speed = ph->phy_speed * 1000000ull; - bcopy(&speed, val, sizeof (speed)); - } else { - err = EINVAL; - } + case MAC_PROP_SPEED: { + uint64_t speed = ph->phy_speed * 1000000ull; + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&speed, val, sizeof (speed)); break; + } case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? ph->phy_cap_aneg : ph->phy_adv_aneg; + *(uint8_t *)val = ph->phy_adv_aneg; break; case MAC_PROP_FLOWCTRL: - if (sz >= sizeof (link_flowctrl_t)) { - bcopy(&ph->phy_flowctrl, val, - sizeof (link_flowctrl_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_flowctrl_t)); + bcopy(&ph->phy_flowctrl, val, sizeof (link_flowctrl_t)); break; CASE_PROP_ABILITY(1000FDX_CAP, 1000_fdx) @@ -728,15 +708,57 @@ mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, break; } - if (err == 0) { - *permp = perm; - } - mutex_exit(&mh->m_lock); return (err); } +void +mii_m_propinfo(mii_handle_t mh, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + phy_handle_t *ph; + + _NOTE(ARGUNUSED(name)); + + mutex_enter(&mh->m_lock); + + ph = mh->m_phy; + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, ph->phy_cap_aneg); + break; + +#define CASE_PROP_PERM(PROP, VAR) \ + case MAC_PROP_ADV_##PROP: \ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); \ + mac_prop_info_set_default_uint8(prh, ph->phy_cap_##VAR); \ + break; \ + \ + case 
MAC_PROP_EN_##PROP: \ + if (!ph->phy_cap_##VAR) \ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); \ + mac_prop_info_set_default_uint8(prh, ph->phy_cap_##VAR); \ + break; + + CASE_PROP_PERM(1000FDX_CAP, 1000_fdx) + CASE_PROP_PERM(1000HDX_CAP, 1000_hdx) + CASE_PROP_PERM(100T4_CAP, 100_t4) + CASE_PROP_PERM(100FDX_CAP, 100_fdx) + CASE_PROP_PERM(100HDX_CAP, 100_hdx) + CASE_PROP_PERM(10FDX_CAP, 10_fdx) + CASE_PROP_PERM(10HDX_CAP, 10_hdx) + } + + mutex_exit(&mh->m_lock); +} + int mii_m_setprop(mii_handle_t mh, const char *name, mac_prop_id_t num, uint_t sz, const void *valp) @@ -813,65 +835,62 @@ mii_m_setprop(mii_handle_t mh, const char *name, mac_prop_id_t num, advp = &ph->phy_en_aneg; macpp = &mh->m_en_aneg; break; - case MAC_PROP_FLOWCTRL: - if (sz < sizeof (link_flowctrl_t)) { - rv = EINVAL; - } else { - link_flowctrl_t fc; - boolean_t chg; + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fc; + boolean_t chg; - bcopy(valp, &fc, sizeof (fc)); + ASSERT(sz >= sizeof (link_flowctrl_t)); + bcopy(valp, &fc, sizeof (fc)); - chg = fc == ph->phy_en_flowctrl ? B_FALSE : B_TRUE; - switch (fc) { - case LINK_FLOWCTRL_NONE: - ph->phy_en_pause = B_FALSE; - ph->phy_en_asmpause = B_FALSE; + chg = fc == ph->phy_en_flowctrl ? B_FALSE : B_TRUE; + switch (fc) { + case LINK_FLOWCTRL_NONE: + ph->phy_en_pause = B_FALSE; + ph->phy_en_asmpause = B_FALSE; + ph->phy_en_flowctrl = fc; + break; + /* + * Note that while we don't have a way to advertise + * that we can RX pause (we just won't send pause + * frames), we advertise full support. The MAC driver + * will learn of the configuration via the saved value + * of the tunable. + */ + case LINK_FLOWCTRL_BI: + case LINK_FLOWCTRL_RX: + if (ph->phy_cap_pause) { + ph->phy_en_pause = B_TRUE; + ph->phy_en_asmpause = B_TRUE; ph->phy_en_flowctrl = fc; - break; - /* - * Note that while we don't have a way to - * advertise that we can RX pause (we just - * won't send pause frames), we advertise full - * support. 
The MAC driver will learn of the - * configuration via the saved value of the - * tunable. - */ - case LINK_FLOWCTRL_BI: - case LINK_FLOWCTRL_RX: - if (ph->phy_cap_pause) { - ph->phy_en_pause = B_TRUE; - ph->phy_en_asmpause = B_TRUE; - ph->phy_en_flowctrl = fc; - } else { - rv = EINVAL; - } - break; - - /* - * Tell the other side that we can assert - * pause, but we cannot resend. - */ - case LINK_FLOWCTRL_TX: - if (ph->phy_cap_asmpause) { - ph->phy_en_pause = B_FALSE; - ph->phy_en_flowctrl = fc; - ph->phy_en_asmpause = B_TRUE; - } else { - rv = EINVAL; - } - break; - default: + } else { rv = EINVAL; - break; } - if ((rv == 0) && chg) { - mh->m_en_flowctrl = fc; - mh->m_tstate = MII_STATE_RESET; - cv_broadcast(&mh->m_cv); + break; + + /* + * Tell the other side that we can assert pause, but + * we cannot resend. + */ + case LINK_FLOWCTRL_TX: + if (ph->phy_cap_asmpause) { + ph->phy_en_pause = B_FALSE; + ph->phy_en_flowctrl = fc; + ph->phy_en_asmpause = B_TRUE; + } else { + rv = EINVAL; } + break; + default: + rv = EINVAL; + break; + } + if ((rv == 0) && chg) { + mh->m_en_flowctrl = fc; + mh->m_tstate = MII_STATE_RESET; + cv_broadcast(&mh->m_cv); } break; + } default: rv = ENOTSUP; diff --git a/usr/src/uts/common/io/mwl/mwl.c b/usr/src/uts/common/io/mwl/mwl.c index ce99b07504..98d0892326 100644 --- a/usr/src/uts/common/io/mwl/mwl.c +++ b/usr/src/uts/common/io/mwl/mwl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -86,11 +86,13 @@ static int mwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int mwl_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void mwl_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t mwl_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, mwl_m_stat, mwl_m_start, mwl_m_stop, @@ -98,12 +100,14 @@ static mac_callbacks_t mwl_m_callbacks = { mwl_m_multicst, mwl_m_unicst, mwl_m_tx, + NULL, mwl_m_ioctl, NULL, NULL, NULL, mwl_m_setprop, - mwl_m_getprop + mwl_m_getprop, + mwl_m_propinfo }; #define MWL_DBG_ATTACH (1 << 0) @@ -3746,17 +3750,26 @@ mwl_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int mwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct mwl_softc *sc = (struct mwl_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +mwl_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct mwl_softc *sc = (struct mwl_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int mwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/mxfe/mxfe.c b/usr/src/uts/common/io/mxfe/mxfe.c index d48164a80f..790c936fd5 100644 --- a/usr/src/uts/common/io/mxfe/mxfe.c +++ b/usr/src/uts/common/io/mxfe/mxfe.c @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -114,9 +114,11 @@ static int mxfe_m_stat(void *, uint_t, uint64_t *); static int mxfe_m_start(void *); static void mxfe_m_stop(void *); static int mxfe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int mxfe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void mxfe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static unsigned mxfe_intr(caddr_t); static void mxfe_startmac(mxfe_t *); static void mxfe_stopmac(mxfe_t *); @@ -170,7 +172,7 @@ static void mxfe_dprintf(mxfe_t *, const char *, int, char *, ...); #define KIOIP KSTAT_INTR_PTR(mxfep->mxfe_intrstat) static mac_callbacks_t mxfe_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, mxfe_m_stat, mxfe_m_start, mxfe_m_stop, @@ -178,12 +180,14 @@ static mac_callbacks_t mxfe_m_callbacks = { mxfe_m_multicst, mxfe_m_unicst, mxfe_m_tx, + NULL, NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ mxfe_m_setprop, - mxfe_m_getprop + mxfe_m_getprop, + mxfe_m_propinfo }; /* @@ -2877,90 +2881,50 @@ mxfe_m_stat(void *arg, uint_t stat, uint64_t *val) /*ARGSUSED*/ int -mxfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +mxfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { mxfe_t *mxfep = arg; int err = 0; - boolean_t dfl = flags & MAC_PROP_DEFAULT; - if (sz == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&mxfep->mxfe_duplex, val, - sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&mxfep->mxfe_duplex, val, sizeof 
(link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - bcopy(&mxfep->mxfe_ifspeed, val, sizeof (uint64_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&mxfep->mxfe_ifspeed, val, sizeof (uint64_t)); break; case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_aneg : mxfep->mxfe_adv_aneg; + *(uint8_t *)val = mxfep->mxfe_adv_aneg; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100fdx : mxfep->mxfe_adv_100fdx; - break; case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100fdx : mxfep->mxfe_adv_100fdx; + *(uint8_t *)val = mxfep->mxfe_adv_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100hdx : mxfep->mxfe_adv_100hdx; - break; case MAC_PROP_EN_100HDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100hdx : mxfep->mxfe_adv_100hdx; + *(uint8_t *)val = mxfep->mxfe_adv_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10fdx : mxfep->mxfe_adv_10fdx; - break; case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10fdx : mxfep->mxfe_adv_10fdx; + *(uint8_t *)val = mxfep->mxfe_adv_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10hdx : mxfep->mxfe_adv_10hdx; - break; case MAC_PROP_EN_10HDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10hdx : mxfep->mxfe_adv_10hdx; + *(uint8_t *)val = mxfep->mxfe_adv_10hdx; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100T4 : mxfep->mxfe_adv_100T4; - break; case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)val = - dfl ? 
mxfep->mxfe_cap_100T4 : mxfep->mxfe_adv_100T4; + *(uint8_t *)val = mxfep->mxfe_adv_100T4; break; default: @@ -3041,6 +3005,51 @@ mxfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (0); } +static void +mxfe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + mxfe_t *mxfep = arg; + + _NOTE(ARGUNUSED(name)); + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + mac_prop_info_set_perm(mph, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_aneg); + break; + + case MAC_PROP_EN_100FDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100fdx); + break; + + case MAC_PROP_EN_100HDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100hdx); + break; + + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_10fdx); + break; + + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_10hdx); + break; + + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100T4); + break; + } +} + /* * Debugging and error reporting. 
*/ diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge.c b/usr/src/uts/common/io/myri10ge/drv/myri10ge.c index d2bda2311b..7cdbad3249 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge.c +++ b/usr/src/uts/common/io/myri10ge/drv/myri10ge.c @@ -2380,8 +2380,7 @@ myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) return; } - (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, - csum, HCK_PARTIALCKSUM, 0); + mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); } static mblk_t * @@ -2889,7 +2888,7 @@ static inline void myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) { uint32_t lso_flag; - lso_info_get(mp, mss, &lso_flag); + mac_lso_get(mp, mss, &lso_flag); (*flags) |= lso_flag; } @@ -2902,8 +2901,7 @@ myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) int ok; mss = 0; - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, NULL, - &tx_offload_flags); + mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); ok = pullupmsg(mp, -1); @@ -2912,8 +2910,7 @@ myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) return (DDI_FAILURE); } MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); - (void) hcksum_assoc(mp, NULL, NULL, start, stuff, NULL, - NULL, tx_offload_flags, 0); + mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); if (tx_offload_flags & HW_LSO) DB_LSOMSS(mp) = (uint16_t)mss; lso_info_set(mp, mss, tx_offload_flags); @@ -3347,8 +3344,7 @@ myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, again: /* Setup checksum offloading, if needed */ - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, NULL, - &tx_offload_flags); + mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); if (tx_offload_flags & HW_LSO) { max_segs = MYRI10GE_MAX_SEND_DESC_TSO; @@ -3796,6 +3792,58 @@ myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) return (0); } 
+/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + struct myri10ge_slice_state *ss; + + ss = (struct myri10ge_slice_state *)rh; + switch (stat) { + case MAC_STAT_RBYTES: + *val = ss->rx_stats.ibytes; + break; + + case MAC_STAT_IPACKETS: + *val = ss->rx_stats.ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + struct myri10ge_slice_state *ss; + + ss = (struct myri10ge_slice_state *)rh; + switch (stat) { + case MAC_STAT_OBYTES: + *val = ss->tx.stats.obytes; + break; + + case MAC_STAT_OPACKETS: + *val = ss->tx.stats.opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + static int myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) { @@ -3843,6 +3891,7 @@ myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = myri10ge_ring_start; infop->mri_stop = NULL; infop->mri_poll = myri10ge_poll_rx; + infop->mri_stat = myri10ge_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)ss; mintr->mi_enable = myri10ge_rx_ring_intr_enable; mintr->mi_disable = myri10ge_rx_ring_intr_disable; @@ -3853,6 +3902,7 @@ myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = myri10ge_send_wrapper; + infop->mri_stat = myri10ge_tx_ring_stat; break; default: break; @@ -5329,6 +5379,7 @@ static mac_callbacks_t myri10ge_m_callbacks = { myri10ge_m_multicst, NULL, NULL, + NULL, myri10ge_m_ioctl, myri10ge_m_getcapab }; diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c b/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c index 2d03fceac6..ba2177e0fe 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c +++ 
b/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c @@ -118,8 +118,8 @@ myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro, tcp->th_sum = 0xffff ^ tcp_csum; } - (void) hcksum_assoc(lro->m_head, NULL, NULL, 0, 0, 0, - 0, HCK_IPV4_HDRCKSUM | HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(lro->m_head, 0, 0, 0, + 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK); mbl->cnt += lro->append_cnt; myri10ge_mbl_append(ss, mbl, lro->m_head); diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h b/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h index 6840795e94..24889e48a6 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h +++ b/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h @@ -57,9 +57,7 @@ extern "C" { #include <sys/sunddi.h> #include <sys/strsubr.h> /* for hw cksum stuff */ #include <sys/pattr.h> /* for hw cksum stuff */ -#ifdef MYRICOM_PRIV #include <netinet/in.h> /* for hw cksum stuff */ -#endif #include <netinet/ip.h> /* for hw cksum stuff */ #include <netinet/ip6.h> /* for hw cksum stuff */ #include <netinet/tcp.h> /* for hw cksum stuff */ diff --git a/usr/src/uts/common/io/net80211/net80211_ioctl.c b/usr/src/uts/common/io/net80211/net80211_ioctl.c index 93212719e3..25ef1e4fde 100644 --- a/usr/src/uts/common/io/net80211/net80211_ioctl.c +++ b/usr/src/uts/common/io/net80211/net80211_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -2457,22 +2457,14 @@ ieee80211_setprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ int ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; struct ieee80211com *ic = ic_arg; - if (wldp_length == 0) { - err = EINVAL; - return (err); - } - bzero(wldp_buf, wldp_length); - ASSERT(ic != NULL); IEEE80211_LOCK(ic); - *perm = MAC_PROP_PERM_RW; - switch (wldp_pr_num) { /* mac_prop_id */ case MAC_PROP_WL_ESSID: @@ -2497,34 +2489,27 @@ ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, wl_get_desrates(ic, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: - *perm = MAC_PROP_PERM_READ; wl_get_linkstatus(ic, wldp_buf); break; case MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; wl_get_esslist(ic, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; wl_get_suprates(ic, wldp_buf); break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; wl_get_rssi(ic, wldp_buf); break; case MAC_PROP_WL_CAPABILITY: - *perm = MAC_PROP_PERM_READ; wl_get_capability(ic, wldp_buf); break; case MAC_PROP_WL_WPA: wl_get_wpa(ic, wldp_buf); break; case MAC_PROP_WL_SCANRESULTS: - *perm = MAC_PROP_PERM_READ; wl_get_scanresults(ic, wldp_buf); break; case MAC_PROP_WL_CREATE_IBSS: - *perm = MAC_PROP_PERM_READ; wl_get_createibss(ic, wldp_buf); break; case MAC_PROP_WL_KEY_TAB: @@ -2545,3 +2530,25 @@ ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +void ieee80211_propinfo(void *ic_arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(pr_name, ic_arg)); + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. 
+ */ + + switch (wldp_pr_num) { + case MAC_PROP_WL_LINKSTATUS: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + case MAC_PROP_WL_CAPABILITY: + case MAC_PROP_WL_SCANRESULTS: + case MAC_PROP_WL_CREATE_IBSS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } +} diff --git a/usr/src/uts/common/io/nge/nge_main.c b/usr/src/uts/common/io/nge/nge_main.c index 583e9bd61e..1aad680aa7 100644 --- a/usr/src/uts/common/io/nge/nge_main.c +++ b/usr/src/uts/common/io/nge/nge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -177,14 +177,17 @@ static boolean_t nge_m_getcapab(void *, mac_capab_t, void *); static int nge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int nge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void nge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int nge_set_priv_prop(nge_t *, const char *, uint_t, const void *); static int nge_get_priv_prop(nge_t *, const char *, uint_t, - uint_t, void *); + void *); #define NGE_M_CALLBACK_FLAGS\ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | \ + MC_PROPINFO) static mac_callbacks_t nge_m_callbacks = { NGE_M_CALLBACK_FLAGS, @@ -195,27 +198,27 @@ static mac_callbacks_t nge_m_callbacks = { nge_m_multicst, nge_m_unicst, nge_m_tx, + NULL, nge_m_ioctl, nge_m_getcapab, NULL, NULL, nge_m_setprop, - nge_m_getprop + nge_m_getprop, + nge_m_propinfo }; -mac_priv_prop_t nge_priv_props[] = { - {"_tx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_rx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_recv_max_packet", MAC_PROP_PERM_RW}, - {"_poll_quiet_time", MAC_PROP_PERM_RW}, - {"_poll_busy_time", MAC_PROP_PERM_RW}, - {"_rx_intr_hwater", MAC_PROP_PERM_RW}, - 
{"_rx_intr_lwater", MAC_PROP_PERM_RW}, +char *nge_priv_props[] = { + "_tx_bcopy_threshold", + "_rx_bcopy_threshold", + "_recv_max_packet", + "_poll_quiet_time", + "_poll_busy_time", + "_rx_intr_hwater", + "_rx_intr_lwater", + NULL }; -#define NGE_MAX_PRIV_PROPS \ - (sizeof (nge_priv_props)/sizeof (mac_priv_prop_t)) - static int nge_add_intrs(nge_t *, int); static void nge_rem_intrs(nge_t *); static int nge_register_intrs_and_init_locks(nge_t *); @@ -1750,193 +1753,167 @@ reprogram: static int nge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { nge_t *ngep = barg; int err = 0; link_flowctrl_t fl; uint64_t speed; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&ngep->param_link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&ngep->param_link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - speed = ngep->param_link_speed * 1000000ull; - bcopy(&speed, pr_val, sizeof (speed)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + speed = ngep->param_link_speed * 1000000ull; + bcopy(&speed, pr_val, sizeof (speed)); break; case MAC_PROP_AUTONEG: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_autoneg; - } + *(uint8_t *)pr_val = ngep->param_adv_autoneg; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (link_flowctrl_t)) { - if (pr_flags & MAC_PROP_DEFAULT) { - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - } - if (ngep->param_link_rx_pause && - 
!ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_RX; - - if (!ngep->param_link_rx_pause && - !ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_NONE; - - if (!ngep->param_link_rx_pause && - ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_TX; - - if (ngep->param_link_rx_pause && - ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); + if (ngep->param_link_rx_pause && + !ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_RX; + + if (!ngep->param_link_rx_pause && + !ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_NONE; + + if (!ngep->param_link_rx_pause && + ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_TX; + + if (ngep->param_link_rx_pause && + ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_BI; + bcopy(&fl, pr_val, sizeof (fl)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_1000fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_1000fdx; - } + *(uint8_t *)pr_val = ngep->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 0; - } else { - *(uint8_t *)pr_val = ngep->param_adv_1000hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 0; - } else { - *(uint8_t *)pr_val = ngep->param_en_1000hdx; - } + *(uint8_t *)pr_val = ngep->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_100fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (is_default) { - *(uint8_t 
*)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_100fdx; - } + *(uint8_t *)pr_val = ngep->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_100hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_100hdx; - } + *(uint8_t *)pr_val = ngep->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_10fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_10fdx; - } + *(uint8_t *)pr_val = ngep->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_10hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_10hdx; - } + *(uint8_t *)pr_val = ngep->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_PRIVATE: - err = nge_get_priv_prop(ngep, pr_name, pr_flags, + err = nge_get_priv_prop(ngep, pr_name, pr_valsize, pr_val); break; - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = ETHERMTU; - if (ngep->dev_spec_param.jumbo) - range.range_uint32[0].mpur_max 
= NGE_MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } default: err = ENOTSUP; } return (err); } +static void +nge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + nge_t *ngep = barg; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, ETHERMTU, + ngep->dev_spec_param.jumbo ? 
NGE_MAX_MTU : ETHERMTU); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + bzero(valstr, sizeof (valstr)); + if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { + value = NGE_TX_COPY_SIZE; + } else if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { + value = NGE_RX_COPY_SIZE; + } else if (strcmp(pr_name, "_recv_max_packet") == 0) { + value = 128; + } else if (strcmp(pr_name, "_poll_quiet_time") == 0) { + value = NGE_POLL_QUIET_TIME; + } else if (strcmp(pr_name, "_poll_busy_time") == 0) { + value = NGE_POLL_BUSY_TIME; + } else if (strcmp(pr_name, "_rx_intr_hwater") == 0) { + value = 1; + } else if (strcmp(pr_name, "_rx_intr_lwater") == 0) { + value = 8; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + } + } + +} + /* ARGSUSED */ static int nge_set_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_valsize, @@ -2056,49 +2033,44 @@ reprogram: } static int -nge_get_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +nge_get_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { - value = (is_default ? NGE_TX_COPY_SIZE : - ngep->param_txbcopy_threshold); + value = ngep->param_txbcopy_threshold; err = 0; goto done; } if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { - value = (is_default ? NGE_RX_COPY_SIZE : - ngep->param_rxbcopy_threshold); + value = ngep->param_rxbcopy_threshold; err = 0; goto done; } if (strcmp(pr_name, "_recv_max_packet") == 0) { - value = (is_default ? 128 : ngep->param_recv_max_packet); + value = ngep->param_recv_max_packet; err = 0; goto done; } if (strcmp(pr_name, "_poll_quiet_time") == 0) { - value = (is_default ? 
NGE_POLL_QUIET_TIME : - ngep->param_poll_quiet_time); + value = ngep->param_poll_quiet_time; err = 0; goto done; } if (strcmp(pr_name, "_poll_busy_time") == 0) { - value = (is_default ? NGE_POLL_BUSY_TIME : - ngep->param_poll_busy_time); + value = ngep->param_poll_busy_time; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_hwater") == 0) { - value = (is_default ? 1 : ngep->param_rx_intr_hwater); + value = ngep->param_rx_intr_hwater; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_lwater") == 0) { - value = (is_default ? 8 : ngep->param_rx_intr_lwater); + value = ngep->param_rx_intr_lwater; err = 0; goto done; } @@ -2561,7 +2533,6 @@ nge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_max_sdu = ngep->default_mtu; macp->m_margin = VTAG_SIZE; macp->m_priv_props = nge_priv_props; - macp->m_priv_prop_count = NGE_MAX_PRIV_PROPS; /* * Finally, we're ready to register ourselves with the mac * interface; if this succeeds, we're all ready to start() diff --git a/usr/src/uts/common/io/nge/nge_rx.c b/usr/src/uts/common/io/nge/nge_rx.c index 86484445d3..c362117fd2 100644 --- a/usr/src/uts/common/io/nge/nge_rx.c +++ b/usr/src/uts/common/io/nge/nge_rx.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -271,19 +271,18 @@ nge_rxsta_handle(nge_t *ngep, uint32_t stflag, uint32_t *pflags) case RXD_CK8G_TCP_SUM: case RXD_CK8G_UDP_SUM: - *pflags |= HCK_FULLCKSUM; - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; *pflags |= HCK_FULLCKSUM_OK; break; case RXD_CK8G_TCP_SUM_ERR: case RXD_CK8G_UDP_SUM_ERR: sw_stp->tcp_hwsum_err++; - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; break; case RXD_CK8G_IP_HSUM: - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; break; case RXD_CK8G_NO_HSUM: @@ -379,8 +378,7 @@ nge_recv_ring(nge_t *ngep) } if (mp != NULL) { if (!(flag_err & (RX_SUM_NO | RX_SUM_ERR))) { - (void) hcksum_assoc(mp, NULL, NULL, - 0, 0, 0, 0, sum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, sum_flags); } *tail = mp; tail = &mp->b_next; diff --git a/usr/src/uts/common/io/nge/nge_tx.c b/usr/src/uts/common/io/nge/nge_tx.c index c16368bd5f..6ece5b5730 100644 --- a/usr/src/uts/common/io/nge/nge_tx.c +++ b/usr/src/uts/common/io/nge/nge_tx.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -362,8 +362,7 @@ nge_send_copy(nge_t *ngep, mblk_t *mp, send_ring_t *srp) sw_tx_sbd_t *ssbdp; boolean_t tfint; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, NULL, &flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &flags); bds = 0x1; if ((uint32_t)-1 == (start_index = nge_tx_alloc(ngep, bds))) @@ -476,7 +475,7 @@ nge_send_mapped(nge_t *ngep, mblk_t *mp, size_t fragno) slot = 0; dmah = dmah_list.head; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &flags); for (bp = mp; bp != NULL; bp = bp->b_cont) { diff --git a/usr/src/uts/common/io/ntxn/unm_nic_main.c b/usr/src/uts/common/io/ntxn/unm_nic_main.c index 4165589454..be99c52ff3 100644 --- a/usr/src/uts/common/io/ntxn/unm_nic_main.c +++ b/usr/src/uts/common/io/ntxn/unm_nic_main.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/types.h> @@ -649,8 +649,7 @@ unm_tx_csum(cmdDescType0_t *desc, mblk_t *mp, pktinfo_t *pktinfo) if (pktinfo->etype == htons(ETHERTYPE_IP)) { uint32_t start, flags; - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, - &flags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags); if ((flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) == 0) return; @@ -1306,11 +1305,11 @@ unm_process_rcv(unm_adapter *adapter, statusDesc_t *desc) if (desc->u1.s1.status == STATUS_CKSUM_OK) { adapter->stats.csummed++; cksum_flags = - HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM | HCK_FULLCKSUM; + HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM_OK; } else { cksum_flags = 0; } - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, cksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags); adapter->stats.no_rcv++; adapter->stats.rxbytes += pkt_length; @@ -2533,9 +2532,7 @@ static mac_callbacks_t ntxn_m_callbacks = { ntxn_m_multicst, ntxn_m_unicst, ntxn_m_tx, -#ifndef SOLARIS11 - NULL, /* mc_resources */ -#endif + NULL, /* mc_reserved */ ntxn_m_ioctl, ntxn_m_getcapab, NULL, /* mc_open */ diff --git a/usr/src/uts/common/io/nxge/nxge_fflp.c b/usr/src/uts/common/io/nxge/nxge_fflp.c index 39e107486e..ac1528275a 100644 --- a/usr/src/uts/common/io/nxge/nxge_fflp.c +++ b/usr/src/uts/common/io/nxge/nxge_fflp.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -448,6 +449,7 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) npi_status_t rs = NPI_SUCCESS; hostinfo_t mac_rdc; npi_handle_t handle; + int i; handle = nxgep->npi_reg_handle; mac_rdc.value = 0; @@ -456,6 +458,12 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) switch (nxgep->function_num) { case 0: case 1: + /* + * Tests indicate that it is OK not to re-initialize the + * hostinfo registers for the XMAC's alternate MAC + * addresses. 
But that is necessary for BMAC (case 2 + * and case 3 below) + */ rs = npi_mac_hostinfo_entry(handle, OP_SET, nxgep->function_num, XMAC_UNIQUE_HOST_INFO_ENTRY, &mac_rdc); break; @@ -463,6 +471,9 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) case 3: rs = npi_mac_hostinfo_entry(handle, OP_SET, nxgep->function_num, BMAC_UNIQUE_HOST_INFO_ENTRY, &mac_rdc); + for (i = 1; i <= BMAC_MAX_ALT_ADDR_ENTRY; i++) + rs |= npi_mac_hostinfo_entry(handle, OP_SET, + nxgep->function_num, i, &mac_rdc); break; default: NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, @@ -488,7 +499,6 @@ nxge_alt_mcast_mac_assign_rdc_table(p_nxge_t nxgep) npi_status_t rs = NPI_SUCCESS; hostinfo_t mac_rdc; npi_handle_t handle; - int i; handle = nxgep->npi_reg_handle; mac_rdc.value = 0; @@ -497,25 +507,13 @@ nxge_alt_mcast_mac_assign_rdc_table(p_nxge_t nxgep) switch (nxgep->function_num) { case 0: case 1: - /* - * Tests indicate that it is OK not to re-initialize the - * hostinfo registers for the XMAC's alternate MAC - * addresses. But that is necessary for BMAC (case 2 - * and case 3 below) - */ rs = npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, - XMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); + nxgep->function_num, XMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); break; case 2: case 3: - for (i = 1; i <= BMAC_MAX_ALT_ADDR_ENTRY; i++) - rs |= npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, i, &mac_rdc); - - rs |= npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, - BMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); + rs = npi_mac_hostinfo_entry(handle, OP_SET, + nxgep->function_num, BMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); break; default: NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, diff --git a/usr/src/uts/common/io/nxge/nxge_hio.c b/usr/src/uts/common/io/nxge/nxge_hio.c index 1130955670..2eaadd7b7c 100644 --- a/usr/src/uts/common/io/nxge/nxge_hio.c +++ b/usr/src/uts/common/io/nxge/nxge_hio.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,6 +50,7 @@ extern npi_status_t npi_rxdma_dump_rdc_table(npi_handle_t, uint8_t); extern int nxge_m_mmac_remove(void *arg, int slot); extern int nxge_m_mmac_add_g(void *arg, const uint8_t *maddr, int rdctbl, boolean_t usetbl); +extern int nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); /* The following function may be found in nxge_[t|r]xdma.c */ extern npi_status_t nxge_txdma_channel_disable(nxge_t *, int); @@ -428,6 +429,7 @@ nxge_grp_dc_add( nxge_hio_dc_t *dc; nxge_grp_set_t *set; nxge_status_t status = NXGE_OK; + int error = 0; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_grp_dc_add")); @@ -501,8 +503,13 @@ nxge_grp_dc_add( dc->group = group; - if (isLDOMguest(nxge)) - (void) nxge_hio_ldsv_add(nxge, dc); + if (isLDOMguest(nxge)) { + error = nxge_hio_ldsv_add(nxge, dc); + if (error != 0) { + MUTEX_EXIT(&nhd->lock); + return (NXGE_ERROR); + } + } NXGE_DC_SET(set->owned.map, channel); set->owned.count++; @@ -1778,6 +1785,10 @@ nxge_hio_share_bind(mac_share_handle_t shandle, uint64_t cookie, uint64_t rmap, tmap, hv_rmap, hv_tmap; int rv; + ASSERT(shp != NULL); + ASSERT(shp->nxgep != NULL); + ASSERT(shp->vrp != NULL); + nxge = shp->nxgep; vr = (nxge_hio_vr_t *)shp->vrp; @@ -1956,16 +1967,17 @@ nxge_hio_unshare( int nxge_hio_addres(nxge_hio_vr_t *vr, mac_ring_type_t type, uint64_t *map) { - nxge_t *nxge = (nxge_t *)vr->nxge; + nxge_t *nxge; nxge_grp_t *group; int groupid; int i, rv = 0; int max_dcs; - NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_addres")); + ASSERT(vr != NULL); + ASSERT(vr->nxge != NULL); + nxge = (nxge_t *)vr->nxge; - if (!nxge) - return (EINVAL); + NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_addres")); /* * For each ring associated with the group, add the resources @@ -1984,6 +1996,8 @@ nxge_hio_addres(nxge_hio_vr_t *vr, mac_ring_type_t type, uint64_t *map) group = nxge->rx_set.group[groupid]; } + ASSERT(group != NULL); + if 
(group->map == 0) { NXGE_DEBUG_MSG((nxge, HIO_CTL, "There is no rings associated " "with this VR")); @@ -2424,6 +2438,7 @@ nxge_hio_rdc_unshare( nxge_grp_set_t *set = &nxge->rx_set; nxge_grp_t *group; int grpid; + int i; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_rdc_unshare")); @@ -2484,6 +2499,14 @@ nxge_hio_rdc_unshare( } NXGE_DEBUG_MSG((nxge, HIO_CTL, "<== nxge_hio_rdc_unshare")); + + for (i = 0; i < NXGE_MAX_RDCS; i++) { + if (nxge->rx_ring_handles[i].channel == channel) { + nxge_rx_ring_start( + (mac_ring_driver_t)&nxge->rx_ring_handles[i], + nxge->rx_ring_handles[i].ring_gen_num); + } + } } /* diff --git a/usr/src/uts/common/io/nxge/nxge_hio_guest.c b/usr/src/uts/common/io/nxge/nxge_hio_guest.c index 3c552f2058..176c6a4e09 100644 --- a/usr/src/uts/common/io/nxge/nxge_hio_guest.c +++ b/usr/src/uts/common/io/nxge/nxge_hio_guest.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -169,7 +169,6 @@ static void nxge_check_guest_state(nxge_hio_vr_t *); * Context: * Guest domain */ -/* ARGSUSED */ int nxge_hio_vr_add(nxge_t *nxge) { @@ -411,6 +410,20 @@ nxge_guest_dc_alloc( return (0); } +int +nxge_hio_get_dc_htable_idx(nxge_t *nxge, vpc_type_t type, uint32_t channel) +{ + nxge_hio_dc_t *dc; + + ASSERT(isLDOMguest(nxge)); + + dc = nxge_grp_dc_find(nxge, type, channel); + if (dc == NULL) + return (-1); + + return (dc->ldg.vector); +} + /* * res_map_parse * diff --git a/usr/src/uts/common/io/nxge/nxge_intr.c b/usr/src/uts/common/io/nxge/nxge_intr.c index 0e6f85a0b6..2e73677ca5 100644 --- a/usr/src/uts/common/io/nxge/nxge_intr.c +++ b/usr/src/uts/common/io/nxge/nxge_intr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -903,26 +903,23 @@ nxge_hio_rdsv_add( * Context: * Guest domain */ -hv_rv_t -nxge_hio_ldsv_add( - nxge_t *nxge, - nxge_hio_dc_t *dc) +int +nxge_hio_ldsv_add(nxge_t *nxge, nxge_hio_dc_t *dc) { nxge_ldgv_t *control; nxge_ldg_t *group; nxge_ldv_t *device; - hv_rv_t hv_rv; if (dc->type == VP_BOUND_TX) { NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_ldsv_add(TDC %d)", dc->channel)); - if ((hv_rv = nxge_hio_tdsv_add(nxge, dc)) != 0) - return (hv_rv); + if (nxge_hio_tdsv_add(nxge, dc) != 0) + return (EIO); } else { NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_ldsv_add(RDC %d)", dc->channel)); - if ((hv_rv = nxge_hio_rdsv_add(nxge, dc)) != 0) - return (hv_rv); + if (nxge_hio_rdsv_add(nxge, dc) != 0) + return (EIO); } dc->ldg.map |= (1 << dc->ldg.ldsv); diff --git a/usr/src/uts/common/io/nxge/nxge_kstats.c b/usr/src/uts/common/io/nxge/nxge_kstats.c index c9fa73c35f..34cfafc58d 100644 --- a/usr/src/uts/common/io/nxge/nxge_kstats.c +++ b/usr/src/uts/common/io/nxge/nxge_kstats.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/nxge/nxge_impl.h> #include <sys/nxge/nxge_hio.h> @@ -2192,6 +2190,86 @@ nxge_m_tx_stat( return (val); } +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +nxge_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; + p_nxge_t nxgep = rhp->nxgep; + int r_index; + p_nxge_stats_t statsp; + + ASSERT(nxgep != NULL); + statsp = (p_nxge_stats_t)nxgep->statsp; + ASSERT(statsp != NULL); + r_index = rhp->index + nxgep->pt_config.hw_config.start_rdc; + + if (statsp->rdc_ksp[r_index] == NULL) + return (0); + + switch (stat) { + case MAC_STAT_IERRORS: + *val = statsp->rdc_stats[r_index].ierrors; + break; + + case MAC_STAT_RBYTES: + *val = statsp->rdc_stats[r_index].ibytes; + break; + + case MAC_STAT_IPACKETS: + *val = statsp->rdc_stats[r_index].ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +nxge_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; + p_nxge_t nxgep = rhp->nxgep; + int r_index; + p_nxge_stats_t statsp; + + ASSERT(nxgep != NULL); + statsp = (p_nxge_stats_t)nxgep->statsp; + ASSERT(statsp != NULL); + r_index = nxgep->pt_config.hw_config.tdc.start + rhp->index; + + if (statsp->tdc_ksp[r_index] == NULL) + return (0); + + switch (stat) { + case MAC_STAT_OERRORS: + *val = statsp->tdc_stats[r_index].oerrors; + break; + + case MAC_STAT_OBYTES: + *val = statsp->tdc_stats[r_index].obytes; + break; + + case MAC_STAT_OPACKETS: + *val = statsp->tdc_stats[r_index].opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + /* ARGSUSED */ int nxge_m_stat(void *arg, uint_t stat, uint64_t *value) diff --git a/usr/src/uts/common/io/nxge/nxge_mac.c b/usr/src/uts/common/io/nxge/nxge_mac.c 
index dd8387652a..38aa5cc722 100644 --- a/usr/src/uts/common/io/nxge/nxge_mac.c +++ b/usr/src/uts/common/io/nxge/nxge_mac.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -3340,16 +3340,46 @@ fail: return (NXGE_ERROR | rs); } +static npi_status_t +nxge_rx_mac_mcast_hash_table(p_nxge_t nxgep) +{ + uint32_t i; + uint16_t hashtab_e; + p_hash_filter_t hash_filter; + uint8_t portn; + npi_handle_t handle; + npi_status_t rs = NPI_SUCCESS; + + portn = NXGE_GET_PORT_NUM(nxgep->function_num); + handle = nxgep->npi_handle; + + /* + * Load the multicast hash filter bits. + */ + hash_filter = nxgep->hash_filter; + for (i = 0; i < MAC_MAX_HASH_ENTRY; i++) { + if (hash_filter != NULL) { + hashtab_e = (uint16_t)hash_filter->hash_filter_regs[ + (NMCFILTER_REGS - 1) - i]; + } else { + hashtab_e = 0; + } + + if ((rs = npi_mac_hashtab_entry(handle, OP_SET, portn, i, + (uint16_t *)&hashtab_e)) != NPI_SUCCESS) + return (rs); + } -/* Initialize the RxMAC sub-block */ + return (NPI_SUCCESS); +} +/* + * Initialize the RxMAC sub-block + */ nxge_status_t nxge_rx_mac_init(p_nxge_t nxgep) { npi_attr_t ap; - uint32_t i; - uint16_t hashtab_e; - p_hash_filter_t hash_filter; nxge_port_t portt; uint8_t portn; npi_handle_t handle; @@ -3370,9 +3400,8 @@ nxge_rx_mac_init(p_nxge_t nxgep) addr0 = ntohs(addr16p[2]); addr1 = ntohs(addr16p[1]); addr2 = ntohs(addr16p[0]); - SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR, addr0, addr1, addr2, - rs); - + SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR, + addr0, addr1, addr2, rs); if (rs != NPI_SUCCESS) goto fail; SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR_FILTER, 0, 0, 0, rs); @@ -3382,22 +3411,9 @@ nxge_rx_mac_init(p_nxge_t nxgep) if (rs != NPI_SUCCESS) goto fail; - /* - * Load the multicast hash filter bits. 
- */ - hash_filter = nxgep->hash_filter; - for (i = 0; i < MAC_MAX_HASH_ENTRY; i++) { - if (hash_filter != NULL) { - hashtab_e = (uint16_t)hash_filter->hash_filter_regs[ - (NMCFILTER_REGS - 1) - i]; - } else { - hashtab_e = 0; - } - - if ((rs = npi_mac_hashtab_entry(handle, OP_SET, portn, i, - (uint16_t *)&hashtab_e)) != NPI_SUCCESS) - goto fail; - } + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; if (portt == PORT_TYPE_XMAC) { if ((rs = npi_xmac_rx_iconfig(handle, INIT, portn, @@ -3413,48 +3429,51 @@ nxge_rx_mac_init(p_nxge_t nxgep) if (nxgep->filter.all_phys_cnt != 0) xconfig |= CFG_XMAC_RX_PROMISCUOUS; - if (nxgep->filter.all_multicast_cnt != 0) xconfig |= CFG_XMAC_RX_PROMISCUOUSGROUP; xconfig |= CFG_XMAC_RX_HASH_FILTER; - if ((rs = npi_xmac_rx_config(handle, INIT, portn, - xconfig)) != NPI_SUCCESS) + if ((rs = npi_xmac_rx_config(handle, INIT, + portn, xconfig)) != NPI_SUCCESS) goto fail; nxgep->mac.rx_config = xconfig; - /* Comparison of mac unique address is always enabled on XMAC */ - + /* + * Comparison of mac unique address is always + * enabled on XMAC + */ if ((rs = npi_xmac_zap_rx_counters(handle, portn)) != NPI_SUCCESS) goto fail; } else { - (void) nxge_fflp_init_hostinfo(nxgep); - if (npi_bmac_rx_iconfig(nxgep->npi_handle, INIT, portn, 0) != NPI_SUCCESS) goto fail; + nxgep->mac.rx_iconfig = NXGE_BMAC_RX_INTRS; + (void) nxge_fflp_init_hostinfo(nxgep); + bconfig = CFG_BMAC_RX_DISCARD_ON_ERR | CFG_BMAC_RX & ~CFG_BMAC_RX_STRIP_CRC; if (nxgep->filter.all_phys_cnt != 0) bconfig |= CFG_BMAC_RX_PROMISCUOUS; - if (nxgep->filter.all_multicast_cnt != 0) bconfig |= CFG_BMAC_RX_PROMISCUOUSGROUP; bconfig |= CFG_BMAC_RX_HASH_FILTER; - if ((rs = npi_bmac_rx_config(handle, INIT, portn, - bconfig)) != NPI_SUCCESS) + if ((rs = npi_bmac_rx_config(handle, INIT, + portn, bconfig)) != NPI_SUCCESS) goto fail; nxgep->mac.rx_config = bconfig; - /* Always enable comparison of mac unique address */ - if ((rs = npi_mac_altaddr_enable(handle, 
portn, 0)) - != NPI_SUCCESS) + /* + * Always enable comparison of mac unique address + */ + if ((rs = npi_mac_altaddr_enable(handle, + portn, 0)) != NPI_SUCCESS) goto fail; } @@ -4919,9 +4938,9 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) uint32_t mchash; p_hash_filter_t hash_filter; uint16_t hash_bit; - boolean_t rx_init = B_FALSE; uint_t j; nxge_status_t status = NXGE_OK; + npi_status_t rs; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_add_mcast_addr")); @@ -4933,6 +4952,7 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) nxgep->hash_filter = KMEM_ZALLOC(sizeof (hash_filter_t), KM_SLEEP); } + hash_filter = nxgep->hash_filter; j = mchash / HASH_REG_WIDTH; hash_bit = (1 << (mchash % HASH_REG_WIDTH)); @@ -4940,19 +4960,14 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) hash_filter->hash_bit_ref_cnt[mchash]++; if (hash_filter->hash_bit_ref_cnt[mchash] == 1) { hash_filter->hash_ref_cnt++; - rx_init = B_TRUE; - } - if (rx_init) { - if ((status = nxge_rx_mac_disable(nxgep)) != NXGE_OK) - goto fail; - if ((status = nxge_rx_mac_enable(nxgep)) != NXGE_OK) - goto fail; } - RW_EXIT(&nxgep->filter_lock); + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; + RW_EXIT(&nxgep->filter_lock); NXGE_DEBUG_MSG((nxgep, MAC_CTL, "<== nxge_add_mcast_addr")); - return (NXGE_OK); fail: RW_EXIT(&nxgep->filter_lock); @@ -4969,9 +4984,9 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) uint32_t mchash; p_hash_filter_t hash_filter; uint16_t hash_bit; - boolean_t rx_init = B_FALSE; uint_t j; nxge_status_t status = NXGE_OK; + npi_status_t rs; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_del_mcast_addr")); RW_ENTER_WRITER(&nxgep->filter_lock); @@ -4990,8 +5005,8 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) hash_bit = (1 << (mchash % HASH_REG_WIDTH)); hash_filter->hash_filter_regs[j] &= ~hash_bit; hash_filter->hash_ref_cnt--; - rx_init = B_TRUE; } + if (hash_filter->hash_ref_cnt == 0) { 
NXGE_DEBUG_MSG((NULL, STR_CTL, "De-allocating hash filter storage.")); @@ -4999,12 +5014,10 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) nxgep->hash_filter = NULL; } - if (rx_init) { - if ((status = nxge_rx_mac_disable(nxgep)) != NXGE_OK) - goto fail; - if ((status = nxge_rx_mac_enable(nxgep)) != NXGE_OK) - goto fail; - } + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; + RW_EXIT(&nxgep->filter_lock); NXGE_DEBUG_MSG((nxgep, MAC_CTL, "<== nxge_del_mcast_addr")); diff --git a/usr/src/uts/common/io/nxge/nxge_main.c b/usr/src/uts/common/io/nxge/nxge_main.c index c8df562520..885f521ed3 100644 --- a/usr/src/uts/common/io/nxge/nxge_main.c +++ b/usr/src/uts/common/io/nxge/nxge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -296,12 +296,13 @@ static boolean_t nxge_m_getcapab(void *, mac_capab_t, void *); static int nxge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int nxge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void nxge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); +static void nxge_priv_propinfo(const char *, mac_prop_info_handle_t); static int nxge_set_priv_prop(nxge_t *, const char *, uint_t, const void *); -static int nxge_get_priv_prop(nxge_t *, const char *, uint_t, uint_t, - void *, uint_t *); -static int nxge_get_def_val(nxge_t *, mac_prop_id_t, uint_t, void *); +static int nxge_get_priv_prop(nxge_t *, const char *, uint_t, void *); static void nxge_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); static void nxge_group_add_ring(mac_group_driver_t, mac_ring_driver_t, @@ -312,34 +313,32 @@ static void nxge_group_rem_ring(mac_group_driver_t, mac_ring_driver_t, static void 
nxge_niu_peu_reset(p_nxge_t nxgep); static void nxge_set_pci_replay_timeout(nxge_t *); -mac_priv_prop_t nxge_priv_props[] = { - {"_adv_10gfdx_cap", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_RW}, - {"_function_number", MAC_PROP_PERM_READ}, - {"_fw_version", MAC_PROP_PERM_READ}, - {"_port_mode", MAC_PROP_PERM_READ}, - {"_hot_swap_phy", MAC_PROP_PERM_READ}, - {"_rxdma_intr_time", MAC_PROP_PERM_RW}, - {"_rxdma_intr_pkts", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_sctp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_sctp", MAC_PROP_PERM_RW}, - {"_soft_lso_enable", MAC_PROP_PERM_RW} +char *nxge_priv_props[] = { + "_adv_10gfdx_cap", + "_adv_pause_cap", + "_function_number", + "_fw_version", + "_port_mode", + "_hot_swap_phy", + "_rxdma_intr_time", + "_rxdma_intr_pkts", + "_class_opt_ipv4_tcp", + "_class_opt_ipv4_udp", + "_class_opt_ipv4_ah", + "_class_opt_ipv4_sctp", + "_class_opt_ipv6_tcp", + "_class_opt_ipv6_udp", + "_class_opt_ipv6_ah", + "_class_opt_ipv6_sctp", + "_soft_lso_enable", + NULL }; -#define NXGE_MAX_PRIV_PROPS \ - (sizeof (nxge_priv_props)/sizeof (mac_priv_prop_t)) - #define NXGE_NEPTUNE_MAGIC 0x4E584745UL #define MAX_DUMP_SZ 256 #define NXGE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) mac_callbacks_t nxge_m_callbacks = { NXGE_M_CALLBACK_FLAGS, @@ -350,12 +349,14 @@ mac_callbacks_t nxge_m_callbacks = { nxge_m_multicst, NULL, NULL, + NULL, nxge_m_ioctl, nxge_m_getcapab, NULL, NULL, nxge_m_setprop, - nxge_m_getprop + nxge_m_getprop, + nxge_m_propinfo }; void @@ -4547,16 +4548,12 @@ nxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val) { nxge_t 
*nxgep = barg; - p_nxge_param_t param_arr; - p_nxge_stats_t statsp; + p_nxge_param_t param_arr = nxgep->param_arr; + p_nxge_stats_t statsp = nxgep->statsp; int err = 0; - uint8_t val; - uint32_t cur_mtu, new_mtu, old_framesize; - link_flowctrl_t fl; NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_m_setprop")); - param_arr = nxgep->param_arr; - statsp = nxgep->statsp; + mutex_enter(nxgep->genlock); if (statsp->port_stats.lb_mode != nxge_lb_normal && nxge_param_locked(pr_num)) { @@ -4570,139 +4567,115 @@ nxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (EBUSY); } - val = *(uint8_t *)pr_val; switch (pr_num) { - case MAC_PROP_EN_1000FDX_CAP: - nxgep->param_en_1000fdx = val; - param_arr[param_anar_1000fdx].value = val; - - goto reprogram; + case MAC_PROP_EN_1000FDX_CAP: + nxgep->param_en_1000fdx = + param_arr[param_anar_1000fdx].value = *(uint8_t *)pr_val; + goto reprogram; - case MAC_PROP_EN_100FDX_CAP: - nxgep->param_en_100fdx = val; - param_arr[param_anar_100fdx].value = val; + case MAC_PROP_EN_100FDX_CAP: + nxgep->param_en_100fdx = + param_arr[param_anar_100fdx].value = *(uint8_t *)pr_val; + goto reprogram; - goto reprogram; + case MAC_PROP_EN_10FDX_CAP: + nxgep->param_en_10fdx = + param_arr[param_anar_10fdx].value = *(uint8_t *)pr_val; + goto reprogram; - case MAC_PROP_EN_10FDX_CAP: - nxgep->param_en_10fdx = val; - param_arr[param_anar_10fdx].value = val; + case MAC_PROP_AUTONEG: + param_arr[param_autoneg].value = *(uint8_t *)pr_val; + goto reprogram; - goto reprogram; + case MAC_PROP_MTU: { + uint32_t cur_mtu, new_mtu, old_framesize; - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_STATUS: - case MAC_PROP_SPEED: - case MAC_PROP_DUPLEX: - err = EINVAL; /* cannot set read-only properties */ - 
NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: read only property %d", - pr_num)); - break; + cur_mtu = nxgep->mac.default_mtu; + ASSERT(pr_valsize >= sizeof (new_mtu)); + bcopy(pr_val, &new_mtu, sizeof (new_mtu)); - case MAC_PROP_AUTONEG: - param_arr[param_autoneg].value = val; + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: set MTU: %d is_jumbo %d", + new_mtu, nxgep->mac.is_jumbo)); - goto reprogram; + if (new_mtu == cur_mtu) { + err = 0; + break; + } - case MAC_PROP_MTU: - cur_mtu = nxgep->mac.default_mtu; - bcopy(pr_val, &new_mtu, sizeof (new_mtu)); - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: set MTU: %d is_jumbo %d", - new_mtu, nxgep->mac.is_jumbo)); + if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) { + err = EBUSY; + break; + } - if (new_mtu == cur_mtu) { - err = 0; - break; - } + if ((new_mtu < NXGE_DEFAULT_MTU) || + (new_mtu > NXGE_MAXIMUM_MTU)) { + err = EINVAL; + break; + } - if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) { - err = EBUSY; - break; - } + old_framesize = (uint32_t)nxgep->mac.maxframesize; + nxgep->mac.maxframesize = (uint16_t) + (new_mtu + NXGE_EHEADER_VLAN_CRC); + if (nxge_mac_set_framesize(nxgep)) { + nxgep->mac.maxframesize = + (uint16_t)old_framesize; + err = EINVAL; + break; + } - if ((new_mtu < NXGE_DEFAULT_MTU) || - (new_mtu > NXGE_MAXIMUM_MTU)) { - err = EINVAL; - break; - } + nxgep->mac.default_mtu = new_mtu; + nxgep->mac.is_jumbo = (new_mtu > NXGE_DEFAULT_MTU); - old_framesize = (uint32_t)nxgep->mac.maxframesize; - nxgep->mac.maxframesize = (uint16_t) - (new_mtu + NXGE_EHEADER_VLAN_CRC); - if (nxge_mac_set_framesize(nxgep)) { - nxgep->mac.maxframesize = - (uint16_t)old_framesize; - err = EINVAL; - break; - } + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: set MTU: %d maxframe %d", + new_mtu, nxgep->mac.maxframesize)); + break; + } - err = mac_maxsdu_update(nxgep->mach, new_mtu); - if (err) { - nxgep->mac.maxframesize = - (uint16_t)old_framesize; - err = EINVAL; - break; - } + case 
MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl; - nxgep->mac.default_mtu = new_mtu; - if (new_mtu > NXGE_DEFAULT_MTU) - nxgep->mac.is_jumbo = B_TRUE; - else - nxgep->mac.is_jumbo = B_FALSE; + ASSERT(pr_valsize >= sizeof (fl)); + bcopy(pr_val, &fl, sizeof (fl)); - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: set MTU: %d maxframe %d", - new_mtu, nxgep->mac.maxframesize)); + switch (fl) { + case LINK_FLOWCTRL_NONE: + param_arr[param_anar_pause].value = 0; break; - case MAC_PROP_FLOWCTRL: - bcopy(pr_val, &fl, sizeof (fl)); - switch (fl) { - default: - err = EINVAL; - break; - - case LINK_FLOWCTRL_NONE: - param_arr[param_anar_pause].value = 0; - break; - - case LINK_FLOWCTRL_RX: - param_arr[param_anar_pause].value = 1; - break; + case LINK_FLOWCTRL_RX: + param_arr[param_anar_pause].value = 1; + break; - case LINK_FLOWCTRL_TX: - case LINK_FLOWCTRL_BI: + case LINK_FLOWCTRL_TX: + case LINK_FLOWCTRL_BI: + err = EINVAL; + break; + default: + err = EINVAL; + break; + } +reprogram: + if ((err == 0) && !isLDOMguest(nxgep)) { + if (!nxge_param_link_update(nxgep)) { err = EINVAL; - break; } + } else { + err = EINVAL; + } + break; + } -reprogram: - if (err == 0) { - if (!nxge_param_link_update(nxgep)) { - err = EINVAL; - } - } - break; - case MAC_PROP_PRIVATE: - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: private property")); - err = nxge_set_priv_prop(nxgep, pr_name, pr_valsize, - pr_val); - break; + case MAC_PROP_PRIVATE: + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: private property")); + err = nxge_set_priv_prop(nxgep, pr_name, pr_valsize, pr_val); + break; - default: - err = ENOTSUP; - break; + default: + err = ENOTSUP; + break; } mutex_exit(nxgep->genlock); @@ -4714,142 +4687,198 @@ reprogram: static int nxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { nxge_t *nxgep = barg; p_nxge_param_t param_arr = nxgep->param_arr; 
p_nxge_stats_t statsp = nxgep->statsp; - int err = 0; - link_flowctrl_t fl; - uint64_t tmp = 0; - link_state_t ls; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_m_getprop: pr_num %d", pr_num)); - if (pr_valsize == 0) - return (EINVAL); + switch (pr_num) { + case MAC_PROP_DUPLEX: + *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; + break; - *perm = MAC_PROP_PERM_RW; + case MAC_PROP_SPEED: { + uint64_t val = statsp->mac_stats.link_speed * 1000000ull; - if ((is_default) && (pr_num != MAC_PROP_PRIVATE)) { - err = nxge_get_def_val(nxgep, pr_num, pr_valsize, pr_val); - return (err); + ASSERT(pr_valsize >= sizeof (val)); + bcopy(&val, pr_val, sizeof (val)); + break; } - bzero(pr_val, pr_valsize); - switch (pr_num) { - case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_getprop: duplex mode %d", - *(uint8_t *)pr_val)); - break; + case MAC_PROP_STATUS: { + link_state_t state = statsp->mac_stats.link_up ? + LINK_STATE_UP : LINK_STATE_DOWN; - case MAC_PROP_SPEED: - if (pr_valsize < sizeof (uint64_t)) - return (EINVAL); - *perm = MAC_PROP_PERM_READ; - tmp = statsp->mac_stats.link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - break; + ASSERT(pr_valsize >= sizeof (state)); + bcopy(&state, pr_val, sizeof (state)); + break; + } - case MAC_PROP_STATUS: - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); - *perm = MAC_PROP_PERM_READ; - if (!statsp->mac_stats.link_up) - ls = LINK_STATE_DOWN; - else - ls = LINK_STATE_UP; - bcopy(&ls, pr_val, sizeof (ls)); - break; + case MAC_PROP_AUTONEG: + *(uint8_t *)pr_val = param_arr[param_autoneg].value; + break; - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = - param_arr[param_autoneg].value; - break; + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl = param_arr[param_anar_pause].value != 0 ? 
+ LINK_FLOWCTRL_RX : LINK_FLOWCTRL_NONE; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (fl)); + bcopy(&fl, pr_val, sizeof (fl)); + break; + } - fl = LINK_FLOWCTRL_NONE; - if (param_arr[param_anar_pause].value) { - fl = LINK_FLOWCTRL_RX; - } - bcopy(&fl, pr_val, sizeof (fl)); - break; + case MAC_PROP_ADV_1000FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_1000fdx].value; + break; - case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_1000fdx].value; - break; + case MAC_PROP_EN_1000FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_1000fdx; + break; - case MAC_PROP_EN_1000FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_1000fdx; - break; + case MAC_PROP_ADV_100FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_100fdx].value; + break; - case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_100fdx].value; - break; + case MAC_PROP_EN_100FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_100fdx; + break; - case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_100fdx; - break; + case MAC_PROP_ADV_10FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_10fdx].value; + break; - case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_10fdx].value; - break; + case MAC_PROP_EN_10FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_10fdx; + break; - case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_10fdx; - break; + case MAC_PROP_PRIVATE: + return (nxge_get_priv_prop(nxgep, pr_name, pr_valsize, + pr_val)); - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - err = ENOTSUP; - break; + default: + return (ENOTSUP); + } - case MAC_PROP_PRIVATE: - err = nxge_get_priv_prop(nxgep, pr_name, pr_flags, 
- pr_valsize, pr_val, perm); - break; + return (0); +} - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = NXGE_DEFAULT_MTU; - range.range_uint32[0].mpur_max = NXGE_MAXIMUM_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } - default: - err = EINVAL; - break; +static void +nxge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + nxge_t *nxgep = barg; + p_nxge_stats_t statsp = nxgep->statsp; + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. + */ + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + /* + * Note that read-only properties don't need to + * provide default values since they cannot be + * changed by the administrator. 
+ */ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_RX); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + NXGE_DEFAULT_MTU, NXGE_MAXIMUM_MTU); + break; + + case MAC_PROP_PRIVATE: + nxge_priv_propinfo(pr_name, prh); + break; + } + + mutex_enter(nxgep->genlock); + if (statsp->port_stats.lb_mode != nxge_lb_normal && + nxge_param_locked(pr_num)) { + /* + * Some properties are locked (read-only) while the + * device is in any sort of loopback mode. + */ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } + mutex_exit(nxgep->genlock); +} - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "<== nxge_m_getprop")); +static void +nxge_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[64]; - return (err); + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_function_number") == 0 || + strcmp(pr_name, "_fw_version") == 0 || + strcmp(pr_name, "_port_mode") == 0 || + strcmp(pr_name, "_hot_swap_phy") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + + } else if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + (void) snprintf(valstr, sizeof (valstr), + "%d", RXDMA_RCR_TO_DEFAULT); + + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + (void) snprintf(valstr, sizeof (valstr), + "%d", RXDMA_RCR_PTHRES_DEFAULT); + + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv6_sctp") == 
0) { + (void) snprintf(valstr, sizeof (valstr), "%x", + NXGE_CLASS_FLOW_GEN_SERVER); + + } else if (strcmp(pr_name, "_soft_lso_enable") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 0); + + } else if (strcmp(pr_name, "_adv_10gfdx_cap") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 1); + + } else if (strcmp(pr_name, "_adv_pause_cap") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 1); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); } /* ARGSUSED */ @@ -5104,23 +5133,19 @@ nxge_set_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_valsize, } static int -nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val, uint_t *perm) +nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { p_nxge_param_t param_arr = nxgep->param_arr; char valstr[MAXNAMELEN]; int err = EINVAL; uint_t strsize; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_get_priv_prop: property %s", pr_name)); /* function number */ if (strcmp(pr_name, "_function_number") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->function_num); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5134,9 +5159,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Neptune firmware version */ if (strcmp(pr_name, "_fw_version") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%s", nxgep->vpd_info.ver); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5150,9 +5172,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* port PHY mode */ if (strcmp(pr_name, "_port_mode") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; switch (nxgep->mac.portmode) { case PORT_1G_COPPER: (void) snprintf(valstr, sizeof (valstr), "1G copper %s", 
@@ -5221,9 +5240,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Hot swappable PHY */ if (strcmp(pr_name, "_hot_swap_phy") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%s", nxgep->hot_swappable_phy ? "yes" : "no"); @@ -5241,12 +5257,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Receive Interrupt Blanking Parameters */ if (strcmp(pr_name, "_rxdma_intr_time") == 0) { err = 0; - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), - "%d", RXDMA_RCR_TO_DEFAULT); - goto done; - } - (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->intr_timeout); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5258,11 +5268,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { err = 0; - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), - "%d", RXDMA_RCR_PTHRES_DEFAULT); - goto done; - } (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->intr_threshold); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5274,12 +5279,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Classification and Load Distribution Configuration */ if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); @@ -5292,12 +5291,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_udp]); @@ -5309,12 +5302,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, goto done; } if (strcmp(pr_name,
"_class_opt_ipv4_ah") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_ah]); @@ -5327,12 +5314,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); @@ -5345,12 +5326,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); @@ -5363,12 +5338,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv6_udp]); @@ -5381,12 +5350,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv6_ah]); @@ -5399,12 +5362,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep,
(caddr_t)&param_arr[param_class_opt_ipv6_sctp]); @@ -5418,11 +5375,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Software LSO */ if (strcmp(pr_name, "_soft_lso_enable") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%d", 0); - err = 0; - goto done; - } (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->soft_lso_enable); err = 0; @@ -5434,8 +5386,7 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_adv_10gfdx_cap") == 0) { err = 0; - if (is_default || - nxgep->param_arr[param_anar_10gfdx].value != 0) { + if (nxgep->param_arr[param_anar_10gfdx].value != 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 1); goto done; } else { @@ -5445,8 +5396,7 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_adv_pause_cap") == 0) { err = 0; - if (is_default || - nxgep->param_arr[param_anar_pause].value != 0) { + if (nxgep->param_arr[param_anar_pause].value != 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 1); goto done; } else { @@ -5587,6 +5537,7 @@ nxge_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) ring = nxgep->tx_rings->rings[channel]; MUTEX_ENTER(&ring->lock); + ASSERT(ring->tx_ring_handle == NULL); ring->tx_ring_handle = rhp->ring_handle; MUTEX_EXIT(&ring->lock); @@ -5605,11 +5556,12 @@ nxge_tx_ring_stop(mac_ring_driver_t rdriver) ring = nxgep->tx_rings->rings[channel]; MUTEX_ENTER(&ring->lock); + ASSERT(ring->tx_ring_handle != NULL); ring->tx_ring_handle = (mac_ring_handle_t)NULL; MUTEX_EXIT(&ring->lock); } -static int +int nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) { p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; @@ -5623,23 +5575,25 @@ nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) MUTEX_ENTER(&ring->lock); - if (nxgep->rx_channel_started[channel] == B_TRUE) { + if (ring->started) { + ASSERT(ring->started == B_FALSE);
MUTEX_EXIT(&ring->lock); return (0); } /* set rcr_ring */ for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { - if ((nxgep->ldgvp->ldvp[i].is_rxdma == 1) && + if ((nxgep->ldgvp->ldvp[i].is_rxdma) && (nxgep->ldgvp->ldvp[i].channel == channel)) { ring->ldvp = &nxgep->ldgvp->ldvp[i]; ring->ldgp = nxgep->ldgvp->ldvp[i].ldgp; } } - nxgep->rx_channel_started[channel] = B_TRUE; ring->rcr_mac_handle = rhp->ring_handle; ring->rcr_gen_num = mr_gen_num; + ring->started = B_TRUE; + rhp->ring_gen_num = mr_gen_num; MUTEX_EXIT(&ring->lock); return (0); @@ -5657,11 +5611,53 @@ nxge_rx_ring_stop(mac_ring_driver_t rdriver) ring = nxgep->rx_rcr_rings->rcr_rings[channel]; MUTEX_ENTER(&ring->lock); - nxgep->rx_channel_started[channel] = B_FALSE; + ASSERT(ring->started == B_TRUE); ring->rcr_mac_handle = NULL; + ring->ldvp = NULL; + ring->ldgp = NULL; + ring->started = B_FALSE; MUTEX_EXIT(&ring->lock); } +static int +nxge_ring_get_htable_idx(p_nxge_t nxgep, mac_ring_type_t type, uint32_t channel) +{ + int i; + +#if defined(sun4v) + if (isLDOMguest(nxgep)) { + return (nxge_hio_get_dc_htable_idx(nxgep, + (type == MAC_RING_TYPE_TX) ? VP_BOUND_TX : VP_BOUND_RX, + channel)); + } +#endif + + ASSERT(nxgep->ldgvp != NULL); + + switch (type) { + case MAC_RING_TYPE_TX: + for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { + if ((nxgep->ldgvp->ldvp[i].is_txdma) && + (nxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + nxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + case MAC_RING_TYPE_RX: + for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { + if ((nxgep->ldgvp->ldvp[i].is_rxdma) && + (nxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + nxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + } + + return (-1); +} + /* * Callback funtion for MAC layer to register all rings. 
*/ @@ -5671,13 +5667,22 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, { p_nxge_t nxgep = (p_nxge_t)arg; p_nxge_hw_pt_cfg_t p_cfgp = &nxgep->pt_config.hw_config; + p_nxge_intr_t intrp; + uint32_t channel; + int htable_idx; + p_nxge_ring_handle_t rhandlep; + + ASSERT(nxgep != NULL); + ASSERT(p_cfgp != NULL); + ASSERT(infop != NULL); - NXGE_DEBUG_MSG((nxgep, TX_CTL, + NXGE_DEBUG_MSG((nxgep, DDI_CTL, "==> nxge_fill_ring 0x%x index %d", rtype, index)); + switch (rtype) { case MAC_RING_TYPE_TX: { - p_nxge_ring_handle_t rhandlep; + mac_intr_t *mintr = &infop->mri_intr; NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_fill_ring (TX) 0x%x index %d ntdcs %d", @@ -5689,17 +5694,31 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, rhandlep->index = index; rhandlep->ring_handle = rh; + channel = nxgep->pt_config.hw_config.tdc.start + index; + rhandlep->channel = channel; + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; + htable_idx = nxge_ring_get_htable_idx(nxgep, rtype, + channel); + if (htable_idx >= 0) + mintr->mi_ddi_handle = intrp->htable[htable_idx]; + else + mintr->mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhandlep; infop->mri_start = nxge_tx_ring_start; infop->mri_stop = nxge_tx_ring_stop; infop->mri_tx = nxge_tx_ring_send; - + infop->mri_stat = nxge_tx_ring_stat; + infop->mri_flags = MAC_RING_TX_SERIALIZE; break; } + case MAC_RING_TYPE_RX: { - p_nxge_ring_handle_t rhandlep; - int nxge_rindex; mac_intr_t nxge_mac_intr; + int nxge_rindex; + p_nxge_intr_t intrp; + + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_fill_ring (RX) 0x%x index %d nrdcs %d", @@ -5710,34 +5729,47 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, * Find the ring index in the nxge instance. 
*/ nxge_rindex = nxge_get_rxring_index(nxgep, rg_index, index); + channel = nxgep->pt_config.hw_config.start_rdc + index; + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; ASSERT((nxge_rindex >= 0) && (nxge_rindex < p_cfgp->max_rdcs)); rhandlep = &nxgep->rx_ring_handles[nxge_rindex]; rhandlep->nxgep = nxgep; rhandlep->index = nxge_rindex; rhandlep->ring_handle = rh; + rhandlep->channel = channel; /* * Entrypoint to enable interrupt (disable poll) and * disable interrupt (enable poll). */ + bzero(&nxge_mac_intr, sizeof (nxge_mac_intr)); nxge_mac_intr.mi_handle = (mac_intr_handle_t)rhandlep; nxge_mac_intr.mi_enable = (mac_intr_enable_t)nxge_disable_poll; nxge_mac_intr.mi_disable = (mac_intr_disable_t)nxge_enable_poll; + + htable_idx = nxge_ring_get_htable_idx(nxgep, rtype, + channel); + if (htable_idx >= 0) + nxge_mac_intr.mi_ddi_handle = intrp->htable[htable_idx]; + else + nxge_mac_intr.mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhandlep; infop->mri_start = nxge_rx_ring_start; infop->mri_stop = nxge_rx_ring_stop; - infop->mri_intr = nxge_mac_intr; /* ??? 
*/ + infop->mri_intr = nxge_mac_intr; infop->mri_poll = nxge_rx_poll; - + infop->mri_stat = nxge_rx_ring_stat; + infop->mri_flags = MAC_RING_RX_ENQUEUE; break; } + default: break; } - NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_fill_ring 0x%x", - rtype)); + NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_fill_ring 0x%x", rtype)); } static void @@ -6181,6 +6213,8 @@ nxge_add_intrs_adv_type(p_nxge_t nxgep, uint32_t int_type) return (NXGE_ERROR | NXGE_DDI_FAILED); } + + ldgp->htable_idx = x; intrp->intr_added++; } @@ -6341,6 +6375,8 @@ nxge_add_intrs_adv_type_fix(p_nxge_t nxgep, uint32_t int_type) return (NXGE_ERROR | NXGE_DDI_FAILED); } + + ldgp->htable_idx = x; intrp->intr_added++; } @@ -6516,13 +6552,10 @@ nxge_mac_register(p_nxge_t nxgep) macp->m_max_sdu = nxgep->mac.default_mtu; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = nxge_priv_props; - macp->m_priv_prop_count = NXGE_MAX_PRIV_PROPS; - if (isLDOMguest(nxgep)) { - macp->m_v12n = MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE; - } else { - macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | \ - MAC_VIRT_SERIALIZE; - } + if (isLDOMguest(nxgep)) + macp->m_v12n = MAC_VIRT_LEVEL1; + else + macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_mac_register: instance %d " @@ -6975,40 +7008,6 @@ nxge_create_msi_property(p_nxge_t nxgep) return (nmsi); } -/* ARGSUSED */ -static int -nxge_get_def_val(nxge_t *nxgep, mac_prop_id_t pr_num, uint_t pr_valsize, - void *pr_val) -{ - int err = 0; - link_flowctrl_t fl; - - switch (pr_num) { - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = 1; - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_RX; - bcopy(&fl, pr_val, sizeof (fl)); - break; - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - *(uint8_t *)pr_val = 1; - break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)pr_val = 1; - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - - 
/* * The following is a software around for the Neptune hardware's * interrupt bugs; The Neptune hardware may generate spurious interrupts when diff --git a/usr/src/uts/common/io/nxge/nxge_rxdma.c b/usr/src/uts/common/io/nxge/nxge_rxdma.c index 9751396cf8..16931c739b 100644 --- a/usr/src/uts/common/io/nxge/nxge_rxdma.c +++ b/usr/src/uts/common/io/nxge/nxge_rxdma.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1814,7 +1814,7 @@ nxge_rx_intr(void *arg1, void *arg2) channel = ldvp->channel; ldgp = ldvp->ldgp; - if (!isLDOMguest(nxgep) && (!nxgep->rx_channel_started[channel])) { + if (!isLDOMguest(nxgep) && (!rcrp->started)) { NXGE_DEBUG_MSG((nxgep, INT_CTL, "<== nxge_rx_intr: channel is not started")); @@ -2718,8 +2718,7 @@ nxge_receive_packet(p_nxge_t nxgep, is_valid, multi, is_tcp_udp, frag, error_type)); if (is_tcp_udp && !frag && !error_type) { - (void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0); + mac_hcksum_set(nmp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_receive_packet: Full tcp/udp cksum " "is_valid 0x%x multi 0x%llx pkt %d frag %d " diff --git a/usr/src/uts/common/io/nxge/nxge_send.c b/usr/src/uts/common/io/nxge/nxge_send.c index 4f7edf292a..7b78fa8af6 100644 --- a/usr/src/uts/common/io/nxge/nxge_send.c +++ b/usr/src/uts/common/io/nxge/nxge_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -62,6 +62,8 @@ nxge_tx_ring_task(void *arg) { p_tx_ring_t ring = (p_tx_ring_t)arg; + ASSERT(ring->tx_ring_handle != NULL); + MUTEX_ENTER(&ring->lock); (void) nxge_txdma_reclaim(ring->nxgep, ring, 0); MUTEX_EXIT(&ring->lock); @@ -274,8 +276,8 @@ nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) } } - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, + &value, &cksum_flags); if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) { start_offset += sizeof (ether_header_t); stuff_offset += sizeof (ether_header_t); @@ -809,7 +811,7 @@ nxge_start_control_header_only: i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); if (ngathers > nxge_tx_max_gathers) { good_packet = B_FALSE; - hcksum_retrieve(mp, NULL, NULL, &start_offset, + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, &value, &cksum_flags); diff --git a/usr/src/uts/common/io/nxge/nxge_txdma.c b/usr/src/uts/common/io/nxge/nxge_txdma.c index 68b823b01c..f3fd19a3c2 100644 --- a/usr/src/uts/common/io/nxge/nxge_txdma.c +++ b/usr/src/uts/common/io/nxge/nxge_txdma.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1021,7 +1021,7 @@ nxge_txdma_reclaim(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, int nmblks) "==> nxge_txdma_reclaim: dump desc:")); pkt_len = tx_desc_pp->bits.hdw.tr_len; - tdc_stats->obytes += pkt_len; + tdc_stats->obytes += (pkt_len - TX_PKT_HEADER_SIZE); tdc_stats->opackets += tx_desc_pp->bits.hdw.sop; NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_txdma_reclaim: pkt_len %d " diff --git a/usr/src/uts/common/io/nxge/nxge_virtual.c b/usr/src/uts/common/io/nxge/nxge_virtual.c index 27840f8b30..395ded69b7 100644 --- a/usr/src/uts/common/io/nxge/nxge_virtual.c +++ b/usr/src/uts/common/io/nxge/nxge_virtual.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2542,10 +2542,6 @@ nxge_set_hw_dma_config(p_nxge_t nxgep) tdc_grp_p->grp_index = group->index; } - for (i = 0; i < NXGE_MAX_RDCS; i++) { - nxgep->rx_channel_started[i] = B_FALSE; - } - /* * Setup RDC groups */ diff --git a/usr/src/uts/common/io/pcan/pcan.c b/usr/src/uts/common/io/pcan/pcan.c index a22601cca2..be1fbf4aec 100644 --- a/usr/src/uts/common/io/pcan/pcan.c +++ b/usr/src/uts/common/io/pcan/pcan.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -103,11 +103,12 @@ static int pcan_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int pcan_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void pcan_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); mac_callbacks_t pcan_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, pcan_gstat, pcan_start, pcan_stop, @@ -115,12 +116,14 @@ mac_callbacks_t pcan_m_callbacks = { pcan_sdmulti, pcan_saddr, pcan_tx, + NULL, pcan_ioctl, NULL, NULL, NULL, pcan_m_setprop, - pcan_m_getprop + pcan_m_getprop, + pcan_m_propinfo }; static char *pcan_name_str = "pcan"; @@ -4525,7 +4528,7 @@ pcan_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; pcan_maci_t *pcan_p = (pcan_maci_t *)arg; @@ -4536,9 +4539,6 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, err = EINVAL; return (err); } - bzero(wldp_buf, wldp_length); - - *perm = MAC_PROP_PERM_RW; switch (wldp_pr_num) { /* mac_prop_id */ @@ -4558,22 +4558,18 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, pcan_get_encrypt(pcan_p, wldp_buf); break; case MAC_PROP_WL_BSSTYPE: - *perm = MAC_PROP_PERM_READ; pcan_get_bsstype(pcan_p, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: pcan_get_linkstatus(pcan_p, wldp_buf); break; case MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; pcan_get_esslist(pcan_p, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; pcan_get_suprates(wldp_buf); 
break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; err = pcan_get_rssi(pcan_p, wldp_buf); break; case MAC_PROP_WL_RADIO: @@ -4610,6 +4606,23 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static void +pcan_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + _NOTE(ARGUNUSED(arg, pr_name)); + + switch (wldp_pr_num) { + case MAC_PROP_WL_BSSTYPE: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(mph, MAC_PROP_PERM_READ); + break; + } +} + + /* * quiesce(9E) entry point. * diff --git a/usr/src/uts/common/io/pcwl/pcwl.c b/usr/src/uts/common/io/pcwl/pcwl.c index 58d7028c39..bf1bfc7fd4 100644 --- a/usr/src/uts/common/io/pcwl/pcwl.c +++ b/usr/src/uts/common/io/pcwl/pcwl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -85,13 +85,14 @@ static int pcwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int pcwl_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void pcwl_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wlpd_pr_num, mac_prop_info_handle_t mph); static void pcwl_delay(pcwl_maci_t *, clock_t); mac_callbacks_t pcwl_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, pcwl_gstat, pcwl_start, pcwl_stop, @@ -99,12 +100,14 @@ mac_callbacks_t pcwl_m_callbacks = { pcwl_sdmulti, pcwl_saddr, pcwl_tx, + NULL, pcwl_ioctl, NULL, NULL, NULL, pcwl_m_setprop, - pcwl_m_getprop + pcwl_m_getprop, + pcwl_m_propinfo }; static char *pcwl_name_str = "pcwl"; @@ -4400,18 +4403,11 @@ pcwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; - pcwl_maci_t *pcwl_p = (pcwl_maci_t *)arg; - if (wldp_length == 0) { - err = EINVAL; - return (err); - } - bzero(wldp_buf, wldp_length); - mutex_enter(&pcwl_p->pcwl_glock); if (!(pcwl_p->pcwl_flag & PCWL_CARD_READY)) { mutex_exit(&pcwl_p->pcwl_glock); @@ -4419,8 +4415,6 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } - *perm = MAC_PROP_PERM_RW; - switch (wldp_pr_num) { /* mac_prop_id */ case MAC_PROP_WL_ESSID: @@ -4442,19 +4436,15 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, pcwl_get_bsstype(pcwl_p, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: - *perm = MAC_PROP_PERM_READ; err = pcwl_get_linkstatus(pcwl_p, wldp_buf); break; case 
MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; pcwl_get_esslist(pcwl_p, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; pcwl_get_suprates(wldp_buf); break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; pcwl_get_param_rssi(pcwl_p, wldp_buf); break; case MAC_PROP_WL_RADIO: @@ -4493,6 +4483,23 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +static void +pcwl_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wlpd_pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg, pr_name)); + + switch (wlpd_pr_num) { + case MAC_PROP_WL_LINKSTATUS: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } +} + + /* * quiesce(9E) entry point. * diff --git a/usr/src/uts/common/io/ral/rt2560.c b/usr/src/uts/common/io/ral/rt2560.c index 45e32d00dd..74733347cb 100644 --- a/usr/src/uts/common/io/ral/rt2560.c +++ b/usr/src/uts/common/io/ral/rt2560.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -172,10 +172,12 @@ static void rt2560_m_ioctl(void *, queue_t *, mblk_t *); static int rt2560_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rt2560_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rt2560_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rt2560_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2560_m_stat, rt2560_m_start, rt2560_m_stop, @@ -183,12 +185,14 @@ static mac_callbacks_t rt2560_m_callbacks = { rt2560_m_multicst, rt2560_m_unicst, rt2560_m_tx, + NULL, rt2560_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rt2560_m_setprop, - rt2560_m_getprop + rt2560_m_getprop, + rt2560_m_propinfo }; uint32_t ral_dbg_flags = 0; @@ -2138,18 +2142,27 @@ rt2560_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rt2560_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2560_softc *sc = arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +rt2560_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rt2560_softc *sc = arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + +static void rt2560_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct rt2560_softc *sc = (struct rt2560_softc *)arg; diff --git a/usr/src/uts/common/io/rge/rge_main.c b/usr/src/uts/common/io/rge/rge_main.c index 773d474301..7ad85f53de 100644 --- a/usr/src/uts/common/io/rge/rge_main.c +++ b/usr/src/uts/common/io/rge/rge_main.c @@ -123,6 +123,7 @@ static mac_callbacks_t rge_m_callbacks = { rge_m_multicst, rge_m_unicst, rge_m_tx, + NULL, 
rge_m_ioctl, rge_m_getcapab }; diff --git a/usr/src/uts/common/io/rge/rge_rxtx.c b/usr/src/uts/common/io/rge/rge_rxtx.c index a2d881c67b..9b16c2ae82 100644 --- a/usr/src/uts/common/io/rge/rge_rxtx.c +++ b/usr/src/uts/common/io/rge/rge_rxtx.c @@ -287,11 +287,11 @@ rge_receive_packet(rge_t *rgep, uint32_t slot) proto = rx_status & RBD_FLAG_PROTOCOL; if ((proto == RBD_FLAG_TCP && !(rx_status & RBD_TCP_CKSUM_ERR)) || (proto == RBD_FLAG_UDP && !(rx_status & RBD_UDP_CKSUM_ERR))) - pflags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + pflags |= HCK_FULLCKSUM_OK; if (proto != RBD_FLAG_NONE_IP && !(rx_status & RBD_IP_CKSUM_ERR)) - pflags |= HCK_IPV4_HDRCKSUM; + pflags |= HCK_IPV4_HDRCKSUM_OK; if (pflags != 0) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, pflags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, pflags); } return (mp); @@ -574,7 +574,7 @@ rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci) /* * h/w checksum offload flags */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); if (pflags & HCK_FULLCKSUM) { ASSERT(totlen >= sizeof (struct ether_header) + sizeof (struct ip)); diff --git a/usr/src/uts/common/io/rtls/rtls.c b/usr/src/uts/common/io/rtls/rtls.c index 4a0f6fef9a..d470fdfc7d 100644 --- a/usr/src/uts/common/io/rtls/rtls.c +++ b/usr/src/uts/common/io/rtls/rtls.c @@ -93,10 +93,6 @@ static int rtls_m_multicst(void *, boolean_t, const uint8_t *); static int rtls_m_promisc(void *, boolean_t); static mblk_t *rtls_m_tx(void *, mblk_t *); static int rtls_m_stat(void *, uint_t, uint64_t *); -static int rtls_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); -static int rtls_m_setprop(void *, const char *, mac_prop_id_t, uint_t, - const void *); static uint_t rtls_intr(caddr_t); @@ -184,13 +180,7 @@ static mac_callbacks_t rtls_m_callbacks = { rtls_m_promisc, rtls_m_multicst, rtls_m_unicst, - rtls_m_tx, - NULL, /* mc_ioctl */ - NULL, /* mc_getcapab */ - NULL, /* mc_open */ - 
NULL, /* mc_close */ - rtls_m_setprop, - rtls_m_getprop, + rtls_m_tx }; static mii_ops_t rtls_mii_ops = { @@ -912,24 +902,6 @@ rtls_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } -int -rtls_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) -{ - rtls_t *rtlsp = arg; - - return (mii_m_getprop(rtlsp->mii, name, num, flags, sz, val, perm)); -} - -int -rtls_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, - const void *val) -{ - rtls_t *rtlsp = arg; - - return (mii_m_setprop(rtlsp->mii, name, num, sz, val)); -} - /* * rtls_send() -- send a packet * diff --git a/usr/src/uts/common/io/rtw/rtw.c b/usr/src/uts/common/io/rtw/rtw.c index be463e40d5..c237184a6e 100644 --- a/usr/src/uts/common/io/rtw/rtw.c +++ b/usr/src/uts/common/io/rtw/rtw.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +41,7 @@ #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> +#include <sys/byteorder.h> #include "rtwreg.h" #include "rtwvar.h" #include "smc93cx6var.h" @@ -139,10 +140,12 @@ static void rtw_m_ioctl(void *, queue_t *, mblk_t *); static int rtw_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rtw_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rtw_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rtw_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rtw_m_stat, rtw_m_start, rtw_m_stop, @@ -150,12 +153,14 @@ static mac_callbacks_t rtw_m_callbacks = { rtw_m_multicst, rtw_m_unicst, rtw_m_tx, + NULL, rtw_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rtw_m_setprop, - rtw_m_getprop + rtw_m_getprop, + rtw_m_propinfo }; DDI_DEFINE_STREAM_OPS(rtw_dev_ops, nulldev, nulldev, rtw_attach, rtw_detach, @@ -2914,17 +2919,25 @@ rtw_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rtw_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { rtw_softc_t *rsc = arg; int err; err = ieee80211_getprop(&rsc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +rtw_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + rtw_softc_t *rsc = arg; + + ieee80211_propinfo(&rsc->sc_ic, pr_name, wldp_pr_num, prh); +} static int rtw_m_start(void *arg) diff --git a/usr/src/uts/common/io/rum/rum.c b/usr/src/uts/common/io/rum/rum.c index f76fb0fae1..abec7e727d 100644 --- a/usr/src/uts/common/io/rum/rum.c +++ b/usr/src/uts/common/io/rum/rum.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun 
Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> +#include <sys/byteorder.h> #define USBDRV_MAJOR_VER 2 #define USBDRV_MINOR_VER 0 @@ -260,10 +261,12 @@ static void rum_m_ioctl(void *, queue_t *, mblk_t *); static int rum_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rum_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rum_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rum_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rum_m_stat, rum_m_start, rum_m_stop, @@ -271,12 +274,14 @@ static mac_callbacks_t rum_m_callbacks = { rum_m_multicst, rum_m_unicst, rum_m_tx, + NULL, rum_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rum_m_setprop, - rum_m_getprop + rum_m_getprop, + rum_m_propinfo }; static void rum_amrr_start(struct rum_softc *, struct ieee80211_node *); @@ -2088,18 +2093,27 @@ rum_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rum_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rum_softc *sc = (struct rum_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +rum_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rum_softc *sc = (struct rum_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + +static void rum_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct rum_softc *sc = (struct rum_softc 
*)arg; diff --git a/usr/src/uts/common/io/rwd/rt2661.c b/usr/src/uts/common/io/rwd/rt2661.c index 6419dbd96d..df3e688e4d 100644 --- a/usr/src/uts/common/io/rwd/rt2661.c +++ b/usr/src/uts/common/io/rwd/rt2661.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -271,11 +271,13 @@ static int rt2661_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int rt2661_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void rt2661_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static mac_callbacks_t rt2661_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2661_m_stat, rt2661_m_start, rt2661_m_stop, @@ -283,12 +285,14 @@ static mac_callbacks_t rt2661_m_callbacks = { rt2661_m_multicst, rt2661_m_unicst, rt2661_m_tx, + NULL, rt2661_m_ioctl, NULL, NULL, NULL, rt2661_m_setprop, - rt2661_m_getprop + rt2661_m_getprop, + rt2661_m_propinfo }; #ifdef DEBUG @@ -2617,17 +2621,26 @@ rt2661_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int rt2661_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2661_softc *sc = (struct rt2661_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +rt2661_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct rt2661_softc *sc = (struct rt2661_softc *)arg; + + 
ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int rt2661_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/rwn/rt2860.c b/usr/src/uts/common/io/rwn/rt2860.c index 588d5c3730..703f1df19a 100644 --- a/usr/src/uts/common/io/rwn/rt2860.c +++ b/usr/src/uts/common/io/rwn/rt2860.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -245,12 +245,14 @@ static void rt2860_m_ioctl(void *, queue_t *, mblk_t *); static int rt2860_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); +static void rt2860_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t prh); static int rt2860_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); static mac_callbacks_t rt2860_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2860_m_stat, rt2860_m_start, rt2860_m_stop, @@ -258,12 +260,14 @@ static mac_callbacks_t rt2860_m_callbacks = { rt2860_m_multicst, rt2860_m_unicst, rt2860_m_tx, + NULL, rt2860_m_ioctl, NULL, NULL, NULL, rt2860_m_setprop, - rt2860_m_getprop + rt2860_m_getprop, + rt2860_m_propinfo }; #ifdef DEBUG @@ -2635,17 +2639,26 @@ rt2860_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int rt2860_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2860_softc *sc = (struct rt2860_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, 
perm); + wldp_length, wldp_buf); return (err); } +static void +rt2860_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rt2860_softc *sc = (struct rt2860_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int rt2860_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/sfe/sfe_util.c b/usr/src/uts/common/io/sfe/sfe_util.c index c600c6927a..fb6d4ceb9b 100644 --- a/usr/src/uts/common/io/sfe/sfe_util.c +++ b/usr/src/uts/common/io/sfe/sfe_util.c @@ -32,7 +32,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -4069,6 +4069,7 @@ static mac_callbacks_t gem_m_callbacks = { gem_m_multicst, gem_m_unicst, gem_m_tx, + NULL, gem_m_ioctl, gem_m_getcapab, }; diff --git a/usr/src/uts/common/io/simnet/simnet.c b/usr/src/uts/common/io/simnet/simnet.c index f1a172dd9b..727fbbad8e 100644 --- a/usr/src/uts/common/io/simnet/simnet.c +++ b/usr/src/uts/common/io/simnet/simnet.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -109,10 +109,12 @@ static mblk_t *simnet_m_tx(void *, mblk_t *); static int simnet_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int simnet_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void simnet_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t simnet_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP), + (MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO), simnet_m_stat, simnet_m_start, simnet_m_stop, @@ -120,12 +122,14 @@ static mac_callbacks_t simnet_m_callbacks = { simnet_m_multicst, simnet_m_unicst, simnet_m_tx, + NULL, simnet_m_ioctl, NULL, NULL, NULL, simnet_m_setprop, - simnet_m_getprop + simnet_m_getprop, + simnet_m_propinfo }; /* @@ -1228,17 +1232,16 @@ simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } static int -simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_flags, +simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_valsize, void *pr_val) { simnet_wifidev_t *wdev = sdev->sd_wifidev; - boolean_t is_default = ((pr_flags & MAC_PROP_DEFAULT) != 0); int err = 0; int value; if (strcmp(pr_name, "_wl_esslist") == 0) { /* Returns num of _wl_ess_conf_t that have been set */ - value = (is_default ? 0:wdev->swd_esslist_num); + value = wdev->swd_esslist_num; } else if (strcmp(pr_name, "_wl_connected") == 0) { value = ((wdev->swd_linkstatus == WL_CONNECTED) ? 
1:0); } else { @@ -1252,7 +1255,7 @@ simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_flags, static int simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { simnet_dev_t *sdev = arg; simnet_wifidev_t *wdev = sdev->sd_wifidev; @@ -1276,9 +1279,6 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_AUTH_MODE: case MAC_PROP_WL_ENCRYPTION: break; - case MAC_PROP_WL_BSSTYPE: - *perm = MAC_PROP_PERM_READ; - break; case MAC_PROP_WL_LINKSTATUS: (void) memcpy(wldp_buf, &wdev->swd_linkstatus, sizeof (wdev->swd_linkstatus)); @@ -1286,7 +1286,6 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_ESS_LIST: { wl_ess_conf_t *w_ess_conf; - *perm = MAC_PROP_PERM_READ; ((wl_ess_list_t *)wldp_buf)->wl_ess_list_num = wdev->swd_esslist_num; /* LINTED E_BAD_PTR_CAST_ALIGN */ @@ -1299,11 +1298,7 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } break; } - case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; - break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; *(wl_rssi_t *)wldp_buf = wdev->swd_rssi; break; case MAC_PROP_WL_RADIO: @@ -1314,8 +1309,8 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_DESIRED_RATES: break; case MAC_PROP_PRIVATE: - err = simnet_get_priv_prop(sdev, pr_name, pr_flags, - wldp_length, wldp_buf); + err = simnet_get_priv_prop(sdev, pr_name, wldp_length, + wldp_buf); break; default: err = ENOTSUP; @@ -1324,3 +1319,40 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +static void +simnet_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[MAXNAMELEN]; + + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_wl_esslist") == 0) { + (void) snprintf(valstr, 
sizeof (valstr), "%d", 0); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); +} + +static void +simnet_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + simnet_dev_t *sdev = arg; + + if (sdev->sd_type == DL_ETHER) + return; + + switch (wldp_pr_num) { + case MAC_PROP_WL_BSSTYPE: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + case MAC_PROP_PRIVATE: + simnet_priv_propinfo(pr_name, prh); + break; + } +} diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c index 9e33c31b7b..05f74dd4c1 100644 --- a/usr/src/uts/common/io/softmac/softmac_main.c +++ b/usr/src/uts/common/io/softmac/softmac_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -84,11 +84,13 @@ static boolean_t softmac_m_getcapab(void *, mac_capab_t, void *); static int softmac_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int softmac_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); - + uint_t, void *); +static void softmac_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); #define SOFTMAC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t softmac_m_callbacks = { SOFTMAC_M_CALLBACK_FLAGS, @@ -99,12 +101,14 @@ static mac_callbacks_t softmac_m_callbacks = { softmac_m_multicst, softmac_m_unicst, softmac_m_tx, + NULL, softmac_m_ioctl, softmac_m_getcapab, softmac_m_open, softmac_m_close, softmac_m_setprop, - softmac_m_getprop + softmac_m_getprop, + softmac_m_propinfo }; /*ARGSUSED*/ @@ -1468,8 +1472,8 @@ softmac_m_setprop(void *arg, const char *name, mac_prop_id_t id, } static int -softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, - uint_t valsize, void *val, uint_t *perm) +softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, + uint_t valsize, void *val) { softmac_t *softmac = arg; char *fpstr; @@ -1478,18 +1482,15 @@ softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, return (ENOTSUP); if (strcmp(name, "_fastpath") == 0) { - if ((flags & MAC_PROP_DEFAULT) != 0) - return (ENOTSUP); - - *perm = MAC_PROP_PERM_READ; mutex_enter(&softmac->smac_fp_mutex); fpstr = (DATAPATH_MODE(softmac) == SOFTMAC_SLOWPATH) ? "disabled" : "enabled"; mutex_exit(&softmac->smac_fp_mutex); } else if (strcmp(name, "_disable_fastpath") == 0) { - *perm = MAC_PROP_PERM_RW; - fpstr = ((flags & MAC_PROP_DEFAULT) != 0) ? "false" : - (softmac->smac_fastpath_admin_disabled ? "true" : "false"); + fpstr = softmac->smac_fastpath_admin_disabled ? 
+ "true" : "false"; + } else if (strcmp(name, "_softmac") == 0) { + fpstr = "true"; } else { return (ENOTSUP); } @@ -1497,6 +1498,23 @@ softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, return (strlcpy(val, fpstr, valsize) >= valsize ? EINVAL : 0); } +static void +softmac_m_propinfo(void *arg, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + + if (id != MAC_PROP_PRIVATE) + return; + + if (strcmp(name, "_fastpath") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else if (strcmp(name, "_disable_fastpath") == 0) { + mac_prop_info_set_default_str(prh, "false"); + } + +} + int softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp) { diff --git a/usr/src/uts/common/io/softmac/softmac_pkt.c b/usr/src/uts/common/io/softmac/softmac_pkt.c index 4641fb3372..6389ec1328 100644 --- a/usr/src/uts/common/io/softmac/softmac_pkt.c +++ b/usr/src/uts/common/io/softmac/softmac_pkt.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,10 +75,8 @@ softmac_rput_process_data(softmac_lower_t *slp, mblk_t *mp) "copymsg failed"); goto failed; } - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, - &value, &flags); - VERIFY(hcksum_assoc(tmp, NULL, NULL, start, stuff, end, - value, flags, KM_NOSLEEP) == 0); + mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); + mac_hcksum_set(tmp, start, stuff, end, value, flags); freemsg(mp); mp = tmp; } diff --git a/usr/src/uts/common/io/uath/uath.c b/usr/src/uts/common/io/uath/uath.c index 0bc0ba7ea1..3353369389 100644 --- a/usr/src/uts/common/io/uath/uath.c +++ b/usr/src/uts/common/io/uath/uath.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -60,6 +60,7 @@ #include <sys/strsun.h> #include <sys/modctl.h> #include <sys/devops.h> +#include <sys/byteorder.h> #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> @@ -193,10 +194,12 @@ static void uath_m_ioctl(void *, queue_t *, mblk_t *); static int uath_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int uath_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void uath_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t uath_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, uath_m_stat, uath_m_start, uath_m_stop, @@ -204,12 +207,14 @@ static mac_callbacks_t uath_m_callbacks = { uath_m_multicst, uath_m_unicst, uath_m_tx, + NULL, uath_m_ioctl, NULL, NULL, NULL, uath_m_setprop, - uath_m_getprop + uath_m_getprop, + uath_m_propinfo }; static usb_alt_if_data_t * @@ -2874,16 +2879,25 @@ uath_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int uath_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct uath_softc *sc = (struct uath_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +uath_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct uath_softc *sc = (struct uath_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int uath_m_stat(void *arg, uint_t stat, uint64_t *val) { diff --git a/usr/src/uts/common/io/ural/ural.c b/usr/src/uts/common/io/ural/ural.c index 82e9d711e4..a55ecd9f61 100644 --- a/usr/src/uts/common/io/ural/ural.c +++ b/usr/src/uts/common/io/ural/ural.c 
@@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +29,7 @@ #include <sys/strsubr.h> #include <sys/modctl.h> #include <sys/devops.h> +#include <sys/byteorder.h> #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> @@ -264,10 +265,12 @@ static void ural_m_ioctl(void *, queue_t *, mblk_t *); static int ural_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int ural_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void ural_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t ural_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ural_m_stat, ural_m_start, ural_m_stop, @@ -275,12 +278,14 @@ static mac_callbacks_t ural_m_callbacks = { ural_m_multicst, ural_m_unicst, ural_m_tx, + NULL, ural_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, ural_m_setprop, - ural_m_getprop + ural_m_getprop, + ural_m_propinfo }; static void ural_amrr_start(struct ural_softc *, struct ieee80211_node *); @@ -2067,18 +2072,27 @@ ural_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int ural_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ural_softc *sc = (struct ural_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +ural_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct ural_softc *sc = (struct ural_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + +static void 
ural_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct ural_softc *sc = (struct ural_softc *)arg; diff --git a/usr/src/uts/common/io/urtw/urtw.c b/usr/src/uts/common/io/urtw/urtw.c index 3060ea80a0..b649ca609d 100644 --- a/usr/src/uts/common/io/urtw/urtw.c +++ b/usr/src/uts/common/io/urtw/urtw.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -83,11 +83,13 @@ static mblk_t *urtw_m_tx(void *, mblk_t *); static void urtw_m_ioctl(void *, queue_t *, mblk_t *); static int urtw_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int urtw_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +static int urtw_m_getprop(void *, const char *, mac_prop_id_t, + uint_t, void *); +static void urtw_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t urtw_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, urtw_m_stat, urtw_m_start, urtw_m_stop, @@ -95,12 +97,14 @@ static mac_callbacks_t urtw_m_callbacks = { urtw_m_multicst, urtw_m_unicst, urtw_m_tx, + NULL, urtw_m_ioctl, NULL, NULL, NULL, urtw_m_setprop, - urtw_m_getprop + urtw_m_getprop, + urtw_m_propinfo }; static int urtw_tx_start(struct urtw_softc *, mblk_t *, int); @@ -4024,16 +4028,25 @@ urtw_m_promisc(void *arg, boolean_t on) static int urtw_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct urtw_softc *sc = (struct urtw_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +urtw_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t 
mph) +{ + struct urtw_softc *sc = (struct urtw_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int urtw_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index b62ed5a16c..2b063cf79c 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -82,8 +82,8 @@ static int vnic_m_stat(void *, uint_t, uint64_t *); static void vnic_m_ioctl(void *, queue_t *, mblk_t *); static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static void vnic_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mblk_t *vnic_m_tx(void *, mblk_t *); static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); static void vnic_notify_cb(void *, mac_notify_type_t); @@ -101,7 +101,7 @@ static mod_hash_t *vnic_hash; #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) #define VNIC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) static mac_callbacks_t vnic_m_callbacks = { VNIC_M_CALLBACK_FLAGS, @@ -112,12 +112,14 @@ static mac_callbacks_t vnic_m_callbacks = { vnic_m_multicst, vnic_m_unicst, vnic_m_tx, + NULL, vnic_m_ioctl, vnic_m_capab_get, NULL, NULL, vnic_m_setprop, - vnic_m_getprop + NULL, + vnic_m_propinfo }; void @@ -185,7 +187,7 @@ static int vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg, uint8_t *mac_addr_arg, uint16_t flags, 
vnic_ioc_diag_t *diag, - uint16_t vid) + uint16_t vid, boolean_t req_hwgrp_flag) { mac_diag_t mac_diag; uint16_t mac_flags = 0; @@ -290,7 +292,14 @@ vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, /* * We get the address here since we copy it in the * vnic's vn_addr. + * We can't ask for hardware resources since we + * don't currently support hardware classification + * for these MAC clients. */ + if (req_hwgrp_flag) { + *diag = VNIC_IOC_DIAG_NO_HWRINGS; + return (ENOTSUP); + } mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg); *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); mac_flags |= MAC_UNICAST_VNIC_PRIMARY; @@ -330,8 +339,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0); char vnic_name[MAXNAMELEN]; const mac_info_t *minfop; - uint32_t req_hwgrp_flag = ((flags & VNIC_IOC_CREATE_REQ_HWRINGS) != 0) ? - MAC_OPEN_FLAGS_REQ_HWRINGS : 0; + uint32_t req_hwgrp_flag = B_FALSE; *diag = VNIC_IOC_DIAG_NONE; @@ -394,11 +402,15 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL, NULL); err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch, - vnic_name, MAC_OPEN_FLAGS_IS_VNIC | req_hwgrp_flag); + vnic_name, MAC_OPEN_FLAGS_IS_VNIC); if (err != 0) goto bail; if (mrp != NULL) { + if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 || + (mrp->mrp_mask & MRP_TX_RINGS) != 0) { + req_hwgrp_flag = B_TRUE; + } err = mac_client_set_resources(vnic->vn_mch, mrp); if (err != 0) goto bail; @@ -406,10 +418,11 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, /* assign a MAC address to the VNIC */ err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot, - mac_prefix_len, mac_len, mac_addr, flags, diag, vid); + mac_prefix_len, mac_len, mac_addr, flags, diag, vid, + req_hwgrp_flag); if (err != 0) { vnic->vn_muh = NULL; - if (diag != NULL && req_hwgrp_flag != 0) + if (diag != NULL && req_hwgrp_flag) *diag = 
VNIC_IOC_DIAG_NO_HWRINGS; goto bail; } @@ -495,7 +508,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, /* Set the VNIC's MAC in the client */ if (!is_anchor) - mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh); + mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp); err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); if (err != 0) { @@ -850,38 +863,25 @@ vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, return (err); } -/*ARGSUSED*/ -static int -vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +/* ARGSUSED */ +static void vnic_m_propinfo(void *m_driver, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh) { - mac_propval_range_t range; - vnic_t *vn = m_driver; - int err = ENOTSUP; + vnic_t *vn = m_driver; /* MTU setting allowed only on an etherstub */ if (vn->vn_link_id != DATALINK_INVALID_LINKID) - return (err); + return; switch (pr_num) { case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = ANCHOR_VNIC_MIN_MTU; - range.range_uint32[0].mpur_max = ANCHOR_VNIC_MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); - return (0); - default: + mac_prop_info_set_range_uint32(prh, + ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU); break; } - - return (err); } + int vnic_info(vnic_info_t *info, cred_t *credp) { diff --git a/usr/src/uts/common/io/vr/vr.c b/usr/src/uts/common/io/vr/vr.c index 03fadffe72..4a756212ea 100644 --- a/usr/src/uts/common/io/vr/vr.c +++ b/usr/src/uts/common/io/vr/vr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -148,7 +148,7 @@ static ddi_dma_attr_t vr_data_dma_attr = { }; static mac_callbacks_t vr_mac_callbacks = { - MC_SETPROP|MC_GETPROP, /* Which callbacks are set */ + MC_SETPROP|MC_GETPROP|MC_PROPINFO, /* Which callbacks are set */ vr_mac_getstat, /* Get the value of a statistic */ vr_mac_start, /* Start the device */ vr_mac_stop, /* Stop the device */ @@ -156,12 +156,14 @@ static mac_callbacks_t vr_mac_callbacks = { vr_mac_set_multicast, /* Enable or disable a multicast addr */ vr_mac_set_ether_addr, /* Set the unicast MAC address */ vr_mac_tx_enqueue_list, /* Transmit a packet */ + NULL, NULL, /* Process an unknown ioctl */ NULL, /* Get capability information */ NULL, /* Open the device */ NULL, /* Close the device */ vr_mac_setprop, /* Set properties of the device */ - vr_mac_getprop /* Get properties of the device */ + vr_mac_getprop, /* Get properties of the device */ + vr_mac_propinfo /* Get properties attributes */ }; /* @@ -3157,7 +3159,7 @@ vr_remove_kstats(vr_t *vrp) */ int vr_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { vr_t *vrp; uint32_t err; @@ -3168,228 +3170,220 @@ vr_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, err = 0; vrp = (vr_t *)arg; - if ((pr_flags & MAC_PROP_DEFAULT) != 0) { - /* - * Defaults depend on the PHY/MAC's capabilities - * All defaults are read/write, otherwise reset-linkprop fails - * with enotsup .... 
- */ - *perm = MAC_PROP_PERM_RW; - switch (pr_num) { - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - val = 0; - break; + switch (pr_num) { + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + val = 0; + break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASEX_FD) != 0; - break; + case MAC_PROP_ADV_100FDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_TX_FD) != 0; + break; - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASEX) != 0; - break; + case MAC_PROP_ADV_100HDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_TX) != 0; + break; - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASE_T4) != 0; - break; + case MAC_PROP_ADV_100T4_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_T4) != 0; + break; - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_10_FD) != 0; - break; + case MAC_PROP_ADV_10FDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_10BASE_T_FD) != 0; + break; - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_10) != 0; - break; + case MAC_PROP_ADV_10HDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_10BASE_T) != 0; + break; - case MAC_PROP_AUTONEG: - case MAC_PROP_EN_AUTONEG: - val = (vrp->chip.mii.status & - MII_STATUS_CANAUTONEG) != 0; - break; + case MAC_PROP_AUTONEG: + val = (vrp->chip.mii.control & + MII_CONTROL_ANE) != 0; + break; - case MAC_PROP_DUPLEX: - val = VR_LINK_DUPLEX_FULL; - break; + case MAC_PROP_DUPLEX: + val = vrp->chip.link.duplex; + break; - case MAC_PROP_FLOWCTRL: - val = VR_PAUSE_BIDIRECTIONAL; - break; + case MAC_PROP_EN_100FDX_CAP: + val = 
(vrp->param.anadv_en & + MII_ABILITY_100BASE_TX_FD) != 0; + break; - case MAC_PROP_MTU: - val = ETHERMTU; - break; + case MAC_PROP_EN_100HDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_100BASE_TX) != 0; + break; - case MAC_PROP_SPEED: - val = 100 * 1000 * 1000; - break; + case MAC_PROP_EN_100T4_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_100BASE_T4) != 0; + break; - case MAC_PROP_STATUS: - val = VR_LINK_STATE_UP; - break; + case MAC_PROP_EN_10FDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_10BASE_T_FD) != 0; + break; - default: - return (ENOTSUP); - } - } else { - switch (pr_num) { - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - val = 0; - *perm = MAC_PROP_PERM_READ; - break; + case MAC_PROP_EN_10HDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_10BASE_T) != 0; + break; + + case MAC_PROP_EN_AUTONEG: + val = vrp->param.an_en == VR_LINK_AUTONEG_ON; + break; + + case MAC_PROP_FLOWCTRL: + val = vrp->chip.link.flowctrl; + break; + + case MAC_PROP_MTU: + val = vrp->param.mtu; + break; - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; + case MAC_PROP_SPEED: + if (vrp->chip.link.speed == + VR_LINK_SPEED_100MBS) + val = 100 * 1000 * 1000; + else if (vrp->chip.link.speed == + VR_LINK_SPEED_10MBS) + val = 10 * 1000 * 1000; + else val = 0; - break; + break; - case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_TX_FD) != 0; - break; + case MAC_PROP_STATUS: + val = vrp->chip.link.state; + break; - case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_TX) != 0; - break; + default: + err = ENOTSUP; + break; + } - case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_T4) != 0; - break; + if (err == 0 && pr_num != MAC_PROP_PRIVATE) { + if (pr_valsize == sizeof (uint64_t)) + *(uint64_t *)pr_val = val; + else if (pr_valsize 
== sizeof (uint32_t)) + *(uint32_t *)pr_val = val; + else if (pr_valsize == sizeof (uint16_t)) + *(uint16_t *)pr_val = val; + else if (pr_valsize == sizeof (uint8_t)) + *(uint8_t *)pr_val = val; + else + err = EINVAL; + } + return (err); +} - case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_10BASE_T_FD) != 0; - break; +void +vr_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + vr_t *vrp = (vr_t *)arg; + uint8_t val, perm; - case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_10BASE_T) != 0; - break; + /* Since we have no private properties */ + _NOTE(ARGUNUSED(pr_name)) - case MAC_PROP_AUTONEG: - *perm = MAC_PROP_PERM_RW; - val = (vrp->chip.mii.control & - MII_CONTROL_ANE) != 0; - break; + switch (pr_num) { + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; - case MAC_PROP_DUPLEX: - /* - * Writability depends on autoneg. 
- */ - if ((vrp->chip.mii.control & - MII_CONTROL_ANE) == 0) - *perm = MAC_PROP_PERM_RW; - else - *perm = MAC_PROP_PERM_READ; - val = vrp->chip.link.duplex; - break; + case MAC_PROP_EN_100FDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASEX_FD) != 0; + break; - case MAC_PROP_EN_100FDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_TX_FD) != 0; - break; + case MAC_PROP_EN_100HDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASEX) != 0; + break; - case MAC_PROP_EN_100HDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_TX) != 0; - break; + case MAC_PROP_EN_100T4_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASE_T4) != 0; + break; - case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_T4) != 0; - break; + case MAC_PROP_EN_10FDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_10_FD) != 0; + break; - case MAC_PROP_EN_10FDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_10BASE_T_FD) != 0; - break; + case MAC_PROP_EN_10HDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_10) != 0; + break; - case MAC_PROP_EN_10HDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_10BASE_T) != 0; - break; + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + val = (vrp->chip.mii.status & + MII_STATUS_CANAUTONEG) != 0; + break; - case MAC_PROP_EN_AUTONEG: - *perm = MAC_PROP_PERM_RW; - val = vrp->param.an_en == VR_LINK_AUTONEG_ON; - break; + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_BI); + return; - case MAC_PROP_FLOWCTRL: - *perm = MAC_PROP_PERM_RW; - val = vrp->chip.link.flowctrl; - break; + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + ETHERMTU, ETHERMTU); + return; - case MAC_PROP_MTU: - *perm = MAC_PROP_PERM_RW; - val = vrp->param.mtu; - break; + case MAC_PROP_DUPLEX: + /* + * Writability depends on autoneg. 
+ */ + perm = ((vrp->chip.mii.control & + MII_CONTROL_ANE) == 0) ? MAC_PROP_PERM_RW : + MAC_PROP_PERM_READ; + mac_prop_info_set_perm(prh, perm); + + if (perm == MAC_PROP_PERM_RW) { + mac_prop_info_set_default_uint8(prh, + VR_LINK_DUPLEX_FULL); + } + return; - case MAC_PROP_SPEED: - /* - * Writability depends on autoneg. - */ - if ((vrp->chip.mii.control & - MII_CONTROL_ANE) == 0) - *perm = MAC_PROP_PERM_RW; - else - *perm = MAC_PROP_PERM_READ; - if (vrp->chip.link.speed == - VR_LINK_SPEED_100MBS) - val = 100 * 1000 * 1000; - else if (vrp->chip.link.speed == - VR_LINK_SPEED_10MBS) - val = 10 * 1000 * 1000; - else - val = 0; - break; + case MAC_PROP_SPEED: + perm = ((vrp->chip.mii.control & + MII_CONTROL_ANE) == 0) ? + MAC_PROP_PERM_RW : MAC_PROP_PERM_READ; + mac_prop_info_set_perm(prh, perm); + + if (perm == MAC_PROP_PERM_RW) { + mac_prop_info_set_default_uint64(prh, + 100 * 1000 * 1000); + } + return; - case MAC_PROP_STATUS: - val = vrp->chip.link.state; - break; + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; - default: - err = ENOTSUP; - break; + default: + return; } - } - if (err == 0 && pr_num != MAC_PROP_PRIVATE) { - if (pr_valsize == sizeof (uint64_t)) - *(uint64_t *)pr_val = val; - else if (pr_valsize == sizeof (uint32_t)) - *(uint32_t *)pr_val = val; - else if (pr_valsize == sizeof (uint16_t)) - *(uint16_t *)pr_val = val; - else if (pr_valsize == sizeof (uint8_t)) - *(uint8_t *)pr_val = val; - else - err = EINVAL; - } - return (err); + + mac_prop_info_set_default_uint8(prh, val); } /* diff --git a/usr/src/uts/common/io/vr/vr.h b/usr/src/uts/common/io/vr/vr.h index f120895b8f..29b0144272 100644 --- a/usr/src/uts/common/io/vr/vr.h +++ b/usr/src/uts/common/io/vr/vr.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -487,11 +487,13 @@ int vr_mac_set_ether_addr(void *vrp, const uint8_t *macaddr); mblk_t *vr_mac_tx_enqueue_list(void *p, mblk_t *mp); int vr_mac_getprop(void *arg, const char *pr_name, - mac_prop_id_t pr_num, uint_t pr_flags, - uint_t pr_valsize, void *pr_val, uint_t *perm); + mac_prop_id_t pr_num, uint_t pr_valsize, + void *pr_val); int vr_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val); +void vr_mac_propinfo(void *arg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh); uint_t vr_intr(caddr_t arg1, caddr_t arg2); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/wpi/wpi.c b/usr/src/uts/common/io/wpi/wpi.c index d25acafb21..1913d7d980 100644 --- a/usr/src/uts/common/io/wpi/wpi.c +++ b/usr/src/uts/common/io/wpi/wpi.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -273,8 +273,9 @@ static void wpi_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int wpi_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int wpi_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_lenth, - void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_lenth, void *wldp_buf); +static void wpi_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static void wpi_destroy_locks(wpi_sc_t *sc); static int wpi_send(ieee80211com_t *ic, mblk_t *mp, uint8_t type); static void wpi_thread(wpi_sc_t *sc); @@ -364,7 +365,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t wpi_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, wpi_m_stat, wpi_m_start, wpi_m_stop, @@ -372,12 +373,14 @@ mac_callbacks_t wpi_m_callbacks = { 
wpi_m_multicst, wpi_m_unicst, wpi_m_tx, + NULL, wpi_m_ioctl, NULL, NULL, NULL, wpi_m_setprop, - wpi_m_getprop + wpi_m_getprop, + wpi_m_propinfo }; #ifdef DEBUG @@ -2324,16 +2327,26 @@ wpi_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) /* ARGSUSED */ static int wpi_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; wpi_sc_t *sc = (wpi_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_name, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } + +static void +wpi_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + wpi_sc_t *sc = (wpi_sc_t *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int wpi_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/xge/drv/xgell.c b/usr/src/uts/common/io/xge/drv/xgell.c index d1b85d74f2..a5d857f05d 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.c +++ b/usr/src/uts/common/io/xge/drv/xgell.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -112,6 +112,7 @@ static mac_callbacks_t xgell_m_callbacks = { xgell_m_multicst, NULL, NULL, + NULL, xgell_m_ioctl, xgell_m_getcapab }; @@ -613,15 +614,13 @@ xgell_rx_hcksum_assoc(mblk_t *mp, char *vaddr, int pkt_length, if (!(ext_info->proto & XGE_HAL_FRAME_PROTO_IP_FRAGMENTED)) { if (ext_info->proto & XGE_HAL_FRAME_PROTO_TCP_OR_UDP) { if (ext_info->l3_cksum == XGE_HAL_L3_CKSUM_OK) { - cksum_flags |= HCK_IPV4_HDRCKSUM; + cksum_flags |= HCK_IPV4_HDRCKSUM_OK; } if (ext_info->l4_cksum == XGE_HAL_L4_CKSUM_OK) { cksum_flags |= HCK_FULLCKSUM_OK; } - if (cksum_flags) { - cksum_flags |= HCK_FULLCKSUM; - (void) hcksum_assoc(mp, NULL, NULL, 0, - 0, 0, 0, cksum_flags, 0); + if (cksum_flags != 0) { + mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags); } } } else if (ext_info->proto & @@ -640,9 +639,8 @@ xgell_rx_hcksum_assoc(mblk_t *mp, char *vaddr, int pkt_length, start = 40; } cksum_flags |= HCK_PARTIALCKSUM; - (void) hcksum_assoc(mp, NULL, NULL, start, 0, - end, ntohs(ext_info->l4_cksum), cksum_flags, - 0); + mac_hcksum_set(mp, start, 0, end, + ntohs(ext_info->l4_cksum), cksum_flags); } } @@ -795,7 +793,8 @@ xgell_rx_1b_callback(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, xgell_rx_hcksum_assoc(mp, (char *)rx_buffer->vaddr + HEADROOM, pkt_length, &ext_info); - ring->received_bytes += pkt_length; + ring->rx_pkts++; + ring->rx_bytes += pkt_length; if (mp_head == NULL) { mp_head = mp; @@ -954,9 +953,11 @@ xgell_ring_tx(void *arg, mblk_t *mp) uint32_t mss; int handle_cnt, frag_cnt, ret, i, copied; boolean_t used_copy; + uint64_t sent_bytes; _begin: handle_cnt = frag_cnt = 0; + sent_bytes = 0; if (!lldev->is_initialized || lldev->in_reset) return (mp); @@ -1041,7 +1042,7 @@ _begin: continue; } - ring->sent_bytes += mblen; + sent_bytes += mblen; /* * Check the message length to decide to DMA or bcopy() data @@ -1159,14 +1160,14 @@ _begin: * If LSO is required, just call xge_hal_fifo_dtr_mss_set(dtr, mss) to * do all necessary work. 
*/ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); if (lsoflags & HW_LSO) { xge_assert((mss != 0) && (mss <= XGE_HAL_DEFAULT_MTU)); xge_hal_fifo_dtr_mss_set(dtr, mss); } - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags); if (hckflags & HCK_IPV4_HDRCKSUM) { xge_hal_fifo_dtr_cksum_set_bits(dtr, XGE_HAL_TXD_TX_CKO_IPV4_EN); @@ -1178,6 +1179,10 @@ _begin: xge_hal_fifo_dtr_post(ring->channelh, dtr); + /* Update per-ring tx statistics */ + atomic_add_64(&ring->tx_pkts, 1); + atomic_add_64(&ring->tx_bytes, sent_bytes); + return (NULL); _exit_cleanup: @@ -1458,6 +1463,7 @@ xgell_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = xgell_rx_ring_start; infop->mri_stop = xgell_rx_ring_stop; infop->mri_poll = xgell_rx_poll; + infop->mri_stat = xgell_rx_ring_stat; mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; @@ -1480,6 +1486,7 @@ xgell_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = xgell_tx_ring_start; infop->mri_stop = xgell_tx_ring_stop; infop->mri_tx = xgell_ring_tx; + infop->mri_stat = xgell_tx_ring_stat; break; } @@ -1618,7 +1625,6 @@ xgell_rx_ring_open(xgell_rx_ring_t *rx_ring) mutex_init(&rx_ring->ring_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(hldev->irqh)); - rx_ring->received_bytes = 0; rx_ring->poll_bytes = -1; rx_ring->polled_bytes = 0; rx_ring->poll_mp = NULL; @@ -1769,7 +1775,6 @@ xgell_tx_ring_open(xgell_tx_ring_t *tx_ring) return (B_FALSE); } - tx_ring->sent_bytes = 0; tx_ring->live = B_TRUE; return (B_TRUE); @@ -2262,6 +2267,56 @@ xgell_m_stat(void *arg, uint_t stat, uint64_t *val) } /* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +xgell_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + xgell_rx_ring_t *rx_ring = (xgell_rx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + 
case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +xgell_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + xgell_tx_ring_t *tx_ring = (xgell_tx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->tx_bytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->tx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* * xgell_device_alloc - Allocate new LL device */ int diff --git a/usr/src/uts/common/io/xge/drv/xgell.h b/usr/src/uts/common/io/xge/drv/xgell.h index 93845bb655..39c6447ebf 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.h +++ b/usr/src/uts/common/io/xge/drv/xgell.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -329,8 +329,8 @@ struct xgell_rx_ring { xgell_multi_mac_t mmac; /* per group multiple addrs */ xgell_rx_buffer_pool_t bf_pool; /* per ring buffer pool */ - int received_bytes; /* total received bytes */ - int intr_bytes; /* interrupt received bytes */ + uint64_t rx_pkts; /* total received packets */ + uint64_t rx_bytes; /* total received bytes */ int poll_bytes; /* bytes to be polled up */ int polled_bytes; /* total polled bytes */ mblk_t *poll_mp; /* polled messages */ @@ -344,7 +344,8 @@ struct xgell_tx_ring { xge_hal_channel_h channelh; /* hardware channel */ xgelldev_t *lldev; /* driver device */ mac_ring_handle_t ring_handle; /* call back ring handle */ - int sent_bytes; /* bytes sent though the ring */ + uint64_t tx_pkts; /* packets sent */ + uint64_t tx_bytes; /* bytes sent though the ring */ boolean_t need_resched; }; @@ -418,8 +419,9 @@ void xge_disable_intrs(xgelldev_t *lldev); void xge_rem_intrs(xgelldev_t *lldev); +int xgell_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val); - +int xgell_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/yge/yge.c b/usr/src/uts/common/io/yge/yge.c index c41dda7b60..33d2fb527a 100644 --- a/usr/src/uts/common/io/yge/yge.c +++ b/usr/src/uts/common/io/yge/yge.c @@ -228,8 +228,9 @@ static mblk_t *yge_m_tx(void *, mblk_t *); static int yge_m_stat(void *, uint_t, uint64_t *); static int yge_m_start(void *); static void yge_m_stop(void *); -static int yge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static int yge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void yge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int yge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static void yge_m_ioctl(void *, queue_t *, mblk_t *); @@ -240,7 +241,7 @@ extern int yge_phys_restart(yge_port_t *, boolean_t); extern int 
yge_phys_init(yge_port_t *, phy_readreg_t, phy_writereg_t); static mac_callbacks_t yge_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, yge_m_stat, yge_m_start, yge_m_stop, @@ -248,12 +249,14 @@ static mac_callbacks_t yge_m_callbacks = { yge_m_multicst, yge_m_unicst, yge_m_tx, + NULL, yge_m_ioctl, NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ yge_m_setprop, yge_m_getprop, + yge_m_propinfo }; static mii_ops_t yge_mii_ops = { @@ -3348,47 +3351,30 @@ err: int yge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { yge_port_t *port = arg; - mac_propval_range_t range; - int err; - err = mii_m_getprop(port->p_mii, pr_name, pr_num, pr_flags, - pr_valsize, pr_val, perm); - if (err != ENOTSUP) { - return (err); - } - - if (pr_valsize == 0) - return (EINVAL); + return (mii_m_getprop(port->p_mii, pr_name, pr_num, pr_valsize, + pr_val)); +} - bzero(pr_val, pr_valsize); - *perm = MAC_PROP_PERM_RW; +static void +yge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + yge_port_t *port = arg; switch (pr_num) { case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) { - err = ENOTSUP; - break; - } - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = ETHERMTU; - range.range_uint32[0].mpur_max = + mac_prop_info_set_range_uint32(prh, ETHERMTU, port->p_flags & PORT_FLAG_NOJUMBO ? 
- ETHERMTU : YGE_JUMBO_MTU; - bcopy(&range, pr_val, sizeof (range)); - err = 0; + ETHERMTU : YGE_JUMBO_MTU); break; - default: - err = ENOTSUP; + mii_m_propinfo(port->p_mii, pr_name, pr_num, prh); break; } - return (err); } void diff --git a/usr/src/uts/common/io/zyd/zyd.c b/usr/src/uts/common/io/zyd/zyd.c index bdd5ef37d1..61c4abf3c6 100644 --- a/usr/src/uts/common/io/zyd/zyd.c +++ b/usr/src/uts/common/io/zyd/zyd.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,8 +54,9 @@ static int zyd_m_promisc(void *arg, boolean_t on); static void zyd_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static mblk_t *zyd_m_tx(void *arg, mblk_t *mp); static int zyd_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void zyd_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static int zyd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); @@ -72,7 +73,7 @@ void *zyd_ssp; * Mac Call Back entries */ static mac_callbacks_t zyd_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, zyd_m_stat, /* Get the value of a statistic */ zyd_m_start, /* Start the device */ zyd_m_stop, /* Stop the device */ @@ -80,12 +81,14 @@ static mac_callbacks_t zyd_m_callbacks = { zyd_m_multicst, /* Enable or disable a multicast addr */ zyd_m_unicst, /* Set the unicast MAC address */ zyd_m_tx, /* Transmit a packet */ + NULL, zyd_m_ioctl, /* Process an unknown ioctl */ NULL, /* mc_getcapab */ NULL, NULL, zyd_m_setprop, - zyd_m_getprop + zyd_m_getprop, + zyd_m_propinfo }; /* @@ -893,7 +896,7 @@ zyd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t 
wldp_pr_num, static int zyd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct zyd_softc *sc = (struct zyd_softc *)arg; int err; @@ -903,11 +906,20 @@ zyd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } err = ieee80211_getprop(&sc->ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +zyd_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct zyd_softc *sc = (struct zyd_softc *)arg; + + ieee80211_propinfo(&sc->ic, pr_name, wldp_pr_num, mph); +} + /* * Transmit a data frame. */ diff --git a/usr/src/uts/common/os/pool.c b/usr/src/uts/common/os/pool.c index 7c3c70de3c..4b4337b3a2 100644 --- a/usr/src/uts/common/os/pool.c +++ b/usr/src/uts/common/os/pool.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ #include <sys/zone.h> #include <sys/policy.h> #include <sys/schedctl.h> +#include <sys/taskq.h> /* * RESOURCE POOLS @@ -153,6 +154,12 @@ static kthread_t *pool_busy_thread; /* thread holding "pool_lock" */ static kmutex_t pool_barrier_lock; /* synch. with pool_barrier_* */ static kcondvar_t pool_barrier_cv; /* synch. with pool_barrier_* */ static int pool_barrier_count; /* synch. with pool_barrier_* */ +static list_t pool_event_cb_list; /* pool event callbacks */ +static boolean_t pool_event_cb_init = B_FALSE; +static kmutex_t pool_event_cb_lock; +static taskq_t *pool_event_cb_taskq = NULL; + +void pool_event_dispatch(pool_event_t, poolid_t); /* * Boot-time pool initialization. 
@@ -373,6 +380,21 @@ pool_lookup_pool_by_id(poolid_t poolid) return (NULL); } +pool_t * +pool_lookup_pool_by_pset(int id) +{ + pool_t *pool = pool_default; + psetid_t psetid = (psetid_t)id; + + ASSERT(pool_lock_held()); + for (pool = list_head(&pool_list); pool != NULL; + pool = list_next(&pool_list, pool)) { + if (pool->pool_pset->pset_id == psetid) + return (pool); + } + return (NULL); +} + /* * Create new pool, associate it with default resource sets, and give * it a temporary name. @@ -545,12 +567,14 @@ pool_status(int status) if (ret != 0) return (ret); pool_state = POOL_ENABLED; + pool_event_dispatch(POOL_E_ENABLE, NULL); break; case POOL_DISABLED: ret = pool_disable(); if (ret != 0) return (ret); pool_state = POOL_DISABLED; + pool_event_dispatch(POOL_E_DISABLE, NULL); break; default: ret = EINVAL; @@ -572,6 +596,8 @@ pool_assoc(poolid_t poolid, int idtype, id_t id) switch (idtype) { case PREC_PSET: ret = pool_pset_assoc(poolid, (psetid_t)id); + if (ret == 0) + pool_event_dispatch(POOL_E_CHANGE, poolid); break; default: ret = EINVAL; @@ -595,6 +621,8 @@ pool_dissoc(poolid_t poolid, int idtype) switch (idtype) { case PREC_PSET: ret = pool_pset_assoc(poolid, PS_NONE); + if (ret == 0) + pool_event_dispatch(POOL_E_CHANGE, poolid); break; default: ret = EINVAL; @@ -612,24 +640,48 @@ int pool_transfer(int type, id_t src, id_t dst, uint64_t qty) { int ret = EINVAL; + return (ret); } +static poolid_t +pool_lookup_id_by_pset(int id) +{ + pool_t *pool = pool_default; + psetid_t psetid = (psetid_t)id; + + ASSERT(pool_lock_held()); + for (pool = list_head(&pool_list); pool != NULL; + pool = list_next(&pool_list, pool)) { + if (pool->pool_pset->pset_id == psetid) + return (pool->pool_id); + } + return (POOL_INVALID); +} + /* * Transfer resources specified by their IDs between resource sets. 
*/ int -pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids) +pool_xtransfer(int type, id_t src_pset, id_t dst_pset, uint_t size, id_t *ids) { int ret; + poolid_t src_pool, dst_pool; ASSERT(pool_lock_held()); if (pool_state == POOL_DISABLED) return (ENOTACTIVE); switch (type) { case PREC_PSET: - ret = pool_pset_xtransfer((psetid_t)src, (psetid_t)dst, - size, ids); + ret = pool_pset_xtransfer((psetid_t)src_pset, + (psetid_t)dst_pset, size, ids); + + if ((src_pool = pool_lookup_id_by_pset(src_pset)) == -1) + return (EINVAL); + if ((dst_pool = pool_lookup_id_by_pset(dst_pset)) == -1) + return (EINVAL); + pool_event_dispatch(POOL_E_CHANGE, src_pool); + pool_event_dispatch(POOL_E_CHANGE, dst_pool); break; default: ret = EINVAL; @@ -643,7 +695,7 @@ pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids) int pool_bind(poolid_t poolid, idtype_t idtype, id_t id) { - pool_t *pool; + pool_t *pool; ASSERT(pool_lock_held()); @@ -1234,6 +1286,17 @@ pool_change_class(proc_t *p, id_t cid) kmem_free(bufs, nlwp * sizeof (void *)); } +void +pool_get_name(pool_t *pool, char **name) +{ + ASSERT(pool_lock_held()); + + (void) nvlist_lookup_string(pool->pool_props, "pool.name", name); + + ASSERT(strlen(*name) != 0); +} + + /* * The meat of the bind operation. 
The steps in pool_do_bind are: * @@ -1658,3 +1721,71 @@ out: switch (idtype) { ASSERT(pool_barrier_count == 0); return (rv); } + +void +pool_event_cb_register(pool_event_cb_t *cb) +{ + ASSERT(!pool_lock_held() || panicstr); + ASSERT(cb->pec_func != NULL); + + mutex_enter(&pool_event_cb_lock); + if (!pool_event_cb_init) { + list_create(&pool_event_cb_list, sizeof (pool_event_cb_t), + offsetof(pool_event_cb_t, pec_list)); + pool_event_cb_init = B_TRUE; + } + list_insert_tail(&pool_event_cb_list, cb); + mutex_exit(&pool_event_cb_lock); +} + +void +pool_event_cb_unregister(pool_event_cb_t *cb) +{ + ASSERT(!pool_lock_held() || panicstr); + + mutex_enter(&pool_event_cb_lock); + list_remove(&pool_event_cb_list, cb); + mutex_exit(&pool_event_cb_lock); +} + +typedef struct { + pool_event_t tqd_what; + poolid_t tqd_id; +} pool_tqd_t; + +void +pool_event_notify(void *arg) +{ + pool_tqd_t *tqd = (pool_tqd_t *)arg; + pool_event_cb_t *cb; + + ASSERT(!pool_lock_held() || panicstr); + + mutex_enter(&pool_event_cb_lock); + for (cb = list_head(&pool_event_cb_list); cb != NULL; + cb = list_next(&pool_event_cb_list, cb)) { + cb->pec_func(tqd->tqd_what, tqd->tqd_id, cb->pec_arg); + } + mutex_exit(&pool_event_cb_lock); + kmem_free(tqd, sizeof (*tqd)); +} + +void +pool_event_dispatch(pool_event_t what, poolid_t id) +{ + pool_tqd_t *tqd = NULL; + + ASSERT(pool_lock_held()); + + if (pool_event_cb_taskq == NULL) { + pool_event_cb_taskq = taskq_create("pool_event_cb_taskq", 1, + -1, 1, 1, TASKQ_PREPOPULATE); + } + + tqd = kmem_alloc(sizeof (*tqd), KM_SLEEP); + tqd->tqd_what = what; + tqd->tqd_id = id; + + (void) taskq_dispatch(pool_event_cb_taskq, pool_event_notify, tqd, + KM_SLEEP); +} diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c index 75bd481d21..c8068ca965 100644 --- a/usr/src/uts/common/os/strsubr.c +++ b/usr/src/uts/common/os/strsubr.c @@ -8559,18 +8559,6 @@ lso_info_cleanup(mblk_t *mp) DB_LSOMSS(mp) = 0; } -void -lso_info_get(mblk_t *mp, uint32_t *mss, 
uint32_t *flags) -{ - ASSERT(DB_TYPE(mp) == M_DATA); - - if (flags != NULL) { - *flags = DB_CKSUMFLAGS(mp) & HW_LSO_FLAGS; - if ((*flags != 0) && (mss != NULL)) - *mss = (uint32_t)DB_LSOMSS(mp); - } -} - /* * Checksum buffer *bp for len bytes with psum partial checksum, * or 0 if none, and return the 16 bit partial checksum. diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index e251fd3222..834725f8f7 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -356,6 +356,7 @@ CHKHDRS= \ mac_impl.h \ mac_provider.h \ mac_soft_ring.h \ + mac_stat.h \ machelf.h \ map.h \ md4.h \ diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index ee0979b798..8363d231cf 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,6 +75,19 @@ typedef struct aggr_pseudo_rx_group_s { uint_t arg_ring_cnt; } aggr_pseudo_rx_group_t; +typedef struct aggr_pseudo_tx_ring_s { + mac_ring_handle_t atr_rh; /* filled in by aggr_fill_ring() */ + struct aggr_port_s *atr_port; + mac_ring_handle_t atr_hw_rh; + uint_t atr_flags; +} aggr_pseudo_tx_ring_t; + +typedef struct aggr_pseudo_tx_group_s { + mac_group_handle_t atg_gh; /* filled in by aggr_fill_group() */ + uint_t atg_ring_cnt; + aggr_pseudo_tx_ring_t atg_rings[MAX_RINGS_PER_GROUP]; +} aggr_pseudo_tx_group_t; + /* * A link aggregation MAC port. 
* Note that lp_next is protected by the lg_lock of the group the @@ -93,9 +106,10 @@ typedef struct aggr_port_s { lp_collector_enabled : 1, lp_promisc_on : 1, lp_no_link_update : 1, - lp_grp_added : 1, + lp_rx_grp_added : 1, + lp_tx_grp_added : 1, lp_closing : 1, - lp_pad_bits : 25; + lp_pad_bits : 24; mac_handle_t lp_mh; mac_client_handle_t lp_mch; const mac_info_t *lp_mip; @@ -116,6 +130,17 @@ typedef struct aggr_port_s { aggr_unicst_addr_t *lp_prom_addr; /* handle of the underlying HW RX group */ mac_group_handle_t lp_hwgh; + int lp_tx_ring_cnt; + /* handles of the underlying HW TX rings */ + mac_ring_handle_t *lp_tx_rings; + /* + * Handles of the pseudo TX rings. Each of them maps to + * corresponding hardware TX ring in lp_tx_rings[]. A + * pseudo TX ring is presented to aggr primary mac + * client even when underlying NIC has no TX ring. + */ + mac_ring_handle_t *lp_pseudo_tx_rings; + void *lp_tx_notify_mh; } aggr_port_t; /* @@ -187,7 +212,16 @@ typedef struct aggr_grp_s { mblk_t *lg_lacp_tail; kthread_t *lg_lacp_rx_thread; boolean_t lg_lacp_done; + aggr_pseudo_rx_group_t lg_rx_group; + aggr_pseudo_tx_group_t lg_tx_group; + + kmutex_t lg_tx_flowctl_lock; + kcondvar_t lg_tx_flowctl_cv; + uint_t lg_tx_blocked_cnt; + mac_ring_handle_t *lg_tx_blocked_rings; + kthread_t *lg_tx_notify_thread; + boolean_t lg_tx_notify_done; /* * The following fields are used by aggr to wait for all the @@ -274,7 +308,8 @@ extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); -extern mblk_t *aggr_m_tx(void *, mblk_t *); +extern void aggr_tx_ring_update(void *, uintptr_t); +extern void aggr_tx_notify_thread(void *); extern void aggr_send_port_enable(aggr_port_t *); extern void aggr_send_port_disable(aggr_port_t *); extern void aggr_send_update_policy(aggr_grp_t *, uint32_t); @@ -302,6 +337,10 @@ extern void aggr_grp_port_wait(aggr_grp_t *); extern int aggr_port_addmac(aggr_port_t *, const uint8_t *); extern 
void aggr_port_remmac(aggr_port_t *, const uint8_t *); +extern mblk_t *aggr_ring_tx(void *, mblk_t *); +extern mblk_t *aggr_find_tx_ring(void *, mblk_t *, + uintptr_t, mac_ring_handle_t *); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index ed80269fbc..fb2a0749d3 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -27,7 +27,7 @@ #define _SYS_DLD_H /* - * Data-Link Driver (public header). + * Data-Link Driver ioctl interfaces. * * Note that the datastructures defined here define an ioctl interface * that is shared betwen user and kernel space. The dld driver thus @@ -280,10 +280,12 @@ typedef struct dld_ioc_usagelog { #define DLDIOC_SETMACPROP DLDIOC(0x1b) #define DLDIOC_GETMACPROP DLDIOC(0x1c) -#define MAC_PROP_VERSION 1 + +/* pr_flags can be set to a combination of the following flags */ +#define DLD_PROP_DEFAULT 0x0001 +#define DLD_PROP_POSSIBLE 0x0002 typedef struct dld_ioc_macprop_s { - int pr_version; uint_t pr_flags; datalink_id_t pr_linkid; mac_prop_id_t pr_num; @@ -308,7 +310,7 @@ typedef struct dld_hwgrpinfo { uint_t dhi_grp_type; uint_t dhi_n_rings; uint_t dhi_n_clnts; - /* XXXX later we should use dhi_n_clnts * MAXNAMELEN for dhi_clnts */ + uint_t dhi_rings[MAX_RINGS_PER_GROUP]; char dhi_clnts[MAXCLIENTNAMELEN]; } dld_hwgrpinfo_t; diff --git a/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h b/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h index ca255b1643..61e0fb1a44 100644 --- a/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h +++ b/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h @@ -286,7 +286,10 @@ void oce_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); int oce_m_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t 
size, const void *val); int oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, - uint_t flags, uint_t size, void *val, uint_t *perm); + uint_t size, void *val); +void oce_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh); + int oce_m_stat(void *arg, uint_t stat, uint64_t *val); /* Hardware start/stop functions */ diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 7a8fc3293d..ab04126708 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -92,7 +92,7 @@ typedef enum { } link_tagmode_t; /* - * Defines range of uint32 values + * Defines range of uint32_t values */ typedef struct mac_propval_uint32_range_s { uint32_t mpur_min; @@ -100,10 +100,12 @@ typedef struct mac_propval_uint32_range_s { } mac_propval_uint32_range_t; /* - * Data type of the value + * Data type of property values. */ typedef enum { - MAC_PROPVAL_UINT32 = 0x1 + MAC_PROPVAL_UINT8, + MAC_PROPVAL_UINT32, + MAC_PROPVAL_STR } mac_propval_type_t; /* @@ -111,8 +113,6 @@ typedef enum { * range of values (int32, int64, uint32, uint64, et al) or collection/ * enumeration of values (strings). * Can be used as a value-result parameter. - * - * See PSARC 2009/235 for more information. */ typedef struct mac_propval_range_s { uint_t mpr_count; /* count of ranges */ @@ -122,7 +122,7 @@ typedef struct mac_propval_range_s { } u; } mac_propval_range_t; -#define range_uint32 u.mpr_uint32 +#define mpr_range_uint32 u.mpr_uint32 /* * Maximum MAC address length @@ -134,26 +134,15 @@ typedef enum { MAC_LOGTYPE_FLOW } mac_logtype_t; -/* - * Encodings for public properties. - * A most significant bit value of 1 indicates private property, intended - * to allow private property implementations to use internal encodings - * if desired. - * - * Note that there are 2 sets of parameters: the *_EN_* - * values are those that the Administrator configures for autonegotiation. 
- * The _ADV_* values are those that are currently exposed over the wire. - */ -#define MAXLINKPROPNAME 256 -#define MAC_PROP_DEFAULT 0x0001 /* default property value */ +#define MAXLINKPROPNAME 256 /* max property name len */ /* - * Indicates the linkprop framework is interested in knowing the list of - * possible property values. When used to obtain possible values for a - * property, one may have to change all the drivers. See PSARC 2009/235. + * Public properties. + * + * Note that there are 2 sets of parameters: the *_EN_* values are + * those that the Administrator configures for autonegotiation. The + * _ADV_* values are those that are currently exposed over the wire. */ -#define MAC_PROP_POSSIBLE 0x0002 /* possible property values */ - typedef enum { MAC_PROP_DUPLEX = 0x00000001, MAC_PROP_SPEED, @@ -202,16 +191,20 @@ typedef enum { MAC_PROP_WL_DELKEY, MAC_PROP_WL_KEY, MAC_PROP_WL_MLME, - MAC_PROP_MAXBW, - MAC_PROP_PRIO, - MAC_PROP_BIND_CPU, MAC_PROP_TAGMODE, MAC_PROP_ADV_10GFDX_CAP, MAC_PROP_EN_10GFDX_CAP, MAC_PROP_PVID, MAC_PROP_LLIMIT, MAC_PROP_LDECAY, - MAC_PROP_PROTECT, + MAC_PROP_RESOURCE, + MAC_PROP_RESOURCE_EFF, + MAC_PROP_RXRINGSRANGE, + MAC_PROP_TXRINGSRANGE, + MAC_PROP_MAX_TX_RINGS_AVAIL, + MAC_PROP_MAX_RX_RINGS_AVAIL, + MAC_PROP_MAX_RXHWCLNT_AVAIL, + MAC_PROP_MAX_TXHWCLNT_AVAIL, MAC_PROP_PRIVATE = -1 } mac_prop_id_t; @@ -248,7 +241,8 @@ enum mac_mod_stat { MAC_STAT_LINK_STATE, MAC_STAT_LINK_UP, MAC_STAT_PROMISC, - MAC_STAT_LOWLINK_STATE + MAC_STAT_LOWLINK_STATE, + MAC_STAT_HDROPS }; /* @@ -328,9 +322,13 @@ typedef struct mac_capab_vnic_s { } mac_capab_vnic_t; typedef void (*mac_rename_fn_t)(const char *, void *); +typedef mblk_t *(*mac_tx_ring_fn_t)(void *, mblk_t *, uintptr_t, + mac_ring_handle_t *); typedef struct mac_capab_aggr_s { mac_rename_fn_t mca_rename_fn; int (*mca_unicst)(void *, const uint8_t *); + mac_tx_ring_fn_t mca_find_tx_ring_fn; + void *mca_arg; } mac_capab_aggr_t; /* Bridge transmit and receive function signatures */ @@ -373,6 
+371,8 @@ typedef struct mac_intr_s { mac_intr_handle_t mi_handle; mac_intr_enable_t mi_enable; mac_intr_disable_t mi_disable; + ddi_intr_handle_t mi_ddi_handle; + boolean_t mi_ddi_shared; } mac_intr_t; typedef struct mac_rx_fifo_s { @@ -571,12 +571,6 @@ typedef struct mactype_register_s { size_t mtr_mappingcount; } mactype_register_t; -typedef struct mac_prop_s { - mac_prop_id_t mp_id; - char *mp_name; - uint_t mp_flags; -} mac_prop_t; - /* * Driver interface functions. */ @@ -617,6 +611,7 @@ extern int mac_start_logusage(mac_logtype_t, uint_t); extern void mac_stop_logusage(mac_logtype_t); extern mac_handle_t mac_get_lower_mac_handle(mac_handle_t); +extern boolean_t mac_is_vnic_primary(mac_handle_t); /* * Packet hashing for distribution to multiple ports and rings. diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index ad3f30aa63..40cd15a1b8 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,6 +68,18 @@ typedef enum { MAC_DIAG_MACNO_HWRINGS } mac_diag_t; +/* + * These are used when MAC clients what to specify tx and rx rings + * properties. MAC_RXRINGS_NONE/MAC_TXRINGS_NONE mean that we should + * not reserve any rings while MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE + * mean that the system can decide if it wants to reserve rings or + * not. 
+ */ +#define MAC_RXRINGS_NONE 0 +#define MAC_TXRINGS_NONE MAC_RXRINGS_NONE +#define MAC_RXRINGS_DONTCARE -1 +#define MAC_TXRINGS_DONTCARE MAC_RXRINGS_DONTCARE + typedef enum { MAC_CLIENT_PROMISC_ALL, MAC_CLIENT_PROMISC_FILTERED, @@ -87,11 +99,10 @@ typedef enum { #define MAC_OPEN_FLAGS_IS_VNIC 0x0001 #define MAC_OPEN_FLAGS_EXCLUSIVE 0x0002 #define MAC_OPEN_FLAGS_IS_AGGR_PORT 0x0004 -#define MAC_OPEN_FLAGS_NO_HWRINGS 0x0008 -#define MAC_OPEN_FLAGS_SHARES_DESIRED 0x0010 -#define MAC_OPEN_FLAGS_USE_DATALINK_NAME 0x0020 -#define MAC_OPEN_FLAGS_REQ_HWRINGS 0x0040 -#define MAC_OPEN_FLAGS_MULTI_PRIMARY 0x0080 +#define MAC_OPEN_FLAGS_SHARES_DESIRED 0x0008 +#define MAC_OPEN_FLAGS_USE_DATALINK_NAME 0x0010 +#define MAC_OPEN_FLAGS_MULTI_PRIMARY 0x0020 +#define MAC_OPEN_FLAGS_NO_UNICAST_ADDR 0x0040 /* flags passed to mac_client_close */ #define MAC_CLOSE_FLAGS_IS_VNIC 0x0001 @@ -161,12 +172,12 @@ extern uint_t mac_addr_factory_num(mac_handle_t); extern mac_tx_notify_handle_t mac_client_tx_notify(mac_client_handle_t, mac_tx_notify_t, void *); -extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); -extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); extern int mac_client_set_resources(mac_client_handle_t, mac_resource_props_t *); extern void mac_client_get_resources(mac_client_handle_t, mac_resource_props_t *); +extern void mac_client_get_eff_resources(mac_client_handle_t, + mac_resource_props_t *); /* bridging-related interfaces */ extern int mac_set_pvid(mac_handle_t, uint16_t); @@ -180,15 +191,7 @@ extern void mac_share_unbind(mac_client_handle_t); extern int mac_set_mtu(mac_handle_t, uint_t, uint_t *); -extern uint_t mac_hwgrp_num(mac_handle_t); -extern void mac_get_hwgrp_info(mac_handle_t, int, uint_t *, uint_t *, - uint_t *, uint_t *, char *); - -extern uint32_t mac_no_notification(mac_handle_t); -extern int mac_set_prop(mac_handle_t, mac_prop_t *, void *, uint_t); -extern int mac_get_prop(mac_handle_t, mac_prop_t *, void *, uint_t, uint_t 
*); - -extern boolean_t mac_is_vnic(mac_handle_t); +extern void mac_client_set_rings(mac_client_handle_t, int, int); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index bcdeb1da46..ae25df6a0d 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,6 +31,7 @@ #include <sys/mac_provider.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_stat.h> #include <net/if.h> #include <sys/mac_flow_impl.h> @@ -153,16 +154,7 @@ struct mac_client_impl_s { /* Protected by */ uintptr_t mci_tx_notify_id; /* per MAC client stats */ /* None */ - uint64_t mci_stat_multircv; - uint64_t mci_stat_brdcstrcv; - uint64_t mci_stat_multixmt; - uint64_t mci_stat_brdcstxmt; - uint64_t mci_stat_obytes; - uint64_t mci_stat_opackets; - uint64_t mci_stat_oerrors; - uint64_t mci_stat_ibytes; - uint64_t mci_stat_ipackets; - uint64_t mci_stat_ierrors; + mac_misc_stats_t mci_misc_stat; flow_tab_t *mci_subflow_tab; /* Rx quiescence */ @@ -182,6 +174,20 @@ struct mac_client_impl_s { /* Protected by */ struct mac_mcast_addrs_s *mci_mcast_addrs; /* mi_rw_lock */ /* + * Mac protection related fields + */ + kmutex_t mci_protect_lock; + uint32_t mci_protect_flags; /* SL */ + in6_addr_t mci_v6_local_addr; /* SL */ + avl_tree_t mci_v4_pending_txn; /* mci_protect_lock */ + avl_tree_t mci_v4_completed_txn; /* mci_protect_lock */ + avl_tree_t mci_v4_dyn_ip; /* mci_protect_lock */ + avl_tree_t mci_v6_pending_txn; /* mci_protect_lock */ + avl_tree_t mci_v6_cid; /* mci_protect_lock */ + avl_tree_t mci_v6_dyn_ip; /* mci_protect_lock */ + timeout_id_t mci_txn_cleanup_tid; /* mci_protect_lock */ + + /* * Protected by mci_tx_pcpu[0].pcpu_tx_lock */ uint_t mci_tx_flag; @@ -287,12 
+293,15 @@ extern int mac_tx_percpu_cnt; #define MCIS_CLIENT_POLL_CAPABLE 0x0020 #define MCIS_DESC_LOGGED 0x0040 #define MCIS_SHARE_BOUND 0x0080 -#define MCIS_NO_HWRINGS 0x0100 -#define MCIS_DISABLE_TX_VID_CHECK 0x0200 -#define MCIS_USE_DATALINK_NAME 0x0400 -#define MCIS_UNICAST_HW 0x0800 -#define MCIS_REQ_HWRINGS 0x1000 -#define MCIS_RX_BYPASS_DISABLE 0x2000 +#define MCIS_DISABLE_TX_VID_CHECK 0x0100 +#define MCIS_USE_DATALINK_NAME 0x0200 +#define MCIS_UNICAST_HW 0x0400 +#define MCIS_IS_AGGR 0x0800 +#define MCIS_RX_BYPASS_DISABLE 0x1000 +#define MCIS_NO_UNICAST_ADDR 0x2000 + +/* Mac protection flags */ +#define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001 /* in mac_client.c */ extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *); @@ -301,7 +310,7 @@ extern void mac_client_fini(void); extern void mac_promisc_dispatch(mac_impl_t *, mblk_t *, mac_client_impl_t *); -extern int mac_validate_props(mac_resource_props_t *); +extern int mac_validate_props(mac_impl_t *, mac_resource_props_t *); extern mac_client_impl_t *mac_vnic_lower(mac_impl_t *); extern mac_client_impl_t *mac_primary_client_handle(mac_impl_t *); @@ -316,6 +325,10 @@ boolean_t mac_client_check_flow_vid(mac_client_impl_t *, uint16_t); extern boolean_t mac_is_primary_client(mac_client_impl_t *); +extern int mac_client_set_rings_prop(mac_client_impl_t *, + mac_resource_props_t *, mac_resource_props_t *); +extern void mac_set_prim_vlan_rings(mac_impl_t *, mac_resource_props_t *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 78421a3b80..0ddc1f074d 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -117,35 +117,71 @@ extern int mac_link_flow_walk(datalink_id_t, int (*)(mac_flowinfo_t *, void *), void *); extern int mac_link_flow_info(char *, mac_flowinfo_t *); -extern void *mac_tx_hold(mac_client_handle_t); -extern void mac_tx_rele(mac_client_handle_t, void *); -extern void mac_rx_client_quiesce(mac_client_handle_t); -extern void mac_rx_client_restart(mac_client_handle_t); -extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); -extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, - mac_ring_handle_t *, mac_ring_type_t); -extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t); -extern void mac_hwring_teardown(mac_ring_handle_t); -extern int mac_hwring_disable_intr(mac_ring_handle_t); -extern int mac_hwring_enable_intr(mac_ring_handle_t); -extern int mac_hwring_start(mac_ring_handle_t); -extern void mac_hwring_stop(mac_ring_handle_t); -extern mblk_t *mac_hwring_poll(mac_ring_handle_t, int); -#define MAC_HWRING_POLL(ring, bytes) \ - (((ring)->mr_info.mri_poll) \ - ((ring)->mr_info.mri_driver, (bytes))) - -extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); -extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); - -extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t); +extern void mac_rx_client_quiesce(mac_client_handle_t); +extern void mac_rx_client_restart(mac_client_handle_t); +extern void mac_tx_client_quiesce(mac_client_handle_t); +extern void mac_tx_client_condemn(mac_client_handle_t); +extern void mac_tx_client_restart(mac_client_handle_t); +extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); +extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, + mac_ring_handle_t *, mac_ring_type_t); +extern uint_t mac_hwring_getinfo(mac_ring_handle_t); +extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t, + mac_ring_handle_t); +extern void mac_hwring_teardown(mac_ring_handle_t); +extern int 
mac_hwring_disable_intr(mac_ring_handle_t); +extern int mac_hwring_enable_intr(mac_ring_handle_t); +extern int mac_hwring_start(mac_ring_handle_t); +extern void mac_hwring_stop(mac_ring_handle_t); +extern mblk_t *mac_hwring_poll(mac_ring_handle_t, int); +extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); +extern int mac_hwring_getstat(mac_ring_handle_t, uint_t, uint64_t *); +extern mblk_t *mac_hwring_send_priv(mac_client_handle_t, + mac_ring_handle_t, mblk_t *); + +#define MAC_HWRING_POLL(ring, bytes) \ + (((ring)->mr_info.mri_poll) \ + ((ring)->mr_info.mri_driver, (bytes))) + +extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); +extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); + +extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t, + mac_resource_props_t *); extern int mac_mark_exclusive(mac_handle_t); extern void mac_unmark_exclusive(mac_handle_t); -extern int32_t mac_client_intr_cpu(mac_client_handle_t); -extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); -extern void *mac_get_devinfo(mac_handle_t); +extern uint_t mac_hwgrp_num(mac_handle_t, int); +extern void mac_get_hwrxgrp_info(mac_handle_t, int, uint_t *, uint_t *, + uint_t *, uint_t *, uint_t *, char *); +extern void mac_get_hwtxgrp_info(mac_handle_t, int, uint_t *, uint_t *, + uint_t *, uint_t *, uint_t *, char *); + +extern uint_t mac_txavail_get(mac_handle_t); +extern uint_t mac_rxavail_get(mac_handle_t); +extern uint_t mac_txrsvd_get(mac_handle_t); +extern uint_t mac_rxrsvd_get(mac_handle_t); +extern uint_t mac_rxhwlnksavail_get(mac_handle_t); +extern uint_t mac_rxhwlnksrsvd_get(mac_handle_t); +extern uint_t mac_txhwlnksavail_get(mac_handle_t); +extern uint_t mac_txhwlnksrsvd_get(mac_handle_t); + +extern int32_t mac_client_intr_cpu(mac_client_handle_t); +extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); +extern void *mac_get_devinfo(mac_handle_t); + +extern boolean_t mac_is_vnic(mac_handle_t); +extern 
uint32_t mac_no_notification(mac_handle_t); + +extern int mac_set_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); +extern int mac_get_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); +extern int mac_prop_info(mac_handle_t, mac_prop_id_t, char *, void *, + uint_t, mac_propval_range_t *, uint_t *); +extern boolean_t mac_prop_check_size(mac_prop_id_t, uint_t, boolean_t); + +extern uint64_t mac_pseudo_rx_ring_stat_get(mac_ring_handle_t, uint_t); +extern uint64_t mac_pseudo_tx_ring_stat_get(mac_ring_handle_t, uint_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_flow.h b/usr/src/uts/common/sys/mac_flow.h index 08c7a211a3..9f9902fc29 100644 --- a/usr/src/uts/common/sys/mac_flow.h +++ b/usr/src/uts/common/sys/mac_flow.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,8 @@ extern "C" { #include <netinet/in.h> /* for IPPROTO_* constants */ #include <sys/ethernet.h> +#define MAX_RINGS_PER_GROUP 128 + /* * MAXFLOWNAMELEN defines the longest possible permitted flow name, * including the terminating NUL. @@ -93,29 +95,45 @@ typedef struct flow_desc_s { /* * In MCM_CPUS mode, cpu bindings is user specified. In MCM_FANOUT mode, * user only specifies a fanout count. - * mc_fanout_cnt gives the number of CPUs used for fanout soft rings. - * mc_fanout_cpus[] array stores the CPUs used for fanout soft rings. + * mc_rx_fanout_cnt gives the number of CPUs used for fanout soft rings. + * mc_rx_fanout_cpus[] array stores the CPUs used for fanout soft rings. */ typedef enum { MCM_FANOUT = 1, MCM_CPUS } mac_cpu_mode_t; +/* + * Structure to store the value of the CPUs to be used to re-target + * Tx interrupt. 
+ */ +typedef struct mac_tx_intr_cpus_s { + /* cpu value to re-target intr to */ + int32_t mtc_intr_cpu[MRP_NCPUS]; + /* re-targeted CPU or -1 if failed */ + int32_t mtc_retargeted_cpu[MRP_NCPUS]; +} mac_tx_intr_cpu_t; + typedef struct mac_cpus_props_s { uint32_t mc_ncpus; /* num of cpus */ uint32_t mc_cpus[MRP_NCPUS]; /* cpu list */ - uint32_t mc_fanout_cnt; /* soft ring cpu cnt */ - uint32_t mc_fanout_cpus[MRP_NCPUS]; /* SR cpu list */ - uint32_t mc_pollid; /* poll thr binding */ - uint32_t mc_workerid; /* worker thr binding */ + uint32_t mc_rx_fanout_cnt; /* soft ring cpu cnt */ + uint32_t mc_rx_fanout_cpus[MRP_NCPUS]; /* SR cpu list */ + uint32_t mc_rx_pollid; /* poll thr binding */ + uint32_t mc_rx_workerid; /* worker thr binding */ /* * interrupt cpu: mrp_intr_cpu less than 0 implies platform limitation * in retargetting the interrupt assignment. */ - int32_t mc_intr_cpu; + int32_t mc_rx_intr_cpu; + int32_t mc_tx_fanout_cpus[MRP_NCPUS]; + mac_tx_intr_cpu_t mc_tx_intr_cpus; mac_cpu_mode_t mc_fanout_mode; /* fanout mode */ } mac_cpus_t; +#define mc_tx_intr_cpu mc_tx_intr_cpus.mtc_intr_cpu +#define mc_tx_retargeted_cpu mc_tx_intr_cpus.mtc_retargeted_cpu + /* Priority values */ typedef enum { MPL_LOW, @@ -126,19 +144,41 @@ typedef enum { /* Protection types */ #define MPT_MACNOSPOOF 0x00000001 -#define MPT_IPNOSPOOF 0x00000002 -#define MPT_RESTRICTED 0x00000004 -#define MPT_ALL (MPT_MACNOSPOOF|MPT_IPNOSPOOF|MPT_RESTRICTED) +#define MPT_RESTRICTED 0x00000002 +#define MPT_IPNOSPOOF 0x00000004 +#define MPT_DHCPNOSPOOF 0x00000008 +#define MPT_ALL 0x0000000f #define MPT_RESET 0xffffffff -#define MPT_MAXIPADDR 32 +#define MPT_MAXCNT 32 +#define MPT_MAXIPADDR MPT_MAXCNT +#define MPT_MAXCID MPT_MAXCNT +#define MPT_MAXCIDLEN 256 + +typedef struct mac_ipaddr_s { + uint32_t ip_version; + in6_addr_t ip_addr; +} mac_ipaddr_t; + +typedef enum { + CIDFORM_TYPED = 1, + CIDFORM_HEX, + CIDFORM_STR +} mac_dhcpcid_form_t; + +typedef struct mac_dhcpcid_s { + uchar_t 
dc_id[MPT_MAXCIDLEN]; + uint32_t dc_len; + mac_dhcpcid_form_t dc_form; +} mac_dhcpcid_t; typedef struct mac_protect_s { uint32_t mp_types; uint32_t mp_ipaddrcnt; - ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; + mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; + uint32_t mp_cidcnt; + mac_dhcpcid_t mp_cids[MPT_MAXCID]; } mac_protect_t; - /* The default priority for links */ #define MPL_LINK_DEFAULT MPL_HIGH @@ -150,6 +190,12 @@ typedef struct mac_protect_s { #define MRP_CPUS_USERSPEC 0x00000004 /* CPU/fanout from user */ #define MRP_PRIORITY 0x00000008 /* Priority set */ #define MRP_PROTECT 0x00000010 /* Protection set */ +#define MRP_RX_RINGS 0x00000020 /* Rx rings */ +#define MRP_TX_RINGS 0x00000040 /* Tx rings */ +#define MRP_RXRINGS_UNSPEC 0x00000080 /* unspecified rings */ +#define MRP_TXRINGS_UNSPEC 0x00000100 /* unspecified rings */ +#define MRP_RINGS_RESET 0x00000200 /* resetting rings */ +#define MRP_POOL 0x00000400 /* CPU pool */ #define MRP_THROTTLE MRP_MAXBW @@ -174,21 +220,24 @@ typedef struct mac_resource_props_s { mac_priority_level_t mrp_priority; /* relative flow priority */ mac_cpus_t mrp_cpus; mac_protect_t mrp_protect; + uint32_t mrp_nrxrings; + uint32_t mrp_ntxrings; + char mrp_pool[MAXPATHLEN]; /* CPU pool */ } mac_resource_props_t; -#define mrp_ncpus mrp_cpus.mc_ncpus -#define mrp_cpu mrp_cpus.mc_cpus -#define mrp_fanout_cnt mrp_cpus.mc_fanout_cnt -#define mrp_fanout_cpu mrp_cpus.mc_fanout_cpus -#define mrp_pollid mrp_cpus.mc_pollid -#define mrp_workerid mrp_cpus.mc_workerid -#define mrp_intr_cpu mrp_cpus.mc_intr_cpu -#define mrp_fanout_mode mrp_cpus.mc_fanout_mode +#define mrp_ncpus mrp_cpus.mc_ncpus +#define mrp_cpu mrp_cpus.mc_cpus +#define mrp_rx_fanout_cnt mrp_cpus.mc_rx_fanout_cnt +#define mrp_rx_pollid mrp_cpus.mc_rx_pollid +#define mrp_rx_workerid mrp_cpus.mc_rx_workerid +#define mrp_rx_intr_cpu mrp_cpus.mc_rx_intr_cpu +#define mrp_fanout_mode mrp_cpus.mc_fanout_mode #define MAC_COPY_CPUS(mrp, fmrp) { \ int ncpus; \ (fmrp)->mrp_ncpus = (mrp)->mrp_ncpus; 
\ - (fmrp)->mrp_intr_cpu = (mrp)->mrp_intr_cpu; \ + (fmrp)->mrp_rx_fanout_cnt = (mrp)->mrp_rx_fanout_cnt; \ + (fmrp)->mrp_rx_intr_cpu = (mrp)->mrp_rx_intr_cpu; \ (fmrp)->mrp_fanout_mode = (mrp)->mrp_fanout_mode; \ if ((mrp)->mrp_ncpus == 0) { \ (fmrp)->mrp_mask &= ~MRP_CPUS; \ @@ -202,24 +251,6 @@ typedef struct mac_resource_props_s { } \ } -typedef struct flow_stats_s { - uint64_t fs_rbytes; - uint64_t fs_ipackets; - uint64_t fs_ierrors; - uint64_t fs_obytes; - uint64_t fs_opackets; - uint64_t fs_oerrors; -} flow_stats_t; - -typedef enum { - FLOW_STAT_RBYTES, - FLOW_STAT_IPACKETS, - FLOW_STAT_IERRORS, - FLOW_STAT_OBYTES, - FLOW_STAT_OPACKETS, - FLOW_STAT_OERRORS -} flow_stat_t; - #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 #pragma pack() #endif diff --git a/usr/src/uts/common/sys/mac_flow_impl.h b/usr/src/uts/common/sys/mac_flow_impl.h index f01d9d486c..307e06c1bf 100644 --- a/usr/src/uts/common/sys/mac_flow_impl.h +++ b/usr/src/uts/common/sys/mac_flow_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -280,7 +280,9 @@ struct flow_entry_s { /* Protected by */ void *fe_rx_ring_group; /* SL */ void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */ int fe_rx_srs_cnt; /* fe_lock */ + void *fe_tx_ring_group; void *fe_tx_srs; /* WO */ + int fe_tx_ring_cnt; /* * This is a unicast flow, and is a mac_client_impl_t @@ -317,7 +319,8 @@ struct flow_entry_s { /* Protected by */ flow_tab_t *fe_flow_tab; kstat_t *fe_ksp; - flow_stats_t fe_flowstats; + kstat_t *fe_misc_stat_ksp; + boolean_t fe_desc_logged; uint64_t fe_nic_speed; }; @@ -465,23 +468,36 @@ typedef struct flow_tab_info_s { #define FLOW_TAB_EMPTY(ft) ((ft) == NULL || (ft)->ft_flow_count == 0) -/* - * This is used by mac_tx_send. 
- */ -typedef struct mac_tx_stats_s { - uint_t ts_opackets; - uint_t ts_obytes; - uint_t ts_oerrors; -} mac_tx_stats_t; - -#define FLOW_STAT_UPDATE(f, s, c) { \ - ((flow_entry_t *)(f))->fe_flowstats.fs_##s += ((uint64_t)(c)); \ + +#define MCIP_STAT_UPDATE(m, s, c) { \ + ((mac_client_impl_t *)(m))->mci_misc_stat.mms_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_RX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_set_t *)(m))->srs_rx.sr_stat.mrs_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_TX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_set_t *)(m))->srs_tx.st_stat.mts_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_TX_STATS_UPDATE(m, s) { \ + SRS_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets); \ + SRS_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes); \ + SRS_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors); \ +} + +#define SOFTRING_TX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_t *)(m))->s_st_stat.mts_##s += ((uint64_t)(c)); \ } -#define FLOW_TX_STATS_UPDATE(f, s) { \ - FLOW_STAT_UPDATE((f), opackets, (s)->ts_opackets); \ - FLOW_STAT_UPDATE((f), obytes, (s)->ts_obytes); \ - FLOW_STAT_UPDATE((f), oerrors, (s)->ts_oerrors); \ +#define SOFTRING_TX_STATS_UPDATE(m, s) { \ + SOFTRING_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets); \ + SOFTRING_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes); \ + SOFTRING_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors); \ } extern void mac_flow_init(); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 760e2a4a18..ff4eeb1221 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -26,10 +26,12 @@ #ifndef _SYS_MAC_IMPL_H #define _SYS_MAC_IMPL_H +#include <sys/cpupart.h> #include <sys/modhash.h> #include <sys/mac_client.h> #include <sys/mac_provider.h> #include <sys/note.h> +#include <sys/avl.h> #include <net/if.h> #include <sys/mac_flow_impl.h> #include <netinet/ip6.h> @@ -85,6 +87,8 @@ typedef struct mac_chain_s { #define MCB_NOTIFY_CB_T 0x2 #define MCB_TX_NOTIFY_CB_T 0x4 
+extern boolean_t mac_tx_serialize; + typedef struct mac_cb_s { struct mac_cb_s *mcb_nextp; /* Linked list of callbacks */ void *mcb_objp; /* Ptr to enclosing object */ @@ -189,6 +193,8 @@ typedef enum { #define MR_CONDEMNED 0x2 #define MR_QUIESCE 0x4 +typedef struct mac_impl_s mac_impl_t; + struct mac_ring_s { int mr_index; /* index in the original list */ mac_ring_type_t mr_type; /* ring type */ @@ -196,11 +202,15 @@ struct mac_ring_s { mac_group_handle_t mr_gh; /* reference to group */ mac_classify_type_t mr_classify_type; /* HW vs SW */ - struct mac_soft_ring_set_s *mr_srs; /* associated SRS */ - uint_t mr_refcnt; /* Ring references */ + struct mac_soft_ring_set_s *mr_srs; /* associated SRS */ + mac_ring_handle_t mr_prh; /* associated pseudo ring hdl */ + uint_t mr_refcnt; /* Ring references */ /* ring generation no. to guard against drivers using stale rings */ uint64_t mr_gen_num; + kstat_t *mr_ksp; /* ring kstats */ + mac_impl_t *mr_mip; /* pointer to primary's mip */ + kmutex_t mr_lock; kcondvar_t mr_cv; /* mr_lock */ mac_ring_state_t mr_state; /* mr_lock */ @@ -211,6 +221,7 @@ struct mac_ring_s { #define mr_driver mr_info.mri_driver #define mr_start mr_info.mri_start #define mr_stop mr_info.mri_stop +#define mr_stat mr_info.mri_stat #define MAC_RING_MARK(mr, flag) \ (mr)->mr_flag |= flag; @@ -245,9 +256,9 @@ typedef struct mac_grp_client { struct mac_client_impl_s *mgc_client; } mac_grp_client_t; -#define MAC_RX_GROUP_NO_CLIENT(g) ((g)->mrg_clients == NULL) +#define MAC_GROUP_NO_CLIENT(g) ((g)->mrg_clients == NULL) -#define MAC_RX_GROUP_ONLY_CLIENT(g) \ +#define MAC_GROUP_ONLY_CLIENT(g) \ ((((g)->mrg_clients != NULL) && \ ((g)->mrg_clients->mgc_next == NULL)) ? 
\ (g)->mrg_clients->mgc_client : NULL) @@ -267,7 +278,6 @@ struct mac_group_s { mac_grp_client_t *mrg_clients; /* clients list */ - struct mac_client_impl_s *mrg_tx_client; /* TX client pointer */ mac_group_info_t mrg_info; /* driver supplied info */ }; @@ -279,8 +289,6 @@ struct mac_group_s { #define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable #define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable -#define MAC_DEFAULT_GROUP(mh) (((mac_impl_t *)mh)->mi_rx_groups) - #define MAC_RING_TX(mhp, rh, mp, rest) { \ mac_ring_handle_t mrh = rh; \ mac_impl_t *mimpl = (mac_impl_t *)mhp; \ @@ -304,7 +312,8 @@ struct mac_group_s { * rh nulled out if the bridge chooses to send output on a different * link due to forwarding. */ -#define MAC_TX(mip, rh, mp, share_bound) { \ +#define MAC_TX(mip, rh, mp, src_mcip) { \ + mac_ring_handle_t rhandle = (rh); \ /* \ * If there is a bound Hybrid I/O share, send packets through \ * the default tx ring. (When there's a bound Hybrid I/O share, \ @@ -312,17 +321,19 @@ struct mac_group_s { * and not accessible from here.) \ */ \ _NOTE(CONSTANTCONDITION) \ - if (share_bound) \ - rh = NULL; \ + if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \ + rhandle = (mip)->mi_default_tx_ring; \ + if (mip->mi_promisc_list != NULL) \ + mac_promisc_dispatch(mip, mp, src_mcip); \ /* \ * Grab the proper transmit pointer and handle. 
Special \ * optimization: we can test mi_bridge_link itself atomically, \ * and if that indicates no bridge send packets through tx ring.\ */ \ if (mip->mi_bridge_link == NULL) { \ - MAC_RING_TX(mip, rh, mp, mp); \ + MAC_RING_TX(mip, rhandle, mp, mp); \ } else { \ - mp = mac_bridge_tx(mip, rh, mp); \ + mp = mac_bridge_tx(mip, rhandle, mp); \ } \ } @@ -346,8 +357,6 @@ typedef enum { MAC_ADDRESS_TYPE_UNICAST_PROMISC /* promiscuous mode */ } mac_address_type_t; -typedef struct mac_impl_s mac_impl_t; - typedef struct mac_address_s { mac_address_type_t ma_type; /* address type */ int ma_nusers; /* number of users */ @@ -406,7 +415,6 @@ struct mac_impl_s { link_state_t mi_lowlinkstate; /* none */ link_state_t mi_lastlowlinkstate; /* none */ uint_t mi_devpromisc; /* SL */ - kmutex_t mi_lock; uint8_t mi_addr[MAXMACADDRLEN]; /* mi_rw_lock */ uint8_t mi_dstaddr[MAXMACADDRLEN]; /* mi_rw_lock */ boolean_t mi_dstaddr_set; @@ -436,6 +444,11 @@ struct mac_impl_s { mac_group_type_t mi_rx_group_type; /* grouping type */ uint_t mi_rx_group_count; mac_group_t *mi_rx_groups; + mac_group_t *mi_rx_donor_grp; + uint_t mi_rxrings_rsvd; + uint_t mi_rxrings_avail; + uint_t mi_rxhwclnt_avail; + uint_t mi_rxhwclnt_used; mac_capab_rings_t mi_rx_rings_cap; @@ -446,8 +459,11 @@ struct mac_impl_s { uint_t mi_tx_group_count; uint_t mi_tx_group_free; mac_group_t *mi_tx_groups; - mac_capab_rings_t mi_tx_rings_cap; + uint_t mi_txrings_rsvd; + uint_t mi_txrings_avail; + uint_t mi_txhwclnt_avail; + uint_t mi_txhwclnt_used; mac_ring_handle_t mi_default_tx_ring; @@ -516,7 +532,7 @@ struct mac_impl_s { * sorted: the first one has the greatest value. */ mac_margin_req_t *mi_mmrp; - mac_priv_prop_t *mi_priv_prop; + char **mi_priv_prop; uint_t mi_priv_prop_count; /* @@ -541,6 +557,72 @@ struct mac_impl_s { #endif }; +/* + * The default TX group is the last one in the list. 
+ */ +#define MAC_DEFAULT_TX_GROUP(mip) \ + (mip)->mi_tx_groups + (mip)->mi_tx_group_count + +/* + * The default RX group is the first one in the list + */ +#define MAC_DEFAULT_RX_GROUP(mip) (mip)->mi_rx_groups + +/* Reserved RX rings */ +#define MAC_RX_RING_RESERVED(m, cnt) { \ + ASSERT((m)->mi_rxrings_avail >= (cnt)); \ + (m)->mi_rxrings_rsvd += (cnt); \ + (m)->mi_rxrings_avail -= (cnt); \ +} + +/* Released RX rings */ +#define MAC_RX_RING_RELEASED(m, cnt) { \ + ASSERT((m)->mi_rxrings_rsvd >= (cnt)); \ + (m)->mi_rxrings_rsvd -= (cnt); \ + (m)->mi_rxrings_avail += (cnt); \ +} + +/* Reserved a RX group */ +#define MAC_RX_GRP_RESERVED(m) { \ + ASSERT((m)->mi_rxhwclnt_avail > 0); \ + (m)->mi_rxhwclnt_avail--; \ + (m)->mi_rxhwclnt_used++; \ +} + +/* Released a RX group */ +#define MAC_RX_GRP_RELEASED(m) { \ + ASSERT((m)->mi_rxhwclnt_used > 0); \ + (m)->mi_rxhwclnt_avail++; \ + (m)->mi_rxhwclnt_used--; \ +} + +/* Reserved TX rings */ +#define MAC_TX_RING_RESERVED(m, cnt) { \ + ASSERT((m)->mi_txrings_avail >= (cnt)); \ + (m)->mi_txrings_rsvd += (cnt); \ + (m)->mi_txrings_avail -= (cnt); \ +} +/* Released TX rings */ +#define MAC_TX_RING_RELEASED(m, cnt) { \ + ASSERT((m)->mi_txrings_rsvd >= (cnt)); \ + (m)->mi_txrings_rsvd -= (cnt); \ + (m)->mi_txrings_avail += (cnt); \ +} + +/* Reserved a TX group */ +#define MAC_TX_GRP_RESERVED(m) { \ + ASSERT((m)->mi_txhwclnt_avail > 0); \ + (m)->mi_txhwclnt_avail--; \ + (m)->mi_txhwclnt_used++; \ +} + +/* Released a TX group */ +#define MAC_TX_GRP_RELEASED(m) { \ + ASSERT((m)->mi_txhwclnt_used > 0); \ + (m)->mi_txhwclnt_avail++; \ + (m)->mi_txhwclnt_used--; \ +} + /* for mi_state_flags */ #define MIS_DISABLED 0x0001 #define MIS_IS_VNIC 0x0002 @@ -570,12 +652,6 @@ typedef struct mac_notify_task_arg { mac_ring_t *mnt_ring; } mac_notify_task_arg_t; -typedef enum { - MAC_RX_NO_RESERVE, - MAC_RX_RESERVE_DEFAULT, - MAC_RX_RESERVE_NONDEFAULT -} mac_rx_group_reserve_type_t; - /* * XXX All MAC_DBG_PRTs must be replaced with call to dtrace 
probes. For now * it may be easier to have these printfs for easier debugging @@ -599,18 +675,45 @@ extern int mac_dbg; (need_close) = ((uintptr_t)mph & 0x1); \ } +/* + * Type of property information that can be returned by a driver. + * Valid flags of the pr_flags of the mac_prop_info_t data structure. + */ +#define MAC_PROP_INFO_DEFAULT 0x0001 +#define MAC_PROP_INFO_RANGE 0x0002 +#define MAC_PROP_INFO_PERM 0x0004 + +/* + * Property information. pr_flags is a combination of one of the + * MAC_PROP_INFO_* flags, it is reset by the framework before invoking + * the driver's prefix_propinfo() entry point. + * + * Drivers should use MAC_PROP_INFO_SET_*() macros to provide + * information about a property. + */ +typedef struct mac_prop_info_state_s { + uint8_t pr_flags; + uint8_t pr_perm; + void *pr_default; + size_t pr_default_size; + uint8_t pr_default_status; + mac_propval_range_t *pr_range; +} mac_prop_info_state_t; + +#define MAC_PROTECT_ENABLED(mcip, type) \ + (((mcip)->mci_flent-> \ + fe_resource_props.mrp_mask & MRP_PROTECT) != 0 && \ + ((mcip)->mci_flent-> \ + fe_resource_props.mrp_protect.mp_types & (type)) != 0) + typedef struct mac_client_impl_s mac_client_impl_t; extern void mac_init(void); extern int mac_fini(void); -extern void mac_stat_create(mac_impl_t *); -extern void mac_stat_destroy(mac_impl_t *); -extern uint64_t mac_stat_default(mac_impl_t *, uint_t); extern void mac_ndd_ioctl(mac_impl_t *, queue_t *, mblk_t *); -extern void mac_create_soft_ring_kstats(mac_impl_t *, int32_t); -extern boolean_t mac_ip_hdr_length_v6(mblk_t *, ip6_t *, uint16_t *, - uint8_t *, boolean_t *, uint32_t *); +extern boolean_t mac_ip_hdr_length_v6(ip6_t *, uint8_t *, uint16_t *, + uint8_t *, ip6_frag_t **); extern mblk_t *mac_copymsgchain_cksum(mblk_t *); extern mblk_t *mac_fix_cksum(mblk_t *); @@ -649,10 +752,17 @@ extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *, mac_group_t *); extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); extern mblk_t 
*mac_bridge_tx(mac_impl_t *, mac_ring_handle_t, mblk_t *); +extern mac_group_t *mac_reserve_rx_group(mac_client_impl_t *, uint8_t *, + boolean_t); +extern void mac_release_rx_group(mac_client_impl_t *, mac_group_t *); +extern int mac_rx_switch_group(mac_client_impl_t *, mac_group_t *, + mac_group_t *); extern mac_ring_t *mac_reserve_tx_ring(mac_impl_t *, mac_ring_t *); -extern void mac_release_tx_ring(mac_ring_handle_t); -extern mac_group_t *mac_reserve_tx_group(mac_impl_t *, mac_share_handle_t); -extern void mac_release_tx_group(mac_impl_t *, mac_group_t *); +extern mac_group_t *mac_reserve_tx_group(mac_client_impl_t *, boolean_t); +extern void mac_release_tx_group(mac_client_impl_t *, mac_group_t *); +extern void mac_tx_switch_group(mac_client_impl_t *, mac_group_t *, + mac_group_t *); +extern void mac_rx_switch_grp_to_sw(mac_group_t *); /* * MAC address functions are used internally by MAC layer. @@ -676,7 +786,7 @@ extern void mac_link_flow_clean(mac_client_handle_t, flow_entry_t *); * Fanout update routines called when the link speed of the NIC changes * or when a MAC client's share is unbound. 
*/ -extern void mac_fanout_recompute_client(mac_client_impl_t *); +extern void mac_fanout_recompute_client(mac_client_impl_t *, cpupart_t *); extern void mac_fanout_recompute(mac_impl_t *); /* @@ -687,14 +797,15 @@ extern void mac_fanout_recompute(mac_impl_t *); extern int mac_datapath_setup(mac_client_impl_t *, flow_entry_t *, uint32_t); extern void mac_datapath_teardown(mac_client_impl_t *, flow_entry_t *, uint32_t); -extern void mac_srs_group_setup(mac_client_impl_t *, flow_entry_t *, - mac_group_t *, uint32_t); -extern void mac_srs_group_teardown(mac_client_impl_t *, flow_entry_t *, +extern void mac_rx_srs_group_setup(mac_client_impl_t *, flow_entry_t *, + uint32_t); +extern void mac_tx_srs_group_setup(mac_client_impl_t *, flow_entry_t *, + uint32_t); +extern void mac_rx_srs_group_teardown(flow_entry_t *, boolean_t); +extern void mac_tx_srs_group_teardown(mac_client_impl_t *, flow_entry_t *, uint32_t); extern int mac_rx_classify_flow_quiesce(flow_entry_t *, void *); extern int mac_rx_classify_flow_restart(flow_entry_t *, void *); -extern void mac_tx_client_quiesce(mac_client_impl_t *, uint_t); -extern void mac_tx_client_restart(mac_client_impl_t *); extern void mac_client_quiesce(mac_client_impl_t *); extern void mac_client_restart(mac_client_impl_t *); @@ -725,15 +836,17 @@ extern void mac_rx_group_unmark(mac_group_t *, uint_t); extern void mac_tx_client_flush(mac_client_impl_t *); extern void mac_tx_client_block(mac_client_impl_t *); extern void mac_tx_client_unblock(mac_client_impl_t *); +extern void mac_tx_invoke_callbacks(mac_client_impl_t *, mac_tx_cookie_t); extern int i_mac_promisc_set(mac_impl_t *, boolean_t); extern void i_mac_promisc_walker_cleanup(mac_impl_t *); extern mactype_t *mactype_getplugin(const char *); extern void mac_addr_factory_init(mac_impl_t *); extern void mac_addr_factory_fini(mac_impl_t *); -extern void mac_register_priv_prop(mac_impl_t *, mac_priv_prop_t *, uint_t); +extern void mac_register_priv_prop(mac_impl_t *, char **); 
extern void mac_unregister_priv_prop(mac_impl_t *); extern int mac_init_rings(mac_impl_t *, mac_ring_type_t); extern void mac_free_rings(mac_impl_t *, mac_ring_type_t); +extern void mac_compare_ddi_handle(mac_group_t *, uint_t, mac_ring_t *); extern int mac_start_group(mac_group_t *); extern void mac_stop_group(mac_group_t *); @@ -742,20 +855,41 @@ extern void mac_stop_ring(mac_ring_t *); extern int mac_add_macaddr(mac_impl_t *, mac_group_t *, uint8_t *, boolean_t); extern int mac_remove_macaddr(mac_address_t *); -extern void mac_set_rx_group_state(mac_group_t *, mac_group_state_t); -extern void mac_rx_group_add_client(mac_group_t *, mac_client_impl_t *); -extern void mac_rx_group_remove_client(mac_group_t *, mac_client_impl_t *) -; +extern void mac_set_group_state(mac_group_t *, mac_group_state_t); +extern void mac_group_add_client(mac_group_t *, mac_client_impl_t *); +extern void mac_group_remove_client(mac_group_t *, mac_client_impl_t *); + extern int i_mac_group_add_ring(mac_group_t *, mac_ring_t *, int); extern void i_mac_group_rem_ring(mac_group_t *, mac_ring_t *, boolean_t); - +extern int mac_group_ring_modify(mac_client_impl_t *, mac_group_t *, + mac_group_t *); extern void mac_poll_state_change(mac_handle_t, boolean_t); +extern mac_group_state_t mac_group_next_state(mac_group_t *, + mac_client_impl_t **, mac_group_t *, boolean_t); + extern mblk_t *mac_protect_check(mac_client_handle_t, mblk_t *); extern int mac_protect_set(mac_client_handle_t, mac_resource_props_t *); extern boolean_t mac_protect_enabled(mac_client_handle_t, uint32_t); extern int mac_protect_validate(mac_resource_props_t *); extern void mac_protect_update(mac_resource_props_t *, mac_resource_props_t *); +extern void mac_protect_update_v6_local_addr(mac_client_impl_t *); +extern void mac_protect_intercept_dhcp(mac_client_impl_t *, mblk_t *); +extern void mac_protect_flush_dhcp(mac_client_impl_t *); +extern void mac_protect_cancel_timer(mac_client_impl_t *); +extern void 
mac_protect_init(mac_client_impl_t *); +extern void mac_protect_fini(mac_client_impl_t *); + +extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *); + +extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *); +extern void mac_set_pool_effective(boolean_t, cpupart_t *, + mac_resource_props_t *, mac_resource_props_t *); +extern void mac_set_rings_effective(mac_client_impl_t *); +extern mac_client_impl_t *mac_check_primary_relocation(mac_client_impl_t *, + boolean_t); /* Global callbacks into the bridging module (when loaded) */ extern mac_bridge_tx_t mac_bridge_tx_cb; @@ -763,6 +897,7 @@ extern mac_bridge_rx_t mac_bridge_rx_cb; extern mac_bridge_ref_t mac_bridge_ref_cb; extern mac_bridge_ls_t mac_bridge_ls_cb; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 988f723f67..c96d07b594 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,7 +32,6 @@ #include <sys/sunddi.h> #include <sys/stream.h> #include <sys/mkdev.h> -#include <sys/mac_flow.h> #include <sys/mac.h> /* @@ -44,18 +43,16 @@ extern "C" { #endif /* - * MAC version identifier. This is used by mac_alloc() mac_register() to + * MAC version identifiers. Drivers compiled against the stable V1 version + * of the API should register with MAC_VERSION_V1. ON drivers should use + * MAC_VERSION. This is used by mac_alloc() mac_register() to * verify that incompatible drivers don't register. 
*/ -#define MAC_VERSION 0x2 +#define MAC_VERSION_V1 0x1 +#define MAC_VERSION MAC_VERSION_V1 /* - * Opaque handle types - */ -typedef struct __mac_rule_handle *mac_rule_handle_t; - -/* - * Statistics + * Possible values for ETHER_STAT_XCVR_INUSE statistic. */ #define XCVR_UNDEFINED 0 @@ -82,28 +79,35 @@ typedef struct __mac_rule_handle *mac_rule_handle_t; */ typedef enum { /* - * Capabilities reserved for internal use only + * Public Capabilities (MAC_VERSION_V1) */ - MAC_CAPAB_VNIC = 0x0001, /* data is mac_capab_vnic_t */ - MAC_CAPAB_ANCHOR_VNIC = 0x0002, /* boolean only, no data */ - MAC_CAPAB_AGGR = 0x0004, /* data is mac_capab_aggr_t */ - MAC_CAPAB_NO_NATIVEVLAN = 0x0008, /* boolean only, no data */ - MAC_CAPAB_NO_ZCOPY = 0x0010, /* boolean only, no data */ - MAC_CAPAB_LEGACY = 0x0020, /* data is mac_capab_legacy_t */ - MAC_CAPAB_VRRP = 0x0040, /* data is mac_capab_vrrp_t */ + MAC_CAPAB_HCKSUM = 0x00000001, /* data is a uint32_t */ + MAC_CAPAB_LSO = 0x00000008, /* data is mac_capab_lso_t */ /* - * Public Capabilities + * Reserved capabilities, do not use */ - MAC_CAPAB_HCKSUM = 0x0100, /* data is a uint32_t */ - MAC_CAPAB_LSO = 0x0200, /* data is mac_capab_lso_t */ - MAC_CAPAB_RINGS = 0x0400, /* data is mac_capab_rings_t */ - MAC_CAPAB_MULTIFACTADDR = 0x0800, /* mac_data_multifactaddr_t */ - MAC_CAPAB_SHARES = 0x1000 /* data is mac_capab_share_t */ + MAC_CAPAB_RESERVED1 = 0x00000002, + MAC_CAPAB_RESERVED2 = 0x00000004, - /* add new capabilities here */ -} mac_capab_t; + /* + * Private driver capabilities + */ + MAC_CAPAB_RINGS = 0x00000010, /* data is mac_capab_rings_t */ + MAC_CAPAB_SHARES = 0x00000020, /* data is mac_capab_share_t */ + MAC_CAPAB_MULTIFACTADDR = 0x00000040, /* mac_data_multifactaddr_t */ + /* + * Private driver capabilities for use by the GLDv3 framework only + */ + MAC_CAPAB_VNIC = 0x00010000, /* data is mac_capab_vnic_t */ + MAC_CAPAB_ANCHOR_VNIC = 0x00020000, /* boolean only, no data */ + MAC_CAPAB_AGGR = 0x00040000, /* data is 
mac_capab_aggr_t */ + MAC_CAPAB_NO_NATIVEVLAN = 0x00080000, /* boolean only, no data */ + MAC_CAPAB_NO_ZCOPY = 0x00100000, /* boolean only, no data */ + MAC_CAPAB_LEGACY = 0x00200000, /* data is mac_capab_legacy_t */ + MAC_CAPAB_VRRP = 0x00400000 /* data is mac_capab_vrrp_t */ +} mac_capab_t; /* * LSO capability @@ -164,6 +168,8 @@ typedef struct mac_capab_legacy_s { void (*ml_fastpath_enable)(void *); } mac_capab_legacy_t; +typedef struct __mac_prop_info_handle *mac_prop_info_handle_t; + /* * MAC driver entry point types. */ @@ -182,12 +188,15 @@ typedef void (*mac_close_t)(void *); typedef int (*mac_set_prop_t)(void *, const char *, mac_prop_id_t, uint_t, const void *); typedef int (*mac_get_prop_t)(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +typedef void (*mac_prop_info_t)(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* - * Drivers must set all of these callbacks except for mc_resources, - * mc_ioctl, and mc_getcapab, which are optional. If any of these optional - * callbacks are set, their appropriate flags must be set in mc_callbacks. + * Driver callbacks. The following capabilities are optional, and if + * implemented by the driver, must have a corresponding MC_ flag set + * in the mc_callbacks field. + * * Any future additions to this list must also be accompanied by an * associated mc_callbacks flag so that the framework can grow without * affecting the binary compatibility of the interface. 
@@ -201,18 +210,31 @@ typedef struct mac_callbacks_s { mac_multicst_t mc_multicst; /* Enable or disable a multicast addr */ mac_unicst_t mc_unicst; /* Set the unicast MAC address */ mac_tx_t mc_tx; /* Transmit a packet */ + void *mc_reserved; /* Reserved, do not use */ mac_ioctl_t mc_ioctl; /* Process an unknown ioctl */ mac_getcapab_t mc_getcapab; /* Get capability information */ mac_open_t mc_open; /* Open the device */ mac_close_t mc_close; /* Close the device */ mac_set_prop_t mc_setprop; mac_get_prop_t mc_getprop; + mac_prop_info_t mc_propinfo; } mac_callbacks_t; -typedef struct mac_priv_prop_s { - char mpp_name[MAXLINKPROPNAME]; - uint_t mpp_flags; -} mac_priv_prop_t; +/* + * Flags for mc_callbacks. Requiring drivers to set the flags associated + * with optional callbacks initialized in the structure allows the mac + * module to add optional callbacks in the future without requiring drivers + * to recompile. + */ +#define MC_RESERVED 0x0001 +#define MC_IOCTL 0x0002 +#define MC_GETCAPAB 0x0004 +#define MC_OPEN 0x0008 +#define MC_CLOSE 0x0010 +#define MC_SETPROP 0x0020 +#define MC_GETPROP 0x0040 +#define MC_PROPINFO 0x0080 +#define MC_PROPERTIES (MC_SETPROP | MC_GETPROP | MC_PROPINFO) /* * Virtualization Capabilities @@ -245,24 +267,16 @@ typedef void (*mac_rx_func_t)(void *, mac_resource_handle_t, mblk_t *, * * MAC_VIRT_HIO: Hybrid I/O capable MAC. Require the support * of the MAC_CAPAB_SHARES capability. - * - * MAC_VIRT_SERIALIZE: Temporary flag *ONLY* for nxge. Mac layer - * uses this to enable mac Tx serializer on - * outbound traffic and to always enqueue - * incoming traffic on Rx soft rings in mac. 
*/ #define MAC_VIRT_NONE 0x0 #define MAC_VIRT_LEVEL1 0x1 #define MAC_VIRT_HIO 0x2 -#define MAC_VIRT_SERIALIZE 0x4 typedef enum { MAC_RING_TYPE_RX = 1, /* Receive ring */ MAC_RING_TYPE_TX /* Transmit ring */ } mac_ring_type_t; -#define MAX_RINGS_PER_GROUP 128 - /* * Grouping type of a ring group * @@ -313,6 +327,8 @@ typedef void (*mac_ring_stop_t)(mac_ring_driver_t); typedef mblk_t *(*mac_ring_send_t)(void *, mblk_t *); typedef mblk_t *(*mac_ring_poll_t)(void *, int); +typedef int (*mac_ring_stat_t)(mac_ring_driver_t, uint_t, uint64_t *); + typedef struct mac_ring_info_s { mac_ring_driver_t mri_driver; mac_ring_start_t mri_start; @@ -322,11 +338,27 @@ typedef struct mac_ring_info_s { mac_ring_send_t send; mac_ring_poll_t poll; } mrfunion; + mac_ring_stat_t mri_stat; + /* + * mri_flags will have some bits set to indicate some special + * property/feature of a ring like serialization needed for a + * Tx ring or packets should always need enqueuing on Rx side, + * etc. + */ + uint_t mri_flags; } mac_ring_info_s; #define mri_tx mrfunion.send #define mri_poll mrfunion.poll +/* + * #defines for mri_flags. The flags are temporary flags that are provided + * only to workaround issues in specific drivers, and they will be + * removed in the future. + */ +#define MAC_RING_TX_SERIALIZE 0x1 +#define MAC_RING_RX_ENQUEUE 0x2 + typedef int (*mac_group_start_t)(mac_group_driver_t); typedef void (*mac_group_stop_t)(mac_group_driver_t); typedef int (*mac_add_mac_addr_t)(void *, const uint8_t *); @@ -415,26 +447,12 @@ typedef struct mac_register_s { uint_t m_max_sdu; void *m_pdata; size_t m_pdata_size; + char **m_priv_props; uint32_t m_margin; - mac_priv_prop_t *m_priv_props; - size_t m_priv_prop_count; uint32_t m_v12n; /* Virtualization level */ } mac_register_t; /* - * Flags for mc_callbacks. 
Requiring drivers to set the flags associated - * with optional callbacks initialized in the structure allows the mac - * module to add optional callbacks in the future without requiring drivers - * to recompile. - */ -#define MC_IOCTL 0x001 -#define MC_GETCAPAB 0x002 -#define MC_OPEN 0x004 -#define MC_CLOSE 0x008 -#define MC_SETPROP 0x010 -#define MC_GETPROP 0x020 - -/* * Driver interface functions. */ extern void mac_sdu_get(mac_handle_t, uint_t *, uint_t *); @@ -476,6 +494,9 @@ extern void mac_init_ops(struct dev_ops *, const char *); extern void mac_fini_ops(struct dev_ops *); extern int mac_devt_to_instance(dev_t); extern minor_t mac_private_minor(void); +extern void mac_ring_intr_set(mac_ring_handle_t, + ddi_intr_handle_t); + extern mactype_register_t *mactype_alloc(uint_t); extern void mactype_free(mactype_register_t *); @@ -488,6 +509,31 @@ extern boolean_t mac_unicst_verify(mac_handle_t, extern int mac_group_add_ring(mac_group_handle_t, int); extern void mac_group_rem_ring(mac_group_handle_t, mac_ring_handle_t); +extern mac_ring_handle_t mac_find_ring(mac_group_handle_t, int); + +extern void mac_prop_info_set_default_uint8( + mac_prop_info_handle_t, uint8_t); +extern void mac_prop_info_set_default_str( + mac_prop_info_handle_t, const char *); +extern void mac_prop_info_set_default_uint64( + mac_prop_info_handle_t, uint64_t); +extern void mac_prop_info_set_default_uint32( + mac_prop_info_handle_t, uint32_t); +extern void mac_prop_info_set_default_link_flowctrl( + mac_prop_info_handle_t, link_flowctrl_t); +extern void mac_prop_info_set_range_uint32( + mac_prop_info_handle_t, + uint32_t, uint32_t); +extern void mac_prop_info_set_perm(mac_prop_info_handle_t, + uint8_t); + +extern void mac_hcksum_get(mblk_t *, uint32_t *, + uint32_t *, uint32_t *, uint32_t *, + uint32_t *); +extern void mac_hcksum_set(mblk_t *, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +extern void mac_lso_get(mblk_t *, uint32_t *, uint32_t *); #endif /* _KERNEL */ diff --git 
a/usr/src/uts/common/sys/mac_soft_ring.h b/usr/src/uts/common/sys/mac_soft_ring.h index ed4c47954d..88f1aa7249 100644 --- a/usr/src/uts/common/sys/mac_soft_ring.h +++ b/usr/src/uts/common/sys/mac_soft_ring.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,11 +33,13 @@ extern "C" { #include <sys/types.h> #include <sys/cpuvar.h> +#include <sys/cpupart.h> #include <sys/processor.h> #include <sys/stream.h> #include <sys/squeue.h> #include <sys/dlpi.h> #include <sys/mac_impl.h> +#include <sys/mac_stat.h> #define S_RING_NAMELEN 64 @@ -85,8 +87,6 @@ struct mac_soft_ring_s { /* # of mblocks after which to relieve flow control */ int s_ring_tx_lowat; boolean_t s_ring_tx_woken_up; - uint32_t s_ring_blocked_cnt; /* times blocked for Tx descs */ - uint32_t s_ring_unblocked_cnt; /* unblock calls from driver */ uint32_t s_ring_hiwat_cnt; /* times blocked for Tx descs */ void *s_ring_tx_arg1; @@ -107,9 +107,9 @@ struct mac_soft_ring_s { kthread_t *s_ring_worker; /* kernel thread id */ char s_ring_name[S_RING_NAMELEN + 1]; uint32_t s_ring_total_inpkt; + uint32_t s_ring_total_rbytes; uint32_t s_ring_drops; struct mac_client_impl_s *s_ring_mcip; - void *s_ring_flent; kstat_t *s_ring_ksp; /* Teardown, poll disable control ops */ @@ -119,6 +119,8 @@ struct mac_soft_ring_s { mac_soft_ring_t *s_ring_next; mac_soft_ring_t *s_ring_prev; mac_soft_ring_drain_func_t s_ring_drain_func; + + mac_tx_stats_t s_st_stat; }; typedef void (*mac_srs_drain_proc_t)(mac_soft_ring_set_t *, uint_t); @@ -131,9 +133,6 @@ typedef struct mac_srs_tx_s { void *st_arg1; void *st_arg2; mac_group_t *st_group; /* TX group for share */ - uint32_t st_ring_count; /* no. 
of tx rings */ - mac_ring_handle_t *st_rings; - boolean_t st_woken_up; /* @@ -156,18 +155,19 @@ typedef struct mac_srs_tx_s { */ uint32_t st_hiwat; /* mblk cnt to apply flow control */ uint32_t st_lowat; /* mblk cnt to relieve flow control */ - uint32_t st_drop_count; + uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ + mac_tx_stats_t st_stat; + mac_capab_aggr_t st_capab_aggr; /* - * Number of times the srs gets blocked due to lack of Tx - * desc is noted down. Corresponding wakeup from driver - * to unblock is also noted down. They should match in a - * correctly working setup. If there is less unblocks - * than blocks, then Tx side waits forever for a wakeup - * from below. The following protected by srs_lock. + * st_soft_rings is used as an array to store aggr Tx soft + * rings. When aggr_find_tx_ring() returns a pseudo ring, + * the associated soft ring has to be found. st_soft_rings + * array stores the soft ring associated with a pseudo Tx + * ring and it can be accessed using the pseudo ring + * index (mr_index). Note that the ring index is unique + * for each ring in a group. 
*/ - uint32_t st_blocked_cnt; /* times blocked for Tx descs */ - uint32_t st_unblocked_cnt; /* unblock calls from driver */ - uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ + mac_soft_ring_t **st_soft_rings; } mac_srs_tx_t; /* Receive side Soft Ring Set */ @@ -191,9 +191,7 @@ typedef struct mac_srs_rx_s { uint32_t sr_hiwat; /* mblk cnt to relieve flow control */ uint32_t sr_lowat; - uint32_t sr_poll_count; - uint32_t sr_intr_count; - uint32_t sr_drop_count; + mac_rx_stats_t sr_stat; /* Times polling was enabled */ uint32_t sr_poll_on; @@ -246,13 +244,6 @@ typedef struct mac_srs_rx_s { uint32_t sr_drain_finish_intr; /* Polling thread needs to schedule worker wakeup */ uint32_t sr_poll_worker_wakeup; - - /* Chains less than 10 pkts */ - uint32_t sr_chain_cnt_undr10; - /* Chains between 10 & 50 pkts */ - uint32_t sr_chain_cnt_10to50; - /* Chains over 50 pkts */ - uint32_t sr_chain_cnt_over50; } mac_srs_rx_t; /* @@ -334,12 +325,14 @@ struct mac_soft_ring_set_s { int srs_tcp_ring_count; mac_soft_ring_t **srs_udp_soft_rings; int srs_udp_ring_count; + mac_soft_ring_t **srs_oth_soft_rings; + int srs_oth_ring_count; /* - * srs_oth_soft_rings is also used by tx_srs in + * srs_tx_soft_rings is used by tx_srs in * when operating in multi tx ring mode. */ - mac_soft_ring_t **srs_oth_soft_rings; - int srs_oth_ring_count; + mac_soft_ring_t **srs_tx_soft_rings; + int srs_tx_ring_count; /* * Bandwidth control related members. 
@@ -386,6 +379,7 @@ struct mac_soft_ring_set_s { mac_srs_rx_t srs_rx; mac_srs_tx_t srs_tx; + kstat_t *srs_ksp; }; /* @@ -507,7 +501,9 @@ typedef enum { SRS_TX_SERIALIZE, SRS_TX_FANOUT, SRS_TX_BW, - SRS_TX_BW_FANOUT + SRS_TX_BW_FANOUT, + SRS_TX_AGGR, + SRS_TX_BW_AGGR } mac_tx_srs_mode_t; /* @@ -626,9 +622,7 @@ extern struct dls_kstats dls_kstat; (srs)->srs_bw->mac_bw_used += (sz); \ } -#define TX_MULTI_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_FANOUT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) +#define MAC_TX_SOFT_RINGS(mac_srs) ((mac_srs)->srs_tx_ring_count >= 1) /* Soft ring flags for teardown */ #define SRS_POLL_THR_OWNER (SRS_PROC | SRS_POLLING | SRS_GET_PKTS) @@ -639,7 +633,8 @@ extern struct dls_kstats dls_kstat; extern void mac_soft_ring_init(void); extern void mac_soft_ring_finish(void); extern void mac_fanout_setup(mac_client_impl_t *, flow_entry_t *, - mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t); + mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t, + cpupart_t *); extern void mac_soft_ring_worker_wakeup(mac_soft_ring_t *); extern void mac_soft_ring_blank(void *, time_t, uint_t, int); @@ -654,6 +649,8 @@ extern mac_soft_ring_set_t *mac_srs_create(struct mac_client_impl_s *, extern void mac_srs_free(mac_soft_ring_set_t *); extern void mac_srs_signal(mac_soft_ring_set_t *, uint_t); extern cpu_t *mac_srs_bind(mac_soft_ring_set_t *, processorid_t); +extern void mac_rx_srs_retarget_intr(mac_soft_ring_set_t *, processorid_t); +extern void mac_tx_srs_retarget_intr(mac_soft_ring_set_t *); extern void mac_srs_change_upcall(void *, mac_direct_rx_t, void *); extern void mac_srs_quiesce_initiate(mac_soft_ring_set_t *); @@ -673,12 +670,13 @@ extern void mac_tx_srs_quiesce(mac_soft_ring_set_t *, uint_t); /* Tx SRS, Tx softring */ extern void mac_tx_srs_wakeup(mac_soft_ring_set_t *, mac_ring_handle_t); -extern void mac_tx_srs_setup(struct mac_client_impl_s *, - flow_entry_t *, uint32_t); +extern 
void mac_tx_srs_setup(struct mac_client_impl_s *, flow_entry_t *); extern mac_tx_func_t mac_tx_get_func(uint32_t); extern mblk_t *mac_tx_send(mac_client_handle_t, mac_ring_handle_t, mblk_t *, mac_tx_stats_t *); extern boolean_t mac_tx_srs_ring_present(mac_soft_ring_set_t *, mac_ring_t *); +extern mac_soft_ring_t *mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *, + mac_ring_t *); extern void mac_tx_srs_add_ring(mac_soft_ring_set_t *, mac_ring_t *); extern void mac_tx_srs_del_ring(mac_soft_ring_set_t *, mac_ring_t *); extern mac_tx_cookie_t mac_tx_srs_no_desc(mac_soft_ring_set_t *, mblk_t *, @@ -695,12 +693,12 @@ extern void mac_client_update_classifier(mac_client_impl_t *, boolean_t); extern void mac_soft_ring_intr_enable(void *); extern boolean_t mac_soft_ring_intr_disable(void *); -extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, void *, uint16_t, +extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, uint16_t, pri_t, mac_client_impl_t *, mac_soft_ring_set_t *, processorid_t, mac_direct_rx_t, void *, mac_resource_handle_t); extern cpu_t *mac_soft_ring_bind(mac_soft_ring_t *, processorid_t); extern void mac_soft_ring_unbind(mac_soft_ring_t *); -extern void mac_soft_ring_free(mac_soft_ring_t *, boolean_t); +extern void mac_soft_ring_free(mac_soft_ring_t *); extern void mac_soft_ring_signal(mac_soft_ring_t *, uint_t); extern void mac_rx_soft_ring_process(mac_client_impl_t *, mac_soft_ring_t *, mblk_t *, mblk_t *, int, size_t); diff --git a/usr/src/uts/common/sys/mac_stat.h b/usr/src/uts/common/sys/mac_stat.h new file mode 100644 index 0000000000..2d2989cd76 --- /dev/null +++ b/usr/src/uts/common/sys/mac_stat.h @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MAC_STAT_H +#define _MAC_STAT_H + +#include <sys/mac_flow_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + +struct mac_soft_ring_set_s; +struct mac_soft_ring_s; + +typedef struct mac_rx_stats_s { + uint64_t mrs_lclbytes; + uint64_t mrs_lclcnt; + uint64_t mrs_pollcnt; + uint64_t mrs_pollbytes; + uint64_t mrs_intrcnt; + uint64_t mrs_intrbytes; + uint64_t mrs_sdrops; + uint64_t mrs_chaincntundr10; + uint64_t mrs_chaincnt10to50; + uint64_t mrs_chaincntover50; + uint64_t mrs_ierrors; +} mac_rx_stats_t; + +typedef struct mac_tx_stats_s { + uint64_t mts_obytes; + uint64_t mts_opackets; + uint64_t mts_oerrors; + /* + * Number of times the srs gets blocked due to lack of Tx + * desc is noted down. Corresponding wakeup from driver + * to unblock is also noted down. They should match in a + * correctly working setup. If there is less unblocks + * than blocks, then Tx side waits forever for a wakeup + * from below. The following protected by srs_lock. 
+ */ + uint64_t mts_blockcnt; /* times blocked for Tx descs */ + uint64_t mts_unblockcnt; /* unblock calls from driver */ + uint64_t mts_sdrops; +} mac_tx_stats_t; + +typedef struct mac_misc_stats_s { + uint64_t mms_multircv; + uint64_t mms_brdcstrcv; + uint64_t mms_multixmt; + uint64_t mms_brdcstxmt; + uint64_t mms_multircvbytes; + uint64_t mms_brdcstrcvbytes; + uint64_t mms_multixmtbytes; + uint64_t mms_brdcstxmtbytes; + uint64_t mms_txerrors; /* vid_check, tag needed errors */ + + /* + * When a ring is taken away from a mac client, before destroying + * corresponding SRS (for rx ring) or soft ring (for tx ring), add stats + * recorded by that SRS or soft ring to defunct lane stats. + */ + mac_rx_stats_t mms_defunctrxlanestats; + mac_tx_stats_t mms_defuncttxlanestats; + + /* link protection stats */ + uint64_t mms_macspoofed; + uint64_t mms_ipspoofed; + uint64_t mms_dhcpspoofed; + uint64_t mms_restricted; + uint64_t mms_dhcpdropped; +} mac_misc_stats_t; + +extern void mac_misc_stat_create(flow_entry_t *); +extern void mac_misc_stat_delete(flow_entry_t *); + +extern void mac_ring_stat_create(mac_ring_t *); +extern void mac_ring_stat_delete(mac_ring_t *); + +extern void mac_srs_stat_create(struct mac_soft_ring_set_s *); +extern void mac_srs_stat_delete(struct mac_soft_ring_set_s *); +extern void mac_tx_srs_stat_recreate(struct mac_soft_ring_set_s *, + boolean_t); + +extern void mac_soft_ring_stat_create(struct mac_soft_ring_s *); +extern void mac_soft_ring_stat_delete(struct mac_soft_ring_s *); + +extern void mac_stat_rename(mac_client_impl_t *); +extern void mac_pseudo_ring_stat_rename(mac_impl_t *); + +extern void mac_driver_stat_create(mac_impl_t *); +extern void mac_driver_stat_delete(mac_impl_t *); +extern uint64_t mac_driver_stat_default(mac_impl_t *, uint_t); + +extern uint64_t mac_rx_ring_stat_get(void *, uint_t); +extern uint64_t mac_tx_ring_stat_get(void *, uint_t); + +#endif /* _MAC_STAT_H */ diff --git a/usr/src/uts/common/sys/mii.h 
b/usr/src/uts/common/sys/mii.h index 6a060c8100..c3941affce 100644 --- a/usr/src/uts/common/sys/mii.h +++ b/usr/src/uts/common/sys/mii.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -428,11 +428,8 @@ boolean_t mii_m_loop_ioctl(mii_handle_t mii, queue_t *wq, mblk_t *msg); * mii MII handle. * name Property name. * id Property ID. - * flags Property flags (MAC_PROP_DEFAULT). * sz Size of property in bytes. * val Location to receive property value. - * perm Location to receive property permissions (either - * MAC_PROP_PERM_READ or MAC_PROP_PERM_RW). * * Returns * @@ -441,7 +438,7 @@ boolean_t mii_m_loop_ioctl(mii_handle_t mii, queue_t *wq, mblk_t *msg); * ENOTSUP if the prooperty is not supported by MII or the PHY. */ int mii_m_getprop(mii_handle_t mii, const char *name, mac_prop_id_t id, - uint_t flags, uint_t sz, void *val, uint_t *perm); + uint_t sz, void *val); /* * mii_m_setprop @@ -471,6 +468,25 @@ int mii_m_setprop(mii_handle_t mii, const char *name, mac_prop_id_t id, uint_t sz, const void *val); /* + * mii_m_propinfo + * + * Used to support the driver's mc_setprop() mac callback, + * and only to be called from that function (and without any + * locks held). + * + * Arguments + * + * mii MII handle. + * name Property name. + * id Property ID. + * prh Property info handle. + * + */ +void mii_m_propinfo(mii_handle_t mii, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t prh); + + +/* * mii_m_getstat * * Used to support the driver's mc_getstat() mac callback for diff --git a/usr/src/uts/common/sys/net80211.h b/usr/src/uts/common/sys/net80211.h index a8d45c6174..313b335afa 100644 --- a/usr/src/uts/common/sys/net80211.h +++ b/usr/src/uts/common/sys/net80211.h @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,7 @@ #define _SYS_NET80211_H #include <sys/mac.h> +#include <sys/mac_provider.h> #include <sys/ethernet.h> #include <sys/net80211_proto.h> #include <sys/net80211_crypto.h> @@ -731,8 +732,10 @@ void *ieee80211_malloc(size_t); void ieee80211_free(void *); int ieee80211_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int ieee80211_getprop(void *, const char *, mac_prop_id_t, uint_t, uint_t, - void *, uint_t *); +int ieee80211_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void ieee80211_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); + struct ieee80211_channel *ieee80211_find_channel(ieee80211com_t *, int, int); const struct ieee80211_rateset *ieee80211_get_suprates(ieee80211com_t *, diff --git a/usr/src/uts/common/sys/nxge/nxge.h b/usr/src/uts/common/sys/nxge/nxge.h index 14801131ce..042590e3e0 100644 --- a/usr/src/uts/common/sys/nxge/nxge.h +++ b/usr/src/uts/common/sys/nxge/nxge.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -601,6 +601,8 @@ typedef struct _nxge_ring_handle_t { p_nxge_t nxgep; int index; /* port-wise */ mac_ring_handle_t ring_handle; + uint64_t ring_gen_num; /* For RX Ring Start */ + uint32_t channel; } nxge_ring_handle_t, *p_nxge_ring_handle_t; /* @@ -791,8 +793,6 @@ struct _nxge_t { nxge_grp_set_t tx_set; boolean_t tdc_is_shared[NXGE_MAX_TDCS]; - boolean_t rx_channel_started[NXGE_MAX_RDCS]; - /* Ring Handles */ nxge_ring_handle_t tx_ring_handles[NXGE_MAX_TDCS]; nxge_ring_handle_t rx_ring_handles[NXGE_MAX_RDCS]; diff --git a/usr/src/uts/common/sys/nxge/nxge_hio.h b/usr/src/uts/common/sys/nxge/nxge_hio.h index 2f809e557f..492da24d55 100644 --- a/usr/src/uts/common/sys/nxge/nxge_hio.h +++ b/usr/src/uts/common/sys/nxge/nxge_hio.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -367,6 +367,8 @@ extern int nxge_hio_rxdma_bind_intr(nxge_t *, rx_rcr_ring_t *, int); /* nxge_hio_guest.c */ extern void nxge_hio_unregister(nxge_t *); +extern int nxge_hio_get_dc_htable_idx(nxge_t *nxge, vpc_type_t type, + uint32_t channel); extern int nxge_guest_regs_map(nxge_t *); extern void nxge_guest_regs_map_free(nxge_t *); @@ -392,7 +394,7 @@ extern nxge_status_t nxge_hio_intr_remove(nxge_t *, vpc_type_t, int); extern nxge_status_t nxge_hio_intr_add(nxge_t *, vpc_type_t, int); extern nxge_status_t nxge_hio_intr_rem(nxge_t *, int); -extern hv_rv_t nxge_hio_ldsv_add(nxge_t *, nxge_hio_dc_t *); +extern int nxge_hio_ldsv_add(nxge_t *, nxge_hio_dc_t *); extern void nxge_hio_ldsv_im(nxge_t *, nxge_ldg_t *, pio_ld_op_t, uint64_t *); extern void nxge_hio_ldgimgn(nxge_t *, nxge_ldg_t *); diff --git a/usr/src/uts/common/sys/nxge/nxge_impl.h b/usr/src/uts/common/sys/nxge/nxge_impl.h index dc6b29be68..1221e542dc 100644 --- a/usr/src/uts/common/sys/nxge/nxge_impl.h +++ b/usr/src/uts/common/sys/nxge/nxge_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * 
Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -641,6 +641,7 @@ struct _nxge_ldg_t { p_nxge_ldv_t ldvp; nxge_sys_intr_t sys_intr_handler; p_nxge_t nxgep; + uint32_t htable_idx; }; struct _nxge_ldv_t { @@ -885,6 +886,8 @@ int nxge_port_kstat_update(kstat_t *, int); void nxge_save_cntrs(p_nxge_t); int nxge_m_stat(void *arg, uint_t, uint64_t *); +int nxge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int nxge_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* nxge_hw.c */ void diff --git a/usr/src/uts/common/sys/nxge/nxge_rxdma.h b/usr/src/uts/common/sys/nxge/nxge_rxdma.h index ab0d0cde60..885f051cef 100644 --- a/usr/src/uts/common/sys/nxge/nxge_rxdma.h +++ b/usr/src/uts/common/sys/nxge/nxge_rxdma.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -209,7 +209,7 @@ typedef struct _rx_rcr_ring_t { p_nxge_rx_ring_stats_t rdc_stats; - int poll_flag; /* 1 if polling mode */ + boolean_t poll_flag; /* B_TRUE, if polling mode */ rcrcfig_a_t rcr_cfga; rcrcfig_b_t rcr_cfgb; @@ -244,6 +244,7 @@ typedef struct _rx_rcr_ring_t { uint32_t rcvd_pkt_bytes; /* Received bytes of a packet */ p_nxge_ldv_t ldvp; p_nxge_ldg_t ldgp; + boolean_t started; } rx_rcr_ring_t, *p_rx_rcr_ring_t; diff --git a/usr/src/uts/common/sys/pattr.h b/usr/src/uts/common/sys/pattr.h index 4d3dc29753..1269aeca10 100644 --- a/usr/src/uts/common/sys/pattr.h +++ b/usr/src/uts/common/sys/pattr.h @@ -67,20 +67,24 @@ typedef struct pattr_hcksum_s { */ #define HCK_IPV4_HDRCKSUM 0x01 /* On Transmit: Compute IP header */ /* checksum in hardware. */ - /* On Receive: IP header checksum */ + +#define HCK_IPV4_HDRCKSUM_OK 0x01 /* On Receive: IP header checksum */ /* was verified by h/w and is */ /* correct. 
*/ + #define HCK_PARTIALCKSUM 0x02 /* On Transmit: Compute partial 1's */ /* complement checksum based on */ /* start, stuff and end offsets. */ /* On Receive : Partial checksum */ /* computed and attached. */ + #define HCK_FULLCKSUM 0x04 /* On Transmit: Compute full(in case */ /* of TCP/UDP, full is pseudo-header */ /* + header + payload) checksum for */ /* this packet. */ /* On Receive : Full checksum */ /* computed in h/w and is attached */ + #define HCK_FULLCKSUM_OK 0x08 /* On Transmit: N/A */ /* On Receive: Full checksum status */ /* If set, implies full checksum */ diff --git a/usr/src/uts/common/sys/pool.h b/usr/src/uts/common/sys/pool.h index 679ca05a86..c4cc9fc22a 100644 --- a/usr/src/uts/common/sys/pool.h +++ b/usr/src/uts/common/sys/pool.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_POOL_H #define _SYS_POOL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/time.h> #include <sys/nvpair.h> @@ -41,6 +38,7 @@ extern "C" { #define POOL_DEFAULT 0 /* default pool's ID */ #define POOL_MAXID 999999 /* maximum possible pool ID */ +#define POOL_INVALID -1 /* pools states */ #define POOL_DISABLED 0 /* pools enabled */ @@ -81,6 +79,7 @@ extern size_t pool_bufsz; /* size of pool_buf */ */ extern pool_t *pool_lookup_pool_by_id(poolid_t); extern pool_t *pool_lookup_pool_by_name(char *); +extern pool_t *pool_lookup_pool_by_pset(int); /* * Configuration routines @@ -102,6 +101,7 @@ extern int pool_propput(int, int, id_t, nvpair_t *); extern int pool_proprm(int, int, id_t, char *); extern int pool_propget(char *, int, int, id_t, nvlist_t **); extern int pool_commit(int); +extern void pool_get_name(pool_t *, char **); /* * Synchronization routines @@ -113,6 +113,25 @@ extern void pool_unlock(void); extern void pool_barrier_enter(void); extern void pool_barrier_exit(void); +typedef enum { + POOL_E_ENABLE, + POOL_E_DISABLE, + POOL_E_CHANGE, +} pool_event_t; + +typedef void pool_event_cb_func_t(pool_event_t, poolid_t, void *); + +typedef struct pool_event_cb { + pool_event_cb_func_t *pec_func; + void *pec_arg; + list_node_t pec_list; +} pool_event_cb_t; + +/* + * Routines used to register interest in changes in cpu pools. 
+ */ +extern void pool_event_cb_register(pool_event_cb_t *); +extern void pool_event_cb_unregister(pool_event_cb_t *); #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index fd5db10058..7168f50362 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -1243,7 +1243,6 @@ extern int hcksum_assoc(mblk_t *, struct multidata_s *, struct pdesc_s *, extern void hcksum_retrieve(mblk_t *, struct multidata_s *, struct pdesc_s *, uint32_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *); extern void lso_info_set(mblk_t *, uint32_t, uint32_t); -extern void lso_info_get(mblk_t *, uint32_t *, uint32_t *); extern void lso_info_cleanup(mblk_t *); extern unsigned int bcksum(uchar_t *, int, unsigned int); extern boolean_t is_vmloaned_mblk(mblk_t *, struct multidata_s *, diff --git a/usr/src/uts/common/sys/vnic.h b/usr/src/uts/common/sys/vnic.h index 7e2aeac90a..3a6f5279ee 100644 --- a/usr/src/uts/common/sys/vnic.h +++ b/usr/src/uts/common/sys/vnic.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -111,9 +111,6 @@ typedef enum { */ #define VNIC_IOC_CREATE_FORCE 0x00000004 -/* Allocate a hardware ring to the vnic */ -#define VNIC_IOC_CREATE_REQ_HWRINGS 0x00000008 - typedef struct vnic_ioc_create { datalink_id_t vc_vnic_id; datalink_id_t vc_link_id; diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index b91f128fef..2bb48a60c6 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -63,16 +63,6 @@ typedef struct vnic_s { uint32_t vn_hcksum_txflags; } vnic_t; -#define vn_madd_naddr vn_mma_capab.maddr_naddr -#define vn_maddr_naddrfree vn_mma_capab.maddr_naddrfree -#define vn_maddr_flag vn_mma_capab.maddr_flag -#define vn_maddr_handle vn_mma_capab.maddr_handle -#define vn_maddr_reserve vn_mma_capab.maddr_reserve -#define vn_maddr_add vn_mma_capab.maddr_add -#define vn_maddr_remove vn_mma_capab.maddr_remove -#define vn_maddr_modify vn_mma_capab.maddr_modify -#define vn_maddr_get vn_mma_capab.maddr_get - extern int vnic_dev_create(datalink_id_t, datalink_id_t, vnic_mac_addr_type_t *, int *, uchar_t *, int *, uint_t, uint16_t, vrid_t, int, mac_resource_props_t *, uint32_t, vnic_ioc_diag_t *, cred_t *); diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c index 9ddae7aa30..7c22ff8e52 100644 --- a/usr/src/uts/common/xen/io/xnb.c +++ b/usr/src/uts/common/xen/io/xnb.c @@ -250,8 +250,7 @@ xnb_software_csum(xnb_t *xnbp, mblk_t *mp) * XXPV dme: shouldn't rely on mac_fix_cksum(), not least * because it doesn't cover all of the interesting cases :-( */ - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); return (mac_fix_cksum(mp)); } @@ -342,9 +341,7 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) */ *stuffp = 0; - (void) hcksum_assoc(mp, NULL, NULL, - 0, 0, 0, 0, - HCK_FULLCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); xnbp->xnb_stat_csum_hardware++; @@ -375,9 +372,8 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) *stuffp = (uint16_t)(cksum ? 
cksum : ~cksum); } - (void) hcksum_assoc(mp, NULL, NULL, - start, stuff, length, 0, - HCK_PARTIALCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, start, stuff, length, 0, + HCK_PARTIALCKSUM); xnbp->xnb_stat_csum_hardware++; @@ -911,13 +907,13 @@ replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) mblk_t *new_mp; new_mp = copyb(mp); - if (new_mp == NULL) + if (new_mp == NULL) { cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" "for %p, len %lu", (void *) mp, len); + } - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); - (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, - flags, KM_NOSLEEP); + mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); + mac_hcksum_set(new_mp, start, stuff, end, value, flags); new_mp->b_next = mp->b_next; new_mp->b_prev = mp->b_prev; diff --git a/usr/src/uts/common/xen/io/xnbo.c b/usr/src/uts/common/xen/io/xnbo.c index 78c6ba02e7..9a51328896 100644 --- a/usr/src/uts/common/xen/io/xnbo.c +++ b/usr/src/uts/common/xen/io/xnbo.c @@ -173,8 +173,7 @@ xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) * gain some advantage. */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, &csum, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, &csum, &pflags); /* * If the MAC driver has asserted that the checksum is diff --git a/usr/src/uts/common/xen/io/xnbu.c b/usr/src/uts/common/xen/io/xnbu.c index cf192365cf..c41a584e8b 100644 --- a/usr/src/uts/common/xen/io/xnbu.c +++ b/usr/src/uts/common/xen/io/xnbu.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ static mac_callbacks_t xnbu_callbacks = { xnbu_m_set_mac_addr, xnbu_m_send, NULL, + NULL, xnbu_m_getcapab }; @@ -130,16 +131,8 @@ xnbu_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags) if ((flags & NETTXF_data_validated) != 0) { /* * The checksum is asserted valid. 
- * - * The hardware checksum offload specification says - * that we must provide the actual checksum as well as - * an assertion that it is valid, but the protocol - * stack doesn't actually use it so we don't bother. - * If it was necessary we could grovel in the packet - * to find it. */ - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | HCK_FULLCKSUM_OK, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); } return (mp); @@ -152,8 +145,7 @@ xnbu_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) uint16_t r = 0; uint32_t pflags; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); /* * If the protocol stack has requested checksum diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c index b6d4cad439..534b3f0904 100644 --- a/usr/src/uts/common/xen/io/xnf.c +++ b/usr/src/uts/common/xen/io/xnf.c @@ -257,6 +257,7 @@ static mac_callbacks_t xnf_callbacks = { xnf_set_mac_addr, xnf_send, NULL, + NULL, xnf_getcapab }; @@ -1619,8 +1620,7 @@ xnf_send(void *arg, mblk_t *mp) txp->tx_txreq.size = length; txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; txp->tx_txreq.flags = 0; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, - &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); if (pflags != 0) { /* * If the local protocol stack requests checksum @@ -2104,21 +2104,9 @@ xnf_rx_collect(xnf_t *xnfp) * blank" flag, and hence could have a * packet here that we are asserting * is good with a blank checksum. - * - * The hardware checksum offload - * specification says that we must - * provide the actual checksum as well - * as an assertion that it is valid, - * but the protocol stack doesn't - * actually use it and some other - * drivers don't bother, so we don't. - * If it was necessary we could grovel - * in the packet to find it. 
*/ - (void) hcksum_assoc(mp, NULL, - NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | - HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, + HCK_FULLCKSUM_OK); xnfp->xnf_stat_rx_cksum_no_need++; } if (head == NULL) { diff --git a/usr/src/uts/intel/hxge/Makefile b/usr/src/uts/intel/hxge/Makefile index 40f6b64bcb..836f7c0924 100644 --- a/usr/src/uts/intel/hxge/Makefile +++ b/usr/src/uts/intel/hxge/Makefile @@ -20,12 +20,9 @@ # # uts/intel/hxge/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# -# ident "%Z%%M% %I% %E% SMI" -# # This makefile drives the production of the Sun # 10G hxge Ethernet leaf driver kernel module. # @@ -71,7 +68,6 @@ CFLAGS += -dalign # # Include hxge specific header files # -INC_PATH += -I$(UTSBASE)/common INC_PATH += -I$(UTSBASE)/common/io/hxge # # diff --git a/usr/src/uts/intel/io/amd8111s/amd8111s_main.c b/usr/src/uts/intel/io/amd8111s/amd8111s_main.c index 1664ee7543..317e55b22a 100644 --- a/usr/src/uts/intel/io/amd8111s/amd8111s_main.c +++ b/usr/src/uts/intel/io/amd8111s/amd8111s_main.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -195,6 +195,7 @@ static mac_callbacks_t amd8111s_m_callbacks = { amd8111s_m_multicst, amd8111s_m_unicst, amd8111s_m_tx, + NULL, amd8111s_m_ioctl }; diff --git a/usr/src/uts/intel/io/dnet/dnet.c b/usr/src/uts/intel/io/dnet/dnet.c index 2c045e893c..a6badb9b4b 100644 --- a/usr/src/uts/intel/io/dnet/dnet.c +++ b/usr/src/uts/intel/io/dnet/dnet.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -314,6 +314,7 @@ static mac_callbacks_t dnet_m_callbacks = { dnet_m_multicst, /* mc_multicst */ dnet_m_unicst, /* mc_unicst */ dnet_m_tx, /* mc_tx */ + NULL, NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ diff --git a/usr/src/uts/intel/mii/Makefile b/usr/src/uts/intel/mii/Makefile index 8f3b7b6772..28ac502177 100644 --- a/usr/src/uts/intel/mii/Makefile +++ b/usr/src/uts/intel/mii/Makefile @@ -21,7 +21,7 @@ # # uts/intel/mii/Makefile # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the mii support module. @@ -55,7 +55,7 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy +LDFLAGS += -dy -N misc/mac # # Default build targets. diff --git a/usr/src/uts/intel/qlge/Makefile b/usr/src/uts/intel/qlge/Makefile index 52f1c143a2..82f64ac215 100644 --- a/usr/src/uts/intel/qlge/Makefile +++ b/usr/src/uts/intel/qlge/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the qlge driver kernel module. @@ -57,8 +57,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # MODSTUBS_DIR = $(OBJS_DIR) -INC_PATH += -I$(ROOT)/usr/include -INC_PATH += -I$(UTSBASE)/common/sys INC_PATH += -I$(UTSBASE)/common/sys/fibre-channel/fca/qlge LDFLAGS += -dy -Nmisc/mac -Ndrv/ip diff --git a/usr/src/uts/sparc/hxge/Makefile b/usr/src/uts/sparc/hxge/Makefile index 79b504d443..a04957a00f 100644 --- a/usr/src/uts/sparc/hxge/Makefile +++ b/usr/src/uts/sparc/hxge/Makefile @@ -20,7 +20,7 @@ # # uts/sparc/hxge/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # This makefile drives the production of the Sun @@ -68,7 +68,6 @@ CFLAGS += -dalign # # Include hxge specific header files # -INC_PATH += -I$(UTSBASE)/common INC_PATH += -I$(UTSBASE)/common/io/hxge # # diff --git a/usr/src/uts/sparc/mii/Makefile b/usr/src/uts/sparc/mii/Makefile index 52726241b9..b1e80d5bd6 100644 --- a/usr/src/uts/sparc/mii/Makefile +++ b/usr/src/uts/sparc/mii/Makefile @@ -21,7 +21,7 @@ # # uts/sparc/mii/Makefile # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the mii support module. @@ -55,7 +55,7 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy +LDFLAGS += -dy -N misc/mac # # Default build targets. diff --git a/usr/src/uts/sparc/qlge/Makefile b/usr/src/uts/sparc/qlge/Makefile index 52f1c143a2..82f64ac215 100644 --- a/usr/src/uts/sparc/qlge/Makefile +++ b/usr/src/uts/sparc/qlge/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the qlge driver kernel module. @@ -57,8 +57,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # MODSTUBS_DIR = $(OBJS_DIR) -INC_PATH += -I$(ROOT)/usr/include -INC_PATH += -I$(UTSBASE)/common/sys INC_PATH += -I$(UTSBASE)/common/sys/fibre-channel/fca/qlge LDFLAGS += -dy -Nmisc/mac -Ndrv/ip diff --git a/usr/src/uts/sun/io/eri/eri.c b/usr/src/uts/sun/io/eri/eri.c index ab08fafc39..a4ac10cdd3 100644 --- a/usr/src/uts/sun/io/eri/eri.c +++ b/usr/src/uts/sun/io/eri/eri.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -200,6 +200,7 @@ static mac_callbacks_t eri_m_callbacks = { eri_m_multicst, eri_m_unicst, eri_m_tx, + NULL, eri_m_ioctl, eri_m_getcapab }; @@ -264,9 +265,9 @@ static mac_callbacks_t eri_m_callbacks = { if (type == ETHERTYPE_IP || type == ETHERTYPE_IPV6) { \ start_offset = 0; \ end_offset = MBLKL(bp) - ETHERHEADER_SIZE; \ - (void) hcksum_assoc(bp, NULL, NULL, \ + mac_hcksum_set(bp, \ start_offset, 0, end_offset, sum, \ - HCK_PARTIALCKSUM, 0); \ + HCK_PARTIALCKSUM); \ } else { \ /* \ * Strip the PADS for 802.3 \ @@ -3469,8 +3470,7 @@ eri_send_msg(struct eri *erip, mblk_t *mp) } #ifdef ERI_HWCSUM - hcksum_retrieve(mp, NULL, NULL, &start_offset, &stuff_offset, - NULL, NULL, &flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, NULL, NULL, &flags); if (flags & HCK_PARTIALCKSUM) { if (get_ether_type(mp->b_rptr) == ETHERTYPE_VLAN) { diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c index f25860b251..1f857dbe13 100644 --- a/usr/src/uts/sun4v/io/vnet.c +++ b/usr/src/uts/sun4v/io/vnet.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <sys/errno.h> #include <sys/param.h> +#include <sys/callb.h> #include <sys/stream.h> #include <sys/kmem.h> #include <sys/conf.h> @@ -84,8 +85,12 @@ static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, mac_group_info_t *infop, mac_group_handle_t handle); static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, + uint64_t *val); static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, + uint64_t *val); static int vnet_ring_enable_intr(void *arg); static int vnet_ring_disable_intr(void *arg); static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); @@ -107,7 +112,6 @@ static void 
vnet_unbind_rings(vnet_res_t *vresp); static int vnet_hio_stat(void *, uint_t, uint64_t *); static int vnet_hio_start(void *); static void vnet_hio_stop(void *); -static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type); mblk_t *vnet_hio_tx(void *, mblk_t *); /* Forwarding database (FDB) routines */ @@ -129,6 +133,7 @@ static void vnet_res_start_task(void *arg); static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); +static void vnet_tx_notify_thread(void *); /* Exported to vnet_gen */ int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); @@ -168,8 +173,7 @@ extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); extern void vdds_cleanup_hybrid_res(void *arg); extern void vdds_cleanup_hio(vnet_t *vnetp); -/* Externs imported from mac_impl */ -extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); +extern pri_t minclsyspri; #define DRV_NAME "vnet" #define VNET_FDBE_REFHOLD(p) \ @@ -199,6 +203,7 @@ static mac_callbacks_t vnet_m_callbacks = { vnet_m_multicst, NULL, /* m_unicst entry must be NULL while rx rings are exposed */ NULL, /* m_tx entry must be NULL while tx rings are exposed */ + NULL, vnet_m_ioctl, vnet_m_capab, NULL @@ -232,6 +237,8 @@ uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ /* Configure tx serialization in mac layer for the vnet device */ boolean_t vnet_mac_tx_serialize = B_TRUE; +/* Configure enqueing at Rx soft rings in mac layer for the vnet device */ +boolean_t vnet_mac_rx_queuing = B_TRUE; /* * Set this to non-zero to enable additional internal receive buffer pools @@ -785,6 +792,7 @@ mblk_t * vnet_tx_ring_send(void *arg, mblk_t *mp) { vnet_pseudo_tx_ring_t *tx_ringp; + vnet_tx_ring_stats_t *statsp; vnet_t *vnetp; vnet_res_t *vresp; mblk_t *next; @@ -795,8 +803,10 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) boolean_t is_pvid; /* non-default pvid ? 
*/ boolean_t hres; /* Hybrid resource ? */ void *tx_arg; + size_t size; tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + statsp = &tx_ringp->tx_ring_stats; vnetp = (vnet_t *)tx_ringp->vnetp; DBG1(vnetp, "enter\n"); ASSERT(mp != NULL); @@ -808,6 +818,9 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) next = mp->b_next; mp->b_next = NULL; + /* update stats */ + size = msgsize(mp); + /* * Find fdb entry for the destination * and hold a reference to it. @@ -911,6 +924,8 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) } } + statsp->obytes += size; + statsp->opackets++; mp = next; } @@ -971,6 +986,10 @@ vnet_ring_grp_init(vnet_t *vnetp) } tx_grp->rings = tx_ringp; tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; + mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL); + tx_grp->flowctl_thread = thread_create(NULL, 0, + vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri); rx_grp = &vnetp->rx_grp[0]; rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; @@ -1005,8 +1024,21 @@ vnet_ring_grp_uninit(vnet_t *vnetp) { vnet_pseudo_rx_group_t *rx_grp; vnet_pseudo_tx_group_t *tx_grp; + kt_did_t tid = 0; tx_grp = &vnetp->tx_grp[0]; + + /* Inform tx_notify_thread to exit */ + mutex_enter(&tx_grp->flowctl_lock); + if (tx_grp->flowctl_thread != NULL) { + tid = tx_grp->flowctl_thread->t_did; + tx_grp->flowctl_done = B_TRUE; + cv_signal(&tx_grp->flowctl_cv); + } + mutex_exit(&tx_grp->flowctl_lock); + if (tid != 0) + thread_join(tid); + if (tx_grp->rings != NULL) { ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * @@ -1090,14 +1122,7 @@ vnet_mac_register(vnet_t *vnetp) macp->m_max_sdu = vnetp->mtu; macp->m_margin = VLAN_TAGSZ; - /* - * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to - * workaround tx lock contention issues in nxge. 
- */ macp->m_v12n = MAC_VIRT_LEVEL1; - if (vnet_mac_tx_serialize == B_TRUE) { - macp->m_v12n |= MAC_VIRT_SERIALIZE; - } /* * Finally, we're ready to register ourselves with the MAC layer @@ -1404,6 +1429,73 @@ vnet_tx_update(vio_net_handle_t vrh) } /* + * vnet_tx_notify_thread: + * + * vnet_tx_ring_update() callback function wakes up this thread when + * it gets called. This thread will call mac_tx_ring_update() to + * notify upper mac of flow control getting relieved. Note that + * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly + * because vnet_tx_ring_update() is called from lower mac with + * mi_rw_lock held and mac_tx_ring_update() would also try to grab + * the same lock. + */ +static void +vnet_tx_notify_thread(void *arg) +{ + callb_cpr_t cprinfo; + vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg; + vnet_pseudo_tx_ring_t *tx_ringp; + vnet_t *vnetp; + int i; + + CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr, + "vnet_tx_notify_thread"); + + mutex_enter(&tx_grp->flowctl_lock); + while (!tx_grp->flowctl_done) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock); + CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock); + + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->woken_up) { + tx_ringp->woken_up = B_FALSE; + vnetp = tx_ringp->vnetp; + mac_tx_ring_update(vnetp->mh, tx_ringp->handle); + } + } + } + /* + * The tx_grp is being destroyed, exit the thread. 
+ */ + tx_grp->flowctl_thread = NULL; + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +void +vnet_tx_ring_update(void *arg1, uintptr_t arg2) +{ + vnet_t *vnetp = (vnet_t *)arg1; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) { + mutex_enter(&tx_grp->flowctl_lock); + tx_ringp->woken_up = B_TRUE; + cv_signal(&tx_grp->flowctl_cv); + mutex_exit(&tx_grp->flowctl_lock); + break; + } + } +} + +/* * Update the new mtu of vnet into the mac layer. First check if the device has * been plumbed and if so fail the mtu update. Returns 0 on success. */ @@ -2053,6 +2145,22 @@ vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) * we unmap ring->hw_rh. For rings mapped to LDC resources, we * stop the rx callbacks (in vgen) before we remove ring->hw_rh * (vio_net_resource_unreg()). + * Also, we access ring->hw_rh in vnet_rx_ring_stat(). + * Note that for rings mapped to Hybrid resource, though the + * rings are statically registered with the mac layer, its + * hardware ring mapping (ringp->hw_rh) can be torn down in + * vnet_unbind_hwrings() while the kstat operation is in + * progress. To protect against this, we hold a reference to + * the resource in FDB; this ensures that the thread in + * vio_net_resource_unreg() waits for the reference to be + * dropped before unbinding the ring. + * + * We don't need to do this for rings mapped to LDC resources. + * These rings are registered/unregistered dynamically with + * the mac layer and so any attempt to unregister the ring + * while kstat operation is in progress will block in + * mac_group_rem_ring(). Thus implicitly protects the + * resource (ringp->hw_rh) from disappearing. 
*/ if (cap_rings->mr_type == MAC_RING_TYPE_RX) { @@ -2148,10 +2256,22 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, infop->mri_driver = (mac_ring_driver_t)rx_ringp; infop->mri_start = vnet_rx_ring_start; infop->mri_stop = vnet_rx_ring_stop; + infop->mri_stat = vnet_rx_ring_stat; /* Set the poll function, as this is an rx ring */ infop->mri_poll = vnet_rx_poll; - + /* + * MAC_RING_RX_ENQUEUE bit needed to be set for nxge + * which was not sending packet chains in interrupt + * context. For such drivers, packets are queued in + * Rx soft rings so that we get a chance to switch + * into a polling mode under backlog. This bug (not + * sending packet chains) has now been fixed. Once + * the performance impact is measured, this change + * will be removed. + */ + infop->mri_flags = (vnet_mac_rx_queuing ? + MAC_RING_RX_ENQUEUE : 0); break; } @@ -2178,10 +2298,17 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, infop->mri_driver = (mac_ring_driver_t)tx_ringp; infop->mri_start = vnet_tx_ring_start; infop->mri_stop = vnet_tx_ring_stop; + infop->mri_stat = vnet_tx_ring_stat; /* Set the transmit function, as this is a tx ring */ infop->mri_tx = vnet_tx_ring_send; - + /* + * MAC_RING_TX_SERIALIZE bit needs to be set while + * hybridIO is enabled to workaround tx lock + * contention issues in nxge. + */ + infop->mri_flags = (vnet_mac_tx_serialize ? + MAC_RING_TX_SERIALIZE : 0); break; } @@ -2325,6 +2452,44 @@ vnet_rx_ring_stop(mac_ring_driver_t arg) rx_ringp->state &= ~VNET_RXRING_STARTED; } +static int +vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver; + vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp; + vnet_res_t *vresp; + mac_register_t *macp; + mac_callbacks_t *cbp; + + /* + * Refer to vnet_m_capab() function for detailed comments on ring + * synchronization. 
+ */ + if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) { + READ_ENTER(&vnetp->vsw_fp_rw); + if (vnetp->hio_fp == NULL) { + RW_EXIT(&vnetp->vsw_fp_rw); + return (0); + } + + VNET_FDBE_REFHOLD(vnetp->hio_fp); + RW_EXIT(&vnetp->vsw_fp_rw); + mac_hwring_getstat(rx_ringp->hw_rh, stat, val); + VNET_FDBE_REFRELE(vnetp->hio_fp); + return (0); + } + + ASSERT((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0); + vresp = (vnet_res_t *)rx_ringp->hw_rh; + macp = &vresp->macreg; + cbp = macp->m_callbacks; + + cbp->mc_getstat(macp->m_driver, stat, val); + + return (0); +} + /* ARGSUSED */ static int vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) @@ -2343,6 +2508,31 @@ vnet_tx_ring_stop(mac_ring_driver_t arg) tx_ringp->state &= ~VNET_TXRING_STARTED; } +static int +vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver; + vnet_tx_ring_stats_t *statsp; + + statsp = &tx_ringp->tx_ring_stats; + + switch (stat) { + case MAC_STAT_OPACKETS: + *val = statsp->opackets; + break; + + case MAC_STAT_OBYTES: + *val = statsp->obytes; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + /* * Disable polling for a ring and enable its interrupt. 
*/ @@ -2569,10 +2759,6 @@ vnet_hio_mac_init(vnet_t *vnetp, char *ifname) /* add the recv callback */ mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); - /* add the notify callback - only tx updates for now */ - vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb, - vnetp); - return (0); fail: @@ -2584,11 +2770,6 @@ fail: void vnet_hio_mac_cleanup(vnet_t *vnetp) { - if (vnetp->hio_mnh != NULL) { - (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE); - vnetp->hio_mnh = NULL; - } - if (vnetp->hio_vhp != NULL) { vio_net_resource_unreg(vnetp->hio_vhp); vnetp->hio_vhp = NULL; @@ -2666,7 +2847,7 @@ vnet_bind_hwrings(vnet_t *vnetp) /* Bind the pseudo ring to the underlying hwring */ mac_hwring_setup(rx_ringp->hw_rh, - (mac_resource_handle_t)rx_ringp); + (mac_resource_handle_t)rx_ringp, NULL); /* Start the hwring if needed */ if (rx_ringp->state & VNET_RXRING_STARTED) { @@ -2703,6 +2884,8 @@ vnet_bind_hwrings(vnet_t *vnetp) tx_ringp->hw_rh = hw_rh[i]; tx_ringp->state |= VNET_TXRING_HYBRID; } + tx_grp->tx_notify_handle = + mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp); mac_perim_exit(mph1); return (0); @@ -2734,6 +2917,8 @@ vnet_unbind_hwrings(vnet_t *vnetp) tx_ringp->hw_rh = NULL; } } + (void) mac_client_tx_notify(vnetp->hio_mch, NULL, + tx_grp->tx_notify_handle); rx_grp = &vnetp->rx_grp[0]; for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { @@ -2980,24 +3165,6 @@ vnet_hio_tx(void *arg, mblk_t *mp) return (mp); } -static void -vnet_hio_notify_cb(void *arg, mac_notify_type_t type) -{ - vnet_t *vnetp = (vnet_t *)arg; - mac_perim_handle_t mph; - - mac_perim_enter_by_mh(vnetp->hio_mh, &mph); - switch (type) { - case MAC_NOTE_TX: - vnet_tx_update(vnetp->hio_vhp); - break; - - default: - break; - } - mac_perim_exit(mph); -} - #ifdef VNET_IOC_DEBUG /* diff --git a/usr/src/uts/sun4v/io/vnet_gen.c b/usr/src/uts/sun4v/io/vnet_gen.c index 6bf674fd85..875c8dd93f 100644 --- a/usr/src/uts/sun4v/io/vnet_gen.c +++ b/usr/src/uts/sun4v/io/vnet_gen.c @@ -461,6 +461,7 
@@ static mac_callbacks_t vgen_m_callbacks = { vgen_multicst, vgen_unicst, vgen_tx, + NULL, vgen_ioctl, NULL, NULL diff --git a/usr/src/uts/sun4v/io/vsw.c b/usr/src/uts/sun4v/io/vsw.c index f53adb5af5..a061321e86 100644 --- a/usr/src/uts/sun4v/io/vsw.c +++ b/usr/src/uts/sun4v/io/vsw.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -295,10 +295,7 @@ static mac_callbacks_t vsw_m_callbacks = { vsw_m_promisc, vsw_m_multicst, vsw_m_unicst, - vsw_m_tx, - NULL, - NULL, - NULL + vsw_m_tx }; static struct cb_ops vsw_cb_ops = { diff --git a/usr/src/uts/sun4v/io/vsw_phys.c b/usr/src/uts/sun4v/io/vsw_phys.c index f5fc90b929..c725e8bb5f 100644 --- a/usr/src/uts/sun4v/io/vsw_phys.c +++ b/usr/src/uts/sun4v/io/vsw_phys.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -497,17 +497,12 @@ vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type) * Open a MAC client for a port or an interface. * The flags and their purpose as below: * - * MAC_OPEN_FLAGS_NO_HWRINGS -- This flag is used by default - * for all ports/interface so that they are associated with - * default group & resources. It will not be used for the - * ports that have HybridIO is enabled so that the h/w resources - * assigned to it. - * * MAC_OPEN_FLAGS_SHARES_DESIRED -- This flag is used to indicate * that a port desires a Share. This will be the case with the * the ports that have hybrid mode enabled. This will only cause * MAC layer to allocate a share and corresponding resources - * ahead of time. + * ahead of time. Ports that are not HybridIO enabled are + * associated with default group & resources. * * MAC_UNICAST_TAG_DISABLE -- This flag is used for VLAN * support. 
It will cause MAC to not add any tags, but expect @@ -525,7 +520,7 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) char mac_cl_name[MAXNAMELEN]; const char *dev_name; mac_client_handle_t *mchp; - uint64_t flags = MAC_OPEN_FLAGS_NO_HWRINGS; + uint64_t flags = 0; ASSERT(MUTEX_HELD(&vswp->mac_lock)); if (vswp->mh == NULL) { @@ -545,10 +540,8 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) dev_name = ddi_driver_name(vswp->dip); instance = ddi_get_instance(vswp->dip); if (type == VSW_VNETPORT) { - if (port->p_hio_enabled == B_TRUE) { - flags &= ~MAC_OPEN_FLAGS_NO_HWRINGS; + if (port->p_hio_enabled) flags |= MAC_OPEN_FLAGS_SHARES_DESIRED; - } (void) snprintf(mac_cl_name, MAXNAMELEN, "%s%d%s%d", dev_name, instance, "_port", port->p_instance); } else { @@ -561,6 +554,10 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) cmn_err(CE_NOTE, "!vsw%d:%s mac_client_open() failed\n", vswp->instance, mac_cl_name); } + + if (type != VSW_VNETPORT || !port->p_hio_enabled) + mac_client_set_rings(*mchp, MAC_RXRINGS_NONE, MAC_TXRINGS_NONE); + return (rv); } @@ -1389,7 +1386,7 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) { int rv = 0; uint64_t *bw; - mac_resource_props_t mrp; + mac_resource_props_t *mrp; mac_client_handle_t mch; ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); @@ -1409,15 +1406,15 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) } if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); if (maxbw == 0) { - mrp.mrp_maxbw = MRP_MAXBW_RESETVAL; + mrp->mrp_maxbw = MRP_MAXBW_RESETVAL; } else { - mrp.mrp_maxbw = maxbw; + mrp->mrp_maxbw = maxbw; } - mrp.mrp_mask |= MRP_MAXBW; + mrp->mrp_mask |= MRP_MAXBW; - rv = mac_client_set_resources(mch, &mrp); + rv = mac_client_set_resources(mch, mrp); if (rv != 0) { if (type == VSW_VNETPORT) { cmn_err(CE_NOTE, "!port%d: cannot set " @@ -1434,5 
+1431,6 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) */ *bw = maxbw; } + kmem_free(mrp, sizeof (*mrp)); } } diff --git a/usr/src/uts/sun4v/sys/vnet.h b/usr/src/uts/sun4v/sys/vnet.h index 21fb92852b..e80324110e 100644 --- a/usr/src/uts/sun4v/sys/vnet.h +++ b/usr/src/uts/sun4v/sys/vnet.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -106,6 +106,11 @@ typedef struct vnet_hio_kstats { kstat_named_t noxmtbuf; /* MIB - ifOutDiscards */ } vnet_hio_kstats_t; +typedef struct vnet_tx_ring_stats { + uint64_t opackets; /* # tx packets */ + uint64_t obytes; /* # bytes transmitted */ +} vnet_tx_ring_stats_t; + /* * A vnet resource structure. */ @@ -214,6 +219,8 @@ typedef struct vnet_pseudo_tx_ring { /* ring handle. Hybrid res: ring hdl */ /* of hardware rx ring; LDC res: hdl */ /* to the res itself (vnet_res_t) */ + boolean_t woken_up; + vnet_tx_ring_stats_t tx_ring_stats; /* ring statistics */ } vnet_pseudo_tx_ring_t; /* @@ -241,6 +248,11 @@ typedef struct vnet_pseudo_tx_group { mac_group_handle_t handle; /* grp handle in mac layer */ uint_t ring_cnt; /* total # of rings in grp */ vnet_pseudo_tx_ring_t *rings; /* array of rings */ + kmutex_t flowctl_lock; /* flow control lock */ + kcondvar_t flowctl_cv; + kthread_t *flowctl_thread; + boolean_t flowctl_done; + void *tx_notify_handle; /* Tx ring notification */ } vnet_pseudo_tx_group_t; /* @@ -298,7 +310,6 @@ typedef struct vnet { mac_handle_t hio_mh; /* HIO mac hdl */ mac_client_handle_t hio_mch; /* HIO mac client hdl */ mac_unicast_handle_t hio_muh; /* HIO mac unicst hdl */ - mac_notify_handle_t hio_mnh; /* HIO notify cb hdl */ mac_group_handle_t rx_hwgh; /* HIO rx ring-group hdl */ mac_group_handle_t tx_hwgh; /* HIO tx ring-group hdl */ } vnet_t; |