diff options
author | Venugopal Iyer <Venu.Iyer@Sun.COM> | 2010-03-09 15:30:01 -0800 |
---|---|---|
committer | Venugopal Iyer <Venu.Iyer@Sun.COM> | 2010-03-09 15:30:01 -0800 |
commit | 0dc2366f7b9f9f36e10909b1e95edbf2a261c2ac (patch) | |
tree | 21e9b7ebf6656a506e66695947b60e562787ea4f | |
parent | e47012d1925f46ba3ba641bef25c0cf3af74d020 (diff) | |
download | illumos-gate-0dc2366f7b9f9f36e10909b1e95edbf2a261c2ac.tar.gz |
PSARC/2009/364 dlstat and flowstat
PSARC/2009/436 Anti-spoofing Link Protection
PSARC/2009/448 pool dladm link property
PSARC/2009/501 Dynamic Ring Grouping on NICs
PSARC/2009/638 Public GLDv3 Interfaces
PSARC/2010/074 Crossbow resource usage updates
6838175 mac_tx should be able to send out a packet without a configured address
6806552 single MAC default TX ring doesn't scale
6809686 back-to-back LACP not recovering after removing one of the aggregated ports
6902209 setting maxbw to zero requires an intermediate reset-linkprop to take effect
6855972 Bind interrupts to the same CPU as poll thread using new interrupt APIs
6863945 aggr pseudo Tx rings
6796839 allow CPU pools to be associated with data-links
6526471 data-links assigned to an exclusive zone should seamlessly be bound to the zone's CPUs
6802595 Per-link stats could use some improvement
6889685 Crossbow should provide control over provision of h/w rings to MAC clients.
6708310 ixgbe needs to support VMDq
6869019 ixgbe should support IRM (Interrupt Resource Management framework)
6902266 vnet should support entry point for per ring stat querying
6926790 Integrate Link Protection Phase II
6930358 Make the core set of GLDv3 driver APIs committed
6901419 dladm create-aggr -u incorrectly rejects some valid ethernet addresses
6717042 should support "cpus" link properties for aggregations
6908184 bge_set_priv_prop() and bge_get_priv_prop() can't agree on the set of private properties
6907617 bge_m_getprop() shouldn't return default values for read-only properties
6900833 unused code in vnic_impl.h can be removed
214 files changed, 24250 insertions, 7110 deletions
diff --git a/exception_lists/packaging b/exception_lists/packaging index 7f3da27d0e..500926f56f 100644 --- a/exception_lists/packaging +++ b/exception_lists/packaging @@ -90,8 +90,8 @@ usr/include/sys/mac_client_impl.h usr/include/sys/mac_client.h usr/include/sys/mac_flow_impl.h usr/include/sys/mac_impl.h -usr/include/sys/mac_provider.h usr/include/sys/mac_soft_ring.h +usr/include/sys/mac_stat.h # # Private GLDv3 userland libraries and headers # diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 36b830bfbf..2d0ce5e6d5 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -126,6 +126,7 @@ COMMON_SUBDIRS= \ diskmgtd \ dispadmin \ dladm \ + dlstat \ dmesg \ dodatadm \ dtrace \ @@ -157,6 +158,7 @@ COMMON_SUBDIRS= \ filebench \ find \ flowadm \ + flowstat \ fm \ fmli \ fmt \ @@ -563,6 +565,7 @@ MSGSUBDIRS= \ diff \ diffmk \ dladm \ + dlstat \ du \ dumpcs \ ed \ @@ -577,6 +580,7 @@ MSGSUBDIRS= \ filesync \ find \ flowadm \ + flowstat \ fm \ fold \ fs.d \ diff --git a/usr/src/cmd/acctadm/main.c b/usr/src/cmd/acctadm/main.c index 2c610bdc10..8176214b9e 100644 --- a/usr/src/cmd/acctadm/main.c +++ b/usr/src/cmd/acctadm/main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -375,30 +375,7 @@ main(int argc, char *argv[]) } } str2buf(buf, disabled, AC_OFF, type); - } - if (enabled) { - /* - * Lets us get network logging started. - */ - if (type & AC_NET) { - /* - * Default logging interval for AC_NET is - * ACCTADM_NET_LOG_INTERVAL. - */ - (void) priv_set(PRIV_ON, PRIV_EFFECTIVE, - PRIV_SYS_DL_CONFIG, NULL); - err = dladm_start_usagelog(dld_handle, - strcmp(enabled, "basic") == 0 ? 
- DLADM_LOGTYPE_LINK : DLADM_LOGTYPE_FLOW, - ACCTADM_NET_LOG_INTERVAL); - (void) priv_set(PRIV_OFF, PRIV_EFFECTIVE, - PRIV_SYS_DL_CONFIG, NULL); - if (err != DLADM_STATUS_OK) { - die(gettext("failed to start logging " - "network information, error %d\n"), - errno); - } - } + } else if (enabled) { str2buf(buf, enabled, AC_ON, type); } (void) priv_set(PRIV_ON, PRIV_EFFECTIVE, PRIV_SYS_ACCT, NULL); @@ -408,7 +385,6 @@ main(int argc, char *argv[]) "resources\n"), ac_type_name(type)); } (void) priv_set(PRIV_OFF, PRIV_EFFECTIVE, PRIV_SYS_ACCT, NULL); - tracked = buf2str(buf, AC_BUFSIZE, AC_ON, type); untracked = buf2str(buf, AC_BUFSIZE, AC_OFF, type); if (aconf_set_string(AC_PROP_TRACKED, tracked) == -1) @@ -448,6 +424,31 @@ main(int argc, char *argv[]) modified++; } + /* + * Let's get network logging started. We do this after turning on + * accounting and opening the file so that we can start writing + * immediately. + */ + if (enabled && (type & AC_NET)) { + /* + * Default logging interval for AC_NET is + * ACCTADM_NET_LOG_INTERVAL. + */ + (void) priv_set(PRIV_ON, PRIV_EFFECTIVE, + PRIV_SYS_DL_CONFIG, NULL); + err = dladm_start_usagelog(dld_handle, + strcmp(enabled, "basic") == 0 ? 
+ DLADM_LOGTYPE_LINK : DLADM_LOGTYPE_FLOW, + ACCTADM_NET_LOG_INTERVAL); + (void) priv_set(PRIV_OFF, PRIV_EFFECTIVE, + PRIV_SYS_DL_CONFIG, NULL); + if (err != DLADM_STATUS_OK) { + die(gettext("failed to start logging " + "network information, error %d\n"), + errno); + } + } + if (Dflg) { /* * Disable accounting diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c index a55fa79735..713920767c 100644 --- a/usr/src/cmd/dladm/dladm.c +++ b/usr/src/cmd/dladm/dladm.c @@ -320,7 +320,7 @@ static cmd_t cmds[] = { " create-vnic [-t] -l <link> [-m <value> | auto |\n" "\t\t {factory [-n <slot-id>]} | {random [-r <prefix>]} |\n" "\t\t {vrrp -V <vrid> -A {inet | inet6}} [-v <vid> [-f]]\n" - "\t\t [-H] [-p <prop>=<value>[,...]] <vnic-link>" }, + "\t\t [-p <prop>=<value>[,...]] <vnic-link>" }, { "delete-vnic", do_delete_vnic, " delete-vnic [-t] <vnic-link>" }, { "show-vnic", do_show_vnic, @@ -810,18 +810,18 @@ static const ofmt_field_t phys_m_fields[] = { typedef enum { PHYS_H_LINK, - PHYS_H_GROUP, - PHYS_H_GRPTYPE, + PHYS_H_RINGTYPE, PHYS_H_RINGS, PHYS_H_CLIENTS } phys_h_field_index_t; +#define RINGSTRLEN 21 + static const ofmt_field_t phys_h_fields[] = { { "LINK", 13, PHYS_H_LINK, print_phys_one_hwgrp_cb}, -{ "GROUP", 9, PHYS_H_GROUP, print_phys_one_hwgrp_cb}, -{ "GROUPTYPE", 7, PHYS_H_GRPTYPE, print_phys_one_hwgrp_cb}, -{ "RINGS", 17, PHYS_H_RINGS, print_phys_one_hwgrp_cb}, -{ "CLIENTS", 21, PHYS_H_CLIENTS, print_phys_one_hwgrp_cb}, +{ "RINGTYPE", 9, PHYS_H_RINGTYPE, print_phys_one_hwgrp_cb}, +{ "RINGS", RINGSTRLEN, PHYS_H_RINGS, print_phys_one_hwgrp_cb}, +{ "CLIENTS", 24, PHYS_H_CLIENTS, print_phys_one_hwgrp_cb}, { NULL, 0, 0, NULL}} ; @@ -3694,6 +3694,13 @@ typedef struct { static boolean_t print_phys_one_hwgrp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) { + int i; + boolean_t first = B_TRUE; + int start = -1; + int end = -1; + char ringstr[RINGSTRLEN]; + char ringsubstr[RINGSTRLEN]; + print_phys_hwgrp_state_t *hg_state = ofarg->ofmt_cbarg; 
dladm_hwgrp_attr_t *attr = hg_state->hs_grp_attr; @@ -3701,15 +3708,78 @@ print_phys_one_hwgrp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) case PHYS_H_LINK: (void) snprintf(buf, bufsize, "%s", attr->hg_link_name); break; - case PHYS_H_GROUP: - (void) snprintf(buf, bufsize, "%d", attr->hg_grp_num); - break; - case PHYS_H_GRPTYPE: + case PHYS_H_RINGTYPE: (void) snprintf(buf, bufsize, "%s", attr->hg_grp_type == DLADM_HWGRP_TYPE_RX ? "RX" : "TX"); break; case PHYS_H_RINGS: - (void) snprintf(buf, bufsize, "%d", attr->hg_n_rings); + ringstr[0] = '\0'; + for (i = 0; i < attr->hg_n_rings; i++) { + uint_t index = attr->hg_rings[i]; + + if (start == -1) { + start = index; + end = index; + } else if (index == end + 1) { + end = index; + } else { + if (start == end) { + if (first) { + (void) snprintf( + ringsubstr, + RINGSTRLEN, "%d", + start); + first = B_FALSE; + } else { + (void) snprintf( + ringsubstr, + RINGSTRLEN, ",%d", + start); + } + } else { + if (first) { + (void) snprintf( + ringsubstr, + RINGSTRLEN, + "%d-%d", + start, end); + first = B_FALSE; + } else { + (void) snprintf( + ringsubstr, + RINGSTRLEN, + ",%d-%d", + start, end); + } + } + (void) strlcat(ringstr, ringsubstr, + RINGSTRLEN); + start = index; + end = index; + } + } + /* The last one */ + if (start != -1) { + if (first) { + if (start == end) { + (void) snprintf(buf, bufsize, "%d", + start); + } else { + (void) snprintf(buf, bufsize, "%d-%d", + start, end); + } + } else { + if (start == end) { + (void) snprintf(ringsubstr, RINGSTRLEN, + ",%d", start); + } else { + (void) snprintf(ringsubstr, RINGSTRLEN, + ",%d-%d", start, end); + } + (void) strlcat(ringstr, ringsubstr, RINGSTRLEN); + (void) snprintf(buf, bufsize, "%s", ringstr); + } + } break; case PHYS_H_CLIENTS: if (attr->hg_client_names[0] == '\0') { @@ -4232,8 +4302,7 @@ do_show_phys(int argc, char *argv[], const char *use) "link,media,state,speed,duplex,device"; char *all_inactive_fields = "link,device,media,flags"; char *all_mac_fields = 
"link,slot,address,inuse,client"; - char *all_hwgrp_fields = - "link,group,grouptype,rings,clients"; + char *all_hwgrp_fields = "link,ringtype,rings,clients"; const ofmt_field_t *pf; ofmt_handle_t ofmt; ofmt_status_t oferr; @@ -4534,9 +4603,6 @@ do_create_vnic(int argc, char *argv[], const char *use) case 'f': flags |= DLADM_OPT_FORCE; break; - case 'H': - flags |= DLADM_OPT_HWRINGS; - break; default: die_opterr(optopt, option, use); } @@ -8722,7 +8788,7 @@ warn(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); } /* PRINTFLIKE2 */ @@ -8779,7 +8845,7 @@ die(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); /* close dladm handle if it was opened */ if (handle != NULL) diff --git a/usr/src/cmd/dlstat/Makefile b/usr/src/cmd/dlstat/Makefile new file mode 100644 index 0000000000..8885dbc5a0 --- /dev/null +++ b/usr/src/cmd/dlstat/Makefile @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# + +PROG= dlstat + +ROOTFS_PROG= $(PROG) + +include ../Makefile.cmd + +XGETFLAGS += -a -x $(PROG).xcl +LDLIBS += -L$(ROOT)/lib +LDLIBS += -ldladm -linetutil + +.KEEP_STATE: + +all: $(ROOTFS_PROG) + +install: all $(ROOTSBINPROG) + $(RM) $(ROOTUSRSBINPROG) + -$(SYMLINK) ../../sbin/$(PROG) $(ROOTUSRSBINPROG) + +clean: + +lint: lint_PROG + +include ../Makefile.targ diff --git a/usr/src/cmd/dlstat/dlstat.c b/usr/src/cmd/dlstat/dlstat.c new file mode 100644 index 0000000000..a931ba82ff --- /dev/null +++ b/usr/src/cmd/dlstat/dlstat.c @@ -0,0 +1,2457 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <stdio.h> +#include <ctype.h> +#include <locale.h> +#include <signal.h> +#include <stdarg.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <stropts.h> +#include <sys/stat.h> +#include <errno.h> +#include <strings.h> +#include <getopt.h> +#include <unistd.h> +#include <priv.h> +#include <termios.h> +#include <pwd.h> +#include <auth_attr.h> +#include <auth_list.h> +#include <libintl.h> +#include <libdevinfo.h> +#include <libdlpi.h> +#include <libdladm.h> +#include <libdllink.h> +#include <libdlstat.h> +#include <libdlaggr.h> +#include <libinetutil.h> +#include <bsm/adt.h> +#include <bsm/adt_event.h> +#include <stddef.h> +#include <ofmt.h> + +typedef struct link_chain_s { + datalink_id_t lc_linkid; + boolean_t lc_visited; + dladm_stat_chain_t *lc_statchain[DLADM_STAT_NUM_STATS]; + struct link_chain_s *lc_next; +} link_chain_t; + +typedef void * (*stats2str_t)(const char *, void *, + char, boolean_t); + +typedef struct show_state { + link_chain_t *ls_linkchain; + boolean_t ls_stattype[DLADM_STAT_NUM_STATS]; + stats2str_t ls_stats2str[DLADM_STAT_NUM_STATS]; + ofmt_handle_t ls_ofmt; + char ls_unit; + boolean_t ls_parsable; +} show_state_t; + +typedef struct show_history_state_s { + boolean_t hs_plot; + boolean_t hs_parsable; + boolean_t hs_printheader; + boolean_t hs_first; + boolean_t hs_showall; + ofmt_handle_t hs_ofmt; +} show_history_state_t; + +/* + * callback functions for printing output and error diagnostics. 
+ */ +static ofmt_cb_t print_default_cb; + +static void dlstat_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t); + +typedef void cmdfunc_t(int, char **, const char *); + +static cmdfunc_t do_show, do_show_history, do_show_phys, do_show_link; +static cmdfunc_t do_show_aggr; + +static void die(const char *, ...); +static void die_optdup(int); +static void die_opterr(int, int, const char *); +static void die_dlerr(dladm_status_t, const char *, ...); +static void warn(const char *, ...); + +typedef struct cmd { + char *c_name; + cmdfunc_t *c_fn; + const char *c_usage; +} cmd_t; + +static cmd_t cmds[] = { + { "", do_show, + "dlstat [-r | -t] [-i <interval>] [link]\n" + " dlstat [-a | -A] [-i <interval>] [-p] [ -o field[,...]]\n" + " [-u R|K|M|G|T|P] [link]"}, + { "show-phys", do_show_phys, + "dlstat show-phys [-r | -t] [-i interval] [-a]\n" + " [-p] [ -o field[,...]] [-u R|K|M|G|T|P] " + "[link]"}, + { "show-link", do_show_link, + "dlstat show-link [-r [-F] | -t] [-i interval] [-a]\n" + " [-p] [ -o field[,...]] [-u R|K|M|G|T|P] " + "[link]\n" + " dlstat show-link -h [-a] [-d] [-F <format>]\n" + " [-s <DD/MM/YYYY,HH:MM:SS>] " + "[-e <DD/MM/YYYY,HH:MM:SS>]\n" + " -f <logfile> [<link>]" }, + { "show-aggr", do_show_aggr, + "dlstat show-aggr [-r | -t] [-i interval] [-p]\n" + " [ -o field[,...]] [-u R|K|M|G|T|P] " + " [link]" } +}; + +#define MAXSTATLEN 15 + +/* + * dlstat : total stat fields + */ +typedef struct total_fields_buf_s { + char t_linkname[MAXLINKNAMELEN]; + char t_ipackets[MAXSTATLEN]; + char t_rbytes[MAXSTATLEN]; + char t_opackets[MAXSTATLEN]; + char t_obytes[MAXSTATLEN]; +} total_fields_buf_t; + +static ofmt_field_t total_s_fields[] = { +{ "LINK", 15, + offsetof(total_fields_buf_t, t_linkname), print_default_cb}, +{ "IPKTS", 8, + offsetof(total_fields_buf_t, t_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(total_fields_buf_t, t_rbytes), print_default_cb}, +{ "OPKTS", 8, + offsetof(total_fields_buf_t, t_opackets), print_default_cb}, +{ "OBYTES", 8, 
+ offsetof(total_fields_buf_t, t_obytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-phys: both Rx and Tx stat fields + */ +typedef struct ring_fields_buf_s { + char r_linkname[MAXLINKNAMELEN]; + char r_type[MAXSTATLEN]; + char r_id[MAXSTATLEN]; + char r_index[MAXSTATLEN]; + char r_packets[MAXSTATLEN]; + char r_bytes[MAXSTATLEN]; +} ring_fields_buf_t; + +static ofmt_field_t ring_s_fields[] = { +{ "LINK", 15, + offsetof(ring_fields_buf_t, r_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(ring_fields_buf_t, r_type), print_default_cb}, +{ "ID", 7, + offsetof(ring_fields_buf_t, r_id), print_default_cb}, +{ "INDEX", 6, + offsetof(ring_fields_buf_t, r_index), print_default_cb}, +{ "PKTS", 8, + offsetof(ring_fields_buf_t, r_packets), print_default_cb}, +{ "BYTES", 8, + offsetof(ring_fields_buf_t, r_bytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-phys -r: Rx Ring stat fields + */ +typedef struct rx_ring_fields_buf_s { + char rr_linkname[MAXLINKNAMELEN]; + char rr_type[MAXSTATLEN]; + char rr_id[MAXSTATLEN]; + char rr_index[MAXSTATLEN]; + char rr_ipackets[MAXSTATLEN]; + char rr_rbytes[MAXSTATLEN]; +} rx_ring_fields_buf_t; + +static ofmt_field_t rx_ring_s_fields[] = { +{ "LINK", 15, + offsetof(rx_ring_fields_buf_t, rr_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(rx_ring_fields_buf_t, rr_type), print_default_cb}, +{ "ID", 7, + offsetof(rx_ring_fields_buf_t, rr_id), print_default_cb}, +{ "INDEX", 6, + offsetof(rx_ring_fields_buf_t, rr_index), print_default_cb}, +{ "IPKTS", 8, + offsetof(rx_ring_fields_buf_t, rr_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(rx_ring_fields_buf_t, rr_rbytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-phys -t: Tx Ring stat fields + */ +typedef struct tx_ring_fields_buf_s { + char tr_linkname[MAXLINKNAMELEN]; + char tr_type[MAXSTATLEN]; + char tr_id[MAXSTATLEN]; + char tr_index[MAXSTATLEN]; + char tr_opackets[MAXSTATLEN]; + char tr_obytes[MAXSTATLEN]; +} 
tx_ring_fields_buf_t; + +static ofmt_field_t tx_ring_s_fields[] = { +{ "LINK", 15, + offsetof(tx_ring_fields_buf_t, tr_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(tx_ring_fields_buf_t, tr_type), print_default_cb}, +{ "ID", 7, + offsetof(tx_ring_fields_buf_t, tr_id), print_default_cb}, +{ "INDEX", 6, + offsetof(tx_ring_fields_buf_t, tr_index), print_default_cb}, +{ "OPKTS", 8, + offsetof(tx_ring_fields_buf_t, tr_opackets), print_default_cb}, +{ "OBYTES", 8, + offsetof(tx_ring_fields_buf_t, tr_obytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link: both Rx and Tx lane fields + */ +typedef struct lane_fields_buf_s { + char l_linkname[MAXLINKNAMELEN]; + char l_type[MAXSTATLEN]; + char l_id[MAXSTATLEN]; + char l_index[MAXSTATLEN]; + char l_packets[MAXSTATLEN]; + char l_bytes[MAXSTATLEN]; +} lane_fields_buf_t; + +static ofmt_field_t lane_s_fields[] = { +{ "LINK", 15, + offsetof(lane_fields_buf_t, l_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(lane_fields_buf_t, l_type), print_default_cb}, +{ "ID", 7, + offsetof(lane_fields_buf_t, l_id), print_default_cb}, +{ "INDEX", 6, + offsetof(lane_fields_buf_t, l_index), print_default_cb}, +{ "PKTS", 8, + offsetof(lane_fields_buf_t, l_packets), print_default_cb}, +{ "BYTES", 8, + offsetof(lane_fields_buf_t, l_bytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link -r, dlstat -r: Rx Lane stat fields + */ +typedef struct rx_lane_fields_buf_s { + char rl_linkname[MAXLINKNAMELEN]; + char rl_type[MAXSTATLEN]; + char rl_id[MAXSTATLEN]; + char rl_index[MAXSTATLEN]; + char rl_ipackets[MAXSTATLEN]; + char rl_rbytes[MAXSTATLEN]; + char rl_intrs[MAXSTATLEN]; + char rl_polls[MAXSTATLEN]; + char rl_sdrops[MAXSTATLEN]; + char rl_chl10[MAXSTATLEN]; + char rl_ch10_50[MAXSTATLEN]; + char rl_chg50[MAXSTATLEN]; +} rx_lane_fields_buf_t; + +static ofmt_field_t rx_lane_s_fields[] = { +{ "LINK", 10, + offsetof(rx_lane_fields_buf_t, rl_linkname), print_default_cb}, +{ "TYPE", 5, + 
offsetof(rx_lane_fields_buf_t, rl_type), print_default_cb}, +{ "ID", 7, + offsetof(rx_lane_fields_buf_t, rl_id), print_default_cb}, +{ "INDEX", 6, + offsetof(rx_lane_fields_buf_t, rl_index), print_default_cb}, +{ "IPKTS", 8, + offsetof(rx_lane_fields_buf_t, rl_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(rx_lane_fields_buf_t, rl_rbytes), print_default_cb}, +{ "INTRS", 8, + offsetof(rx_lane_fields_buf_t, rl_intrs), print_default_cb}, +{ "POLLS", 8, + offsetof(rx_lane_fields_buf_t, rl_polls), print_default_cb}, +{ "SDROPS", 8, + offsetof(rx_lane_fields_buf_t, rl_sdrops), print_default_cb}, +{ "CH<10", 8, + offsetof(rx_lane_fields_buf_t, rl_chl10), print_default_cb}, +{ "CH10-50", 8, + offsetof(rx_lane_fields_buf_t, rl_ch10_50), print_default_cb}, +{ "CH>50", 8, + offsetof(rx_lane_fields_buf_t, rl_chg50), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link -r -F: Rx fanout stat fields + */ +typedef struct rx_fanout_lane_fields_buf_s { + char rfl_linkname[MAXLINKNAMELEN]; + char rfl_type[MAXSTATLEN]; + char rfl_id[MAXSTATLEN]; + char rfl_index[MAXSTATLEN]; + char rfl_fout[MAXSTATLEN]; + char rfl_ipackets[MAXSTATLEN]; + char rfl_rbytes[MAXSTATLEN]; +} rx_fanout_lane_fields_buf_t; + +static ofmt_field_t rx_fanout_lane_s_fields[] = { +{ "LINK", 15, + offsetof(rx_fanout_lane_fields_buf_t, rfl_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(rx_fanout_lane_fields_buf_t, rfl_type), print_default_cb}, +{ "ID", 7, + offsetof(rx_fanout_lane_fields_buf_t, rfl_id), print_default_cb}, +{ "INDEX", 6, + offsetof(rx_fanout_lane_fields_buf_t, rfl_index), print_default_cb}, +{ "FOUT", 6, + offsetof(rx_fanout_lane_fields_buf_t, rfl_fout), print_default_cb}, +{ "IPKTS", 8, + offsetof(rx_fanout_lane_fields_buf_t, rfl_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(rx_fanout_lane_fields_buf_t, rfl_rbytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link -t: Tx Lane stat fields + */ +typedef struct tx_lane_fields_buf_s { + 
char tl_linkname[MAXLINKNAMELEN]; + char tl_index[MAXSTATLEN]; + char tl_type[MAXSTATLEN]; + char tl_id[MAXSTATLEN]; + char tl_opackets[MAXSTATLEN]; + char tl_obytes[MAXSTATLEN]; + char tl_blockcnt[MAXSTATLEN]; + char tl_unblockcnt[MAXSTATLEN]; + char tl_sdrops[MAXSTATLEN]; +} tx_lane_fields_buf_t; + +static ofmt_field_t tx_lane_s_fields[] = { +{ "LINK", 15, + offsetof(tx_lane_fields_buf_t, tl_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(tx_lane_fields_buf_t, tl_type), print_default_cb}, +{ "ID", 7, + offsetof(tx_lane_fields_buf_t, tl_id), print_default_cb}, +{ "INDEX", 6, + offsetof(tx_lane_fields_buf_t, tl_index), print_default_cb}, +{ "OPKTS", 8, + offsetof(tx_lane_fields_buf_t, tl_opackets), print_default_cb}, +{ "OBYTES", 8, + offsetof(tx_lane_fields_buf_t, tl_obytes), print_default_cb}, +{ "BLKCNT", 8, + offsetof(tx_lane_fields_buf_t, tl_blockcnt), print_default_cb}, +{ "UBLKCNT", 8, + offsetof(tx_lane_fields_buf_t, tl_unblockcnt), print_default_cb}, +{ "SDROPS", 8, + offsetof(tx_lane_fields_buf_t, tl_sdrops), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-aggr: aggr port stat fields + */ +typedef struct aggr_port_fields_buf_s { + char ap_linkname[MAXLINKNAMELEN]; + char ap_portname[MAXLINKNAMELEN]; + char ap_ipackets[MAXSTATLEN]; + char ap_rbytes[MAXSTATLEN]; + char ap_opackets[MAXSTATLEN]; + char ap_obytes[MAXSTATLEN]; +} aggr_port_fields_buf_t; + +static ofmt_field_t aggr_port_s_fields[] = { +{ "LINK", 15, + offsetof(aggr_port_fields_buf_t, ap_linkname), print_default_cb}, +{ "PORT", 15, + offsetof(aggr_port_fields_buf_t, ap_portname), print_default_cb}, +{ "IPKTS", 8, + offsetof(aggr_port_fields_buf_t, ap_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(aggr_port_fields_buf_t, ap_rbytes), print_default_cb}, +{ "OPKTS", 8, + offsetof(aggr_port_fields_buf_t, ap_opackets), print_default_cb}, +{ "OBYTES", 8, + offsetof(aggr_port_fields_buf_t, ap_obytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * structures for 
'dlstat show-link -h' + */ +typedef struct history_fields_buf_s { + char h_link[12]; + char h_duration[10]; + char h_ipackets[9]; + char h_rbytes[10]; + char h_opackets[9]; + char h_obytes[10]; + char h_bandwidth[14]; +} history_fields_buf_t; + +static ofmt_field_t history_fields[] = { +{ "LINK", 13, + offsetof(history_fields_buf_t, h_link), print_default_cb}, +{ "DURATION", 11, + offsetof(history_fields_buf_t, h_duration), print_default_cb}, +{ "IPKTS", 10, + offsetof(history_fields_buf_t, h_ipackets), print_default_cb}, +{ "RBYTES", 11, + offsetof(history_fields_buf_t, h_rbytes), print_default_cb}, +{ "OPKTS", 10, + offsetof(history_fields_buf_t, h_opackets), print_default_cb}, +{ "OBYTES", 11, + offsetof(history_fields_buf_t, h_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_fields_buf_t, h_bandwidth), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * structures for 'dlstat show-link -h link' + */ +typedef struct history_l_fields_buf_s { + char hl_link[12]; + char hl_stime[13]; + char hl_etime[13]; + char hl_rbytes[8]; + char hl_obytes[8]; + char hl_bandwidth[14]; +} history_l_fields_buf_t; + +static ofmt_field_t history_l_fields[] = { +/* name, field width, offset */ +{ "LINK", 13, + offsetof(history_l_fields_buf_t, hl_link), print_default_cb}, +{ "START", 14, + offsetof(history_l_fields_buf_t, hl_stime), print_default_cb}, +{ "END", 14, + offsetof(history_l_fields_buf_t, hl_etime), print_default_cb}, +{ "RBYTES", 9, + offsetof(history_l_fields_buf_t, hl_rbytes), print_default_cb}, +{ "OBYTES", 9, + offsetof(history_l_fields_buf_t, hl_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_l_fields_buf_t, hl_bandwidth), print_default_cb}, +{ NULL, 0, 0, NULL}} +; + +static char *progname; + +/* + * Handle to libdladm. Opened in main() before the sub-command + * specific function is called. 
+ */ +static dladm_handle_t handle = NULL; + +static void +usage(void) +{ + int i; + cmd_t *cmdp; + + (void) fprintf(stderr, gettext("usage: ")); + for (i = 0; i < sizeof (cmds) / sizeof (cmds[0]); i++) { + cmdp = &cmds[i]; + if (cmdp->c_usage != NULL) + (void) fprintf(stderr, "%s\n", gettext(cmdp->c_usage)); + } + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(1); +} + +int +main(int argc, char *argv[]) +{ + int i; + cmd_t *cmdp; + dladm_status_t status; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + progname = argv[0]; + + /* Open the libdladm handle */ + if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) + die_dlerr(status, "could not open /dev/dld"); + + if (argc == 1) { + do_show(argc - 1, NULL, cmds[0].c_usage); + goto done; + } + + for (i = 0; i < sizeof (cmds) / sizeof (cmds[0]); i++) { + cmdp = &cmds[i]; + if (strcmp(argv[1], cmdp->c_name) == 0) { + cmdp->c_fn(argc - 1, &argv[1], cmdp->c_usage); + goto done; + } + } + + do_show(argc, &argv[0], cmds[0].c_usage); + +done: + dladm_close(handle); + return (0); +} + +/*ARGSUSED*/ +static int +show_history_date(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = arg; + time_t stime; + char timebuf[20]; + dladm_status_t status; + uint32_t flags; + + /* + * Only show history information for existing links unless '-a' + * is specified. 
+ */ + if (!state->hs_showall) { + if ((status = dladm_name2info(handle, history->du_name, + NULL, &flags, NULL, NULL)) != DLADM_STATUS_OK) { + return (status); + } + if ((flags & DLADM_OPT_ACTIVE) == 0) + return (DLADM_STATUS_LINKINVAL); + } + + stime = history->du_stime; + (void) strftime(timebuf, sizeof (timebuf), "%m/%d/%Y", + localtime(&stime)); + (void) printf("%s\n", timebuf); + + return (DLADM_STATUS_OK); +} + +static int +show_history_time(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = arg; + char buf[DLADM_STRSIZE]; + history_l_fields_buf_t ubuf; + time_t time; + double bw; + dladm_status_t status; + uint32_t flags; + + /* + * Only show history information for existing links unless '-a' + * is specified. + */ + if (!state->hs_showall) { + if ((status = dladm_name2info(handle, history->du_name, + NULL, &flags, NULL, NULL)) != DLADM_STATUS_OK) { + return (status); + } + if ((flags & DLADM_OPT_ACTIVE) == 0) + return (DLADM_STATUS_LINKINVAL); + } + + if (state->hs_plot) { + if (!state->hs_printheader) { + if (state->hs_first) { + (void) printf("# Time"); + state->hs_first = B_FALSE; + } + (void) printf(" %s", history->du_name); + if (history->du_last) { + (void) printf("\n"); + state->hs_first = B_TRUE; + state->hs_printheader = B_TRUE; + } + } else { + if (state->hs_first) { + time = history->du_etime; + (void) strftime(buf, sizeof (buf), "%T", + localtime(&time)); + state->hs_first = B_FALSE; + (void) printf("%s", buf); + } + bw = (double)history->du_bandwidth/1000; + (void) printf(" %.2f", bw); + if (history->du_last) { + (void) printf("\n"); + state->hs_first = B_TRUE; + } + } + return (DLADM_STATUS_OK); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.hl_link, sizeof (ubuf.hl_link), "%s", + history->du_name); + time = history->du_stime; + (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.hl_stime, sizeof (ubuf.hl_stime), "%s", + buf); + time = history->du_etime; + (void) strftime(buf, 
sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.hl_etime, sizeof (ubuf.hl_etime), "%s", + buf); + (void) snprintf(ubuf.hl_rbytes, sizeof (ubuf.hl_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.hl_obytes, sizeof (ubuf.hl_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.hl_bandwidth, sizeof (ubuf.hl_bandwidth), + "%s Mbps", dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->hs_ofmt, &ubuf); + return (DLADM_STATUS_OK); +} + +static int +show_history_res(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = arg; + char buf[DLADM_STRSIZE]; + history_fields_buf_t ubuf; + dladm_status_t status; + uint32_t flags; + + /* + * Only show history information for existing links unless '-a' + * is specified. + */ + if (!state->hs_showall) { + if ((status = dladm_name2info(handle, history->du_name, + NULL, &flags, NULL, NULL)) != DLADM_STATUS_OK) { + return (status); + } + if ((flags & DLADM_OPT_ACTIVE) == 0) + return (DLADM_STATUS_LINKINVAL); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.h_link, sizeof (ubuf.h_link), "%s", + history->du_name); + (void) snprintf(ubuf.h_duration, sizeof (ubuf.h_duration), + "%llu", history->du_duration); + (void) snprintf(ubuf.h_ipackets, sizeof (ubuf.h_ipackets), + "%llu", history->du_ipackets); + (void) snprintf(ubuf.h_rbytes, sizeof (ubuf.h_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.h_opackets, sizeof (ubuf.h_opackets), + "%llu", history->du_opackets); + (void) snprintf(ubuf.h_obytes, sizeof (ubuf.h_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.h_bandwidth, sizeof (ubuf.h_bandwidth), + "%s Mbps", dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->hs_ofmt, &ubuf); + + return (DLADM_STATUS_OK); +} + +static boolean_t +valid_formatspec(char *formatspec_str) +{ + return (strcmp(formatspec_str, "gnuplot") == 0); +} + +/*ARGSUSED*/ +static void +do_show_history(int argc, char *argv[], const char *use) +{ + char 
*file = NULL; + int opt; + dladm_status_t status; + boolean_t d_arg = B_FALSE; + char *stime = NULL; + char *etime = NULL; + char *resource = NULL; + show_history_state_t state; + boolean_t o_arg = B_FALSE; + boolean_t F_arg = B_FALSE; + char *fields_str = NULL; + char *formatspec_str = NULL; + char *all_l_fields = + "link,start,end,rbytes,obytes,bandwidth"; + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = 0; + + bzero(&state, sizeof (show_history_state_t)); + state.hs_parsable = B_FALSE; + state.hs_printheader = B_FALSE; + state.hs_plot = B_FALSE; + state.hs_first = B_TRUE; + + while ((opt = getopt(argc, argv, "das:e:o:f:F:")) != -1) { + switch (opt) { + case 'd': + d_arg = B_TRUE; + break; + case 'a': + state.hs_showall = B_TRUE; + break; + case 'f': + file = optarg; + break; + case 's': + stime = optarg; + break; + case 'e': + etime = optarg; + break; + case 'o': + o_arg = B_TRUE; + fields_str = optarg; + break; + case 'F': + state.hs_plot = F_arg = B_TRUE; + formatspec_str = optarg; + break; + default: + die_opterr(optopt, opt, use); + break; + } + } + + if (file == NULL) + die("show-link -h requires a file"); + + if (optind == (argc-1)) { + uint32_t flags; + + resource = argv[optind]; + if (!state.hs_showall && + (((status = dladm_name2info(handle, resource, NULL, &flags, + NULL, NULL)) != DLADM_STATUS_OK) || + ((flags & DLADM_OPT_ACTIVE) == 0))) { + die("invalid link: '%s'", resource); + } + } + + if (F_arg && d_arg) + die("incompatible -d and -F options"); + + if (F_arg && !valid_formatspec(formatspec_str)) + die("Format specifier %s not supported", formatspec_str); + + if (state.hs_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (resource == NULL && stime == NULL && etime == NULL) { + oferr = ofmt_open(fields_str, history_fields, ofmtflags, 0, + &ofmt); + } else { + if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) + fields_str = all_l_fields; + oferr = ofmt_open(fields_str, history_l_fields, ofmtflags, 0, + &ofmt); + + } + 
dlstat_ofmt_check(oferr, state.hs_parsable, ofmt); + state.hs_ofmt = ofmt; + + if (d_arg) { + /* Print log dates */ + status = dladm_usage_dates(show_history_date, + DLADM_LOGTYPE_LINK, file, resource, &state); + } else if (resource == NULL && stime == NULL && etime == NULL && + !F_arg) { + /* Print summary */ + status = dladm_usage_summary(show_history_res, + DLADM_LOGTYPE_LINK, file, &state); + } else if (resource != NULL) { + /* Print log entries for named resource */ + status = dladm_walk_usage_res(show_history_time, + DLADM_LOGTYPE_LINK, file, resource, stime, etime, &state); + } else { + /* Print time and information for each link */ + status = dladm_walk_usage_time(show_history_time, + DLADM_LOGTYPE_LINK, file, stime, etime, &state); + } + + if (status != DLADM_STATUS_OK) + die_dlerr(status, "show-link -h"); + ofmt_close(ofmt); +} + +boolean_t +dlstat_unit(char *oarg, char *unit) +{ + if ((strcmp(oarg, "R") == 0) || (strcmp(oarg, "K") == 0) || + (strcmp(oarg, "M") == 0) || (strcmp(oarg, "G") == 0) || + (strcmp(oarg, "T") == 0) || (strcmp(oarg, "P") == 0)) { + *unit = oarg[0]; + return (B_TRUE); + } + + return (B_FALSE); +} + +void +map_to_units(char *buf, uint_t bufsize, double num, char unit, + boolean_t parsable) +{ + if (parsable) { + (void) snprintf(buf, bufsize, "%.0lf", num); + return; + } + + if (unit == '\0') { + int index; + + for (index = 0; (int)(num/1000) != 0; index++, num /= 1000) + ; + + switch (index) { + case 0: + unit = '\0'; + break; + case 1: + unit = 'K'; + break; + case 2: + unit = 'M'; + break; + case 3: + unit = 'G'; + break; + case 4: + unit = 'T'; + break; + case 5: + /* Largest unit supported */ + default: + unit = 'P'; + break; + } + } else { + switch (unit) { + case 'R': + /* Already raw numbers */ + unit = '\0'; + break; + case 'K': + num /= 1000; + break; + case 'M': + num /= (1000*1000); + break; + case 'G': + num /= (1000*1000*1000); + break; + case 'T': + num /= (1000.0*1000.0*1000.0*1000.0); + break; + case 'P': + /* 
Largest unit supported */ + default: + num /= (1000.0*1000.0*1000.0*1000.0*1000.0); + break; + } + } + + if (unit == '\0') + (void) snprintf(buf, bufsize, " %7.0lf%c", num, unit); + else + (void) snprintf(buf, bufsize, " %6.2lf%c", num, unit); +} + +link_chain_t * +get_link_prev_stat(datalink_id_t linkid, void *arg) +{ + show_state_t *state = (show_state_t *)arg; + link_chain_t *link_curr = NULL; + + /* Scan prev linkid list and look for entry matching this entry */ + for (link_curr = state->ls_linkchain; link_curr; + link_curr = link_curr->lc_next) { + if (link_curr->lc_linkid == linkid) + break; + } + /* New link, add it */ + if (link_curr == NULL) { + link_curr = (link_chain_t *)malloc(sizeof (link_chain_t)); + if (link_curr == NULL) + goto done; + link_curr->lc_linkid = linkid; + bzero(&link_curr->lc_statchain, + sizeof (link_curr->lc_statchain)); + link_curr->lc_next = state->ls_linkchain; + state->ls_linkchain = link_curr; + } +done: + return (link_curr); +} + +/* + * Number of links may change while dlstat with -i is executing. + * Free memory allocated for links that are no longer there. + * Prepare for next iteration by marking visited = false for existing stat + * entries. + */ +static void +cleanup_removed_links(show_state_t *state) +{ + link_chain_t *lcurr; + link_chain_t *lprev; + link_chain_t *tofree; + int i; + + /* Delete all nodes from the list that have lc_visited marked false */ + lcurr = state->ls_linkchain; + while (lcurr != NULL) { + if (lcurr->lc_visited) { + lcurr->lc_visited = B_FALSE; + lprev = lcurr; + lcurr = lcurr->lc_next; + continue; + } + /* Is it head of the list? 
*/ + if (lcurr == state->ls_linkchain) + state->ls_linkchain = lcurr->lc_next; + else + lprev->lc_next = lcurr->lc_next; + /* lprev remains the same */ + tofree = lcurr; + lcurr = lcurr->lc_next; + + /* Free stats memory for the removed link */ + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) { + if (state->ls_stattype[i]) + dladm_link_stat_free(tofree->lc_statchain[i]); + } + free(tofree); + } +} + +void * +print_total_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + total_stat_entry_t *sentry = statentry; + total_stat_t *link_stats = &sentry->tse_stats; + total_fields_buf_t *buf; + + buf = malloc(sizeof (total_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->t_linkname, sizeof (buf->t_linkname), "%s", + linkname); + + map_to_units(buf->t_ipackets, sizeof (buf->t_ipackets), + link_stats->ts_ipackets, unit, parsable); + + map_to_units(buf->t_rbytes, sizeof (buf->t_rbytes), + link_stats->ts_rbytes, unit, parsable); + + map_to_units(buf->t_opackets, sizeof (buf->t_opackets), + link_stats->ts_opackets, unit, parsable); + + map_to_units(buf->t_obytes, sizeof (buf->t_obytes), + link_stats->ts_obytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_generic_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + ring_fields_buf_t *buf; + + buf = malloc(sizeof (ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->r_linkname, sizeof (buf->r_linkname), "%s", + linkname); + + (void) snprintf(buf->r_type, sizeof (buf->r_type), "rx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->r_index, sizeof (buf->r_index), "--"); + } else { + (void) snprintf(buf->r_index, sizeof (buf->r_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->r_packets, sizeof (buf->r_packets), + link_stats->r_packets, unit, parsable); + + 
map_to_units(buf->r_bytes, sizeof (buf->r_bytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_generic_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + ring_fields_buf_t *buf; + + buf = malloc(sizeof (ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->r_linkname, sizeof (buf->r_linkname), "%s", + linkname); + + (void) snprintf(buf->r_type, sizeof (buf->r_type), "tx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->r_index, sizeof (buf->r_index), "--"); + } else { + (void) snprintf(buf->r_index, sizeof (buf->r_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->r_packets, sizeof (buf->r_packets), + link_stats->r_packets, unit, parsable); + + map_to_units(buf->r_bytes, sizeof (buf->r_bytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + rx_ring_fields_buf_t *buf; + + buf = malloc(sizeof (rx_ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->rr_linkname, sizeof (buf->rr_linkname), "%s", + linkname); + + (void) snprintf(buf->rr_type, sizeof (buf->rr_type), "rx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->rr_index, sizeof (buf->rr_index), "--"); + } else { + (void) snprintf(buf->rr_index, sizeof (buf->rr_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->rr_ipackets, sizeof (buf->rr_ipackets), + link_stats->r_packets, unit, parsable); + + map_to_units(buf->rr_rbytes, sizeof (buf->rr_rbytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t 
parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + tx_ring_fields_buf_t *buf; + + buf = malloc(sizeof (tx_ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->tr_linkname, sizeof (buf->tr_linkname), "%s", + linkname); + + (void) snprintf(buf->tr_type, sizeof (buf->tr_type), "tx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->tr_index, sizeof (buf->tr_index), "--"); + } else { + (void) snprintf(buf->tr_index, sizeof (buf->tr_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->tr_opackets, sizeof (buf->tr_opackets), + link_stats->r_packets, unit, parsable); + + map_to_units(buf->tr_obytes, sizeof (buf->tr_obytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_generic_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + rx_lane_stat_entry_t *sentry = statentry; + rx_lane_stat_t *link_stats = &sentry->rle_stats; + lane_fields_buf_t *buf; + + if (sentry->rle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->l_linkname, sizeof (buf->l_linkname), "%s", + linkname); + + (void) snprintf(buf->l_type, sizeof (buf->l_type), "rx"); + + if (sentry->rle_id == L_HWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "hw"); + else if (sentry->rle_id == L_SWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "sw"); + else if (sentry->rle_id == L_LOCAL) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "local"); + else if (sentry->rle_id == L_BCAST) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "bcast"); + else + (void) snprintf(buf->l_id, sizeof (buf->l_id), "--"); + + if (sentry->rle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->l_index, sizeof (buf->l_index), "--"); + } else { + (void) snprintf(buf->l_index, sizeof (buf->l_index), + "%llu", sentry->rle_index); + } + + 
map_to_units(buf->l_packets, sizeof (buf->l_packets), + link_stats->rl_ipackets, unit, parsable); + + map_to_units(buf->l_bytes, sizeof (buf->l_bytes), + link_stats->rl_rbytes, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_generic_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + tx_lane_stat_entry_t *sentry = statentry; + tx_lane_stat_t *link_stats = &sentry->tle_stats; + lane_fields_buf_t *buf; + + if (sentry->tle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->l_linkname, sizeof (buf->l_linkname), "%s", + linkname); + + (void) snprintf(buf->l_type, sizeof (buf->l_type), "tx"); + + if (sentry->tle_id == L_HWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "hw"); + else if (sentry->tle_id == L_SWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "sw"); + else if (sentry->tle_id == L_BCAST) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "bcast"); + else + (void) snprintf(buf->l_id, sizeof (buf->l_id), "--"); + + if (sentry->tle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->l_index, sizeof (buf->l_index), "--"); + } else { + (void) snprintf(buf->l_index, sizeof (buf->l_index), + "%llu", sentry->tle_index); + } + map_to_units(buf->l_packets, sizeof (buf->l_packets), + link_stats->tl_opackets, unit, parsable); + + map_to_units(buf->l_bytes, sizeof (buf->l_bytes), + link_stats->tl_obytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + rx_lane_stat_entry_t *sentry = statentry; + rx_lane_stat_t *link_stats = &sentry->rle_stats; + rx_lane_fields_buf_t *buf; + + if (sentry->rle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (rx_lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->rl_linkname, sizeof (buf->rl_linkname), "%s", + linkname); + + (void) snprintf(buf->rl_type, 
sizeof (buf->rl_type), "rx"); + + if (sentry->rle_id == L_HWLANE) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "hw"); + else if (sentry->rle_id == L_SWLANE) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "sw"); + else if (sentry->rle_id == L_LOCAL) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "local"); + else if (sentry->rle_id == L_BCAST) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "bcast"); + else + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "--"); + + if (sentry->rle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->rl_index, sizeof (buf->rl_index), "--"); + } else { + (void) snprintf(buf->rl_index, sizeof (buf->rl_index), + "%llu", sentry->rle_index); + } + + map_to_units(buf->rl_ipackets, sizeof (buf->rl_ipackets), + link_stats->rl_ipackets, unit, parsable); + + map_to_units(buf->rl_rbytes, sizeof (buf->rl_rbytes), + link_stats->rl_rbytes, unit, parsable); + + map_to_units(buf->rl_intrs, sizeof (buf->rl_intrs), + link_stats->rl_intrs, unit, parsable); + + map_to_units(buf->rl_polls, sizeof (buf->rl_polls), + link_stats->rl_polls, unit, parsable); + + map_to_units(buf->rl_sdrops, sizeof (buf->rl_sdrops), + link_stats->rl_sdrops, unit, parsable); + + map_to_units(buf->rl_chl10, sizeof (buf->rl_chl10), + link_stats->rl_chl10, unit, parsable); + + map_to_units(buf->rl_ch10_50, sizeof (buf->rl_ch10_50), + link_stats->rl_ch10_50, unit, parsable); + + map_to_units(buf->rl_chg50, sizeof (buf->rl_chg50), + link_stats->rl_chg50, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + tx_lane_stat_entry_t *sentry = statentry; + tx_lane_stat_t *link_stats = &sentry->tle_stats; + tx_lane_fields_buf_t *buf = NULL; + + if (sentry->tle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (tx_lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->tl_linkname, sizeof (buf->tl_linkname), "%s", + linkname); + + (void) 
snprintf(buf->tl_type, sizeof (buf->tl_type), "tx"); + + if (sentry->tle_id == L_HWLANE) + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "hw"); + else if (sentry->tle_id == L_SWLANE) + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "sw"); + else if (sentry->tle_id == L_BCAST) + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "bcast"); + else + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "--"); + + if (sentry->tle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->tl_index, sizeof (buf->tl_index), "--"); + } else { + (void) snprintf(buf->tl_index, sizeof (buf->tl_index), + "%llu", sentry->tle_index); + } + + map_to_units(buf->tl_opackets, sizeof (buf->tl_opackets), + link_stats->tl_opackets, unit, parsable); + + map_to_units(buf->tl_obytes, sizeof (buf->tl_obytes), + link_stats->tl_obytes, unit, parsable); + + map_to_units(buf->tl_blockcnt, sizeof (buf->tl_blockcnt), + link_stats->tl_blockcnt, unit, parsable); + + map_to_units(buf->tl_unblockcnt, sizeof (buf->tl_unblockcnt), + link_stats->tl_unblockcnt, unit, parsable); + + map_to_units(buf->tl_sdrops, sizeof (buf->tl_sdrops), + link_stats->tl_sdrops, unit, parsable); + +done: + return (buf); +} + +void * +print_fanout_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + fanout_stat_entry_t *sentry = statentry; + fanout_stat_t *link_stats = &sentry->fe_stats; + rx_fanout_lane_fields_buf_t *buf; + + buf = malloc(sizeof (rx_fanout_lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->rfl_linkname, sizeof (buf->rfl_linkname), "%s", + linkname); + + (void) snprintf(buf->rfl_type, sizeof (buf->rfl_type), "rx"); + + if (sentry->fe_id == L_HWLANE) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "hw"); + else if (sentry->fe_id == L_SWLANE) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "sw"); + else if (sentry->fe_id == L_LCLSWLANE) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "lcl/sw"); + else if (sentry->fe_id == L_LOCAL) + (void) 
snprintf(buf->rfl_id, sizeof (buf->rfl_id), "local"); + else if (sentry->fe_id == L_BCAST) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "bcast"); + else + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "--"); + + if (sentry->fe_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->rfl_index, sizeof (buf->rfl_index), "--"); + } else { + (void) snprintf(buf->rfl_index, sizeof (buf->rfl_index), + "%llu", sentry->fe_index); + } + + if (sentry->fe_foutindex == DLSTAT_INVALID_ENTRY) + (void) snprintf(buf->rfl_fout, sizeof (buf->rfl_fout), "--"); + else { + (void) snprintf(buf->rfl_fout, sizeof (buf->rfl_fout), "%llu", + sentry->fe_foutindex); + } + + map_to_units(buf->rfl_ipackets, sizeof (buf->rfl_ipackets), + link_stats->f_ipackets, unit, parsable); + + map_to_units(buf->rfl_rbytes, sizeof (buf->rfl_rbytes), + link_stats->f_rbytes, unit, parsable); + +done: + return (buf); +} + +void * +print_aggr_port_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + aggr_port_stat_entry_t *sentry = statentry; + aggr_port_stat_t *link_stats = &sentry->ape_stats; + aggr_port_fields_buf_t *buf; + char portname[MAXLINKNAMELEN]; + + buf = malloc(sizeof (aggr_port_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->ap_linkname, sizeof (buf->ap_linkname), "%s", + linkname); + + if (dladm_datalink_id2info(handle, sentry->ape_portlinkid, NULL, + NULL, NULL, portname, DLPI_LINKNAME_MAX) + != DLADM_STATUS_OK) { + (void) snprintf(buf->ap_portname, + sizeof (buf->ap_portname), "--"); + } else { + (void) snprintf(buf->ap_portname, + sizeof (buf->ap_portname), "%s", portname); + } + + map_to_units(buf->ap_ipackets, sizeof (buf->ap_ipackets), + link_stats->ap_ipackets, unit, parsable); + + map_to_units(buf->ap_rbytes, sizeof (buf->ap_rbytes), + link_stats->ap_rbytes, unit, parsable); + + map_to_units(buf->ap_opackets, sizeof (buf->ap_opackets), + link_stats->ap_opackets, unit, parsable); + + map_to_units(buf->ap_obytes, sizeof 
(buf->ap_obytes), + link_stats->ap_obytes, unit, parsable); + +done: + return (buf); +} + +dladm_stat_chain_t * +query_link_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg, + dladm_stat_type_t stattype) +{ + link_chain_t *link_node; + dladm_stat_chain_t *curr_stat; + dladm_stat_chain_t *prev_stat = NULL; + dladm_stat_chain_t *diff_stat = NULL; + + /* Get prev iteration stat for this link */ + link_node = get_link_prev_stat(linkid, arg); + if (link_node == NULL) + goto done; + + link_node->lc_visited = B_TRUE; + prev_stat = link_node->lc_statchain[stattype]; + + /* Query library for current stats */ + curr_stat = dladm_link_stat_query(dh, linkid, stattype); + if (curr_stat == NULL) + goto done; + + /* current stats - prev iteration stats */ + diff_stat = dladm_link_stat_diffchain(curr_stat, prev_stat, stattype); + + /* Free prev stats */ + dladm_link_stat_free(prev_stat); + + /* Prev <- curr stats */ + link_node->lc_statchain[stattype] = curr_stat; + +done: + return (diff_stat); +} + +void +walk_dlstat_stats(show_state_t *state, const char *linkname, + dladm_stat_type_t stattype, dladm_stat_chain_t *diff_stat) +{ + dladm_stat_chain_t *curr; + + /* Unpack invidual stat entry and call library consumer's callback */ + for (curr = diff_stat; curr != NULL; curr = curr->dc_next) { + void *fields_buf; + + /* Format the raw numbers for printing */ + fields_buf = state->ls_stats2str[stattype](linkname, + curr->dc_statentry, state->ls_unit, state->ls_parsable); + /* Print the stats */ + if (fields_buf != NULL) + ofmt_print(state->ls_ofmt, fields_buf); + free(fields_buf); + } +} + +static int +show_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg) +{ + show_state_t *state = arg; + int i; + dladm_stat_chain_t *diff_stat; + char linkname[DLPI_LINKNAME_MAX]; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) { + if 
(state->ls_stattype[i]) { + /* + * Query library for stats + * Stats are returned as chain of raw numbers + */ + diff_stat = query_link_stats(handle, linkid, arg, i); + walk_dlstat_stats(state, linkname, i, diff_stat); + dladm_link_stat_free(diff_stat); + } + } +done: + return (DLADM_WALK_CONTINUE); +} + +void +show_link_stats(datalink_id_t linkid, show_state_t state, uint32_t interval) +{ + for (;;) { + if (linkid == DATALINK_ALL_LINKID) { + (void) dladm_walk_datalink_id(show_queried_stats, + handle, &state, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } else { + (void) show_queried_stats(handle, linkid, &state); + } + + if (interval == 0) + break; + + cleanup_removed_links(&state); + (void) sleep(interval); + } +} + +void +print_all_stats(dladm_handle_t dh, datalink_id_t linkid, + dladm_stat_chain_t *stat_chain) +{ + dladm_stat_chain_t *curr; + name_value_stat_entry_t *stat_entry; + name_value_stat_t *curr_stat; + boolean_t stat_printed = B_FALSE; + char linkname[MAXLINKNAMELEN]; + char prev_linkname[MAXLINKNAMELEN]; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) + return; + + for (curr = stat_chain; curr != NULL; curr = curr->dc_next) { + stat_entry = curr->dc_statentry; + /* + * Print header + * If link name is already printed in previous iteration, + * don't print again + */ + if (strcmp(prev_linkname, linkname) != 0) + printf("%s \n", linkname); + printf(" %s \n", stat_entry->nve_header); + + /* Print stat fields */ + for (curr_stat = stat_entry->nve_stats; curr_stat != NULL; + curr_stat = curr_stat->nv_nextstat) { + printf("\t%15s", curr_stat->nv_statname); + printf("\t\t%15llu\n", curr_stat->nv_statval); + } + + strncpy(prev_linkname, linkname, MAXLINKNAMELEN); + stat_printed = B_TRUE; + } + if (stat_printed) + printf("---------------------------------------------------\n"); +} + +static int +dump_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg) +{ + 
boolean_t *stattype = arg; + int i; + dladm_stat_chain_t *stat_chain; + + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) { + if (stattype[i]) { + stat_chain = dladm_link_stat_query_all(dh, linkid, i); + print_all_stats(dh, linkid, stat_chain); + dladm_link_stat_query_all_free(stat_chain); + } + } +done: + return (DLADM_WALK_CONTINUE); +} + +void +dump_all_link_stats(datalink_id_t linkid, boolean_t *stattype) +{ + if (linkid == DATALINK_ALL_LINKID) { + (void) dladm_walk_datalink_id(dump_queried_stats, + handle, stattype, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } else { + (void) dump_queried_stats(handle, linkid, stattype); + } +} + +static void +do_show(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t a_arg = B_FALSE; + boolean_t A_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *total_stat_fields = + "link,ipkts,rbytes,opkts,obytes"; + char *rx_total_stat_fields = + "link,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50"; + char *tx_total_stat_fields = + "link,opkts,obytes,blkcnt,ublkcnt"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtaApi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'a': + if (a_arg) + die_optdup(option); + + a_arg = B_TRUE; + break; + case 'A': + if (A_arg) + die_optdup(option); + + A_arg = B_TRUE; + break; + 
case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the options -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (a_arg && A_arg) + die("the options -a and -A are not compatible"); + + if (a_arg && + (p_arg || o_arg || u_arg || i_arg)) { + die("the option -a is not compatible with " + "-p, -o, -u, -i"); + } + + if (A_arg && + (r_arg || t_arg || p_arg || o_arg || u_arg || i_arg)) { + die("the option -A is not compatible with " + "-r, -t, -p, -o, -u, -i"); + } + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + if (argc != 0) + usage(); + } + + if (a_arg) { + boolean_t stattype[DLADM_STAT_NUM_STATS]; + + bzero(&stattype, sizeof (stattype)); + if (r_arg) { + stattype[DLADM_STAT_RX_LANE_TOTAL] = B_TRUE; + } else if (t_arg) { + stattype[DLADM_STAT_TX_LANE_TOTAL] = B_TRUE; + } else { /* Display both Rx and Tx lanes */ + stattype[DLADM_STAT_TOTAL] = B_TRUE; + } + + dump_all_link_stats(linkid, stattype); + return; + } + + if (A_arg) { + boolean_t 
stattype[DLADM_STAT_NUM_STATS]; + int i; + + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) + stattype[i] = B_TRUE; + + dump_all_link_stats(linkid, stattype); + return; + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) { + fields_str = rx_total_stat_fields; + oftemplate = rx_lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE_TOTAL] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE_TOTAL] = + print_rx_lane_stats; + } else if (t_arg) { + fields_str = tx_total_stat_fields; + oftemplate = tx_lane_s_fields; + state.ls_stattype[DLADM_STAT_TX_LANE_TOTAL] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TX_LANE_TOTAL] = + print_tx_lane_stats; + } else { /* Display both Rx and Tx lanes total */ + fields_str = total_stat_fields; + oftemplate = total_s_fields; + state.ls_stattype[DLADM_STAT_TOTAL] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TOTAL] = print_total_stats; + } + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? 
+ fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +static void +do_show_phys(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t a_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + char linkname[MAXLINKNAMELEN]; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + char *ring_stat_fields = + "link,type,index,pkts,bytes"; + char *rx_ring_stat_fields = + "link,type,index,ipkts,rbytes"; + char *tx_ring_stat_fields = + "link,type,index,opkts,obytes"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtapi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'a': + if (a_arg) + die_optdup(option); + + a_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + 
die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the options -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (a_arg && + (p_arg || o_arg || u_arg || i_arg)) { + die("the option -a is not compatible with " + "-p, -o, -u, -i"); + } + + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + usage(); + } + + if (a_arg) { + boolean_t stattype[DLADM_STAT_NUM_STATS]; + + bzero(&stattype, sizeof (stattype)); + + if (r_arg) { + stattype[DLADM_STAT_RX_RING] = B_TRUE; + } else if (t_arg) { + stattype[DLADM_STAT_TX_RING] = B_TRUE; + } else { /* Display both Rx and Tx lanes */ + stattype[DLADM_STAT_RX_RING] = B_TRUE; + stattype[DLADM_STAT_TX_RING] = B_TRUE; + } + + dump_all_link_stats(linkid, stattype); + return; + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) { + fields_str = rx_ring_stat_fields; + oftemplate = rx_ring_s_fields; + state.ls_stattype[DLADM_STAT_RX_RING] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_RING] = print_rx_ring_stats; + } else if (t_arg) { + fields_str = tx_ring_stat_fields; + oftemplate = tx_ring_s_fields; + state.ls_stattype[DLADM_STAT_TX_RING] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TX_RING] = print_tx_ring_stats; + } else { /* Display both Rx and Tx lanes */ + fields_str = ring_stat_fields; + oftemplate = ring_s_fields; + state.ls_stattype[DLADM_STAT_RX_RING] = B_TRUE; + state.ls_stattype[DLADM_STAT_TX_RING] = B_TRUE; + 
state.ls_stats2str[DLADM_STAT_RX_RING] = + print_rx_generic_ring_stats; + state.ls_stats2str[DLADM_STAT_TX_RING] = + print_tx_generic_ring_stats; + } + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? + fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +static void +do_show_link(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t F_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t a_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *lane_stat_fields = + "link,type,id,index,pkts,bytes"; + char *rx_lane_stat_fields = + "link,type,id,index,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50"; + char *tx_lane_stat_fields = + "link,type,id,index,opkts,obytes,blkcnt,ublkcnt"; + char *rx_fanout_stat_fields = + "link,id,index,fout,ipkts,rbytes"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":hrtFapi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'h': + if (r_arg || F_arg || t_arg || i_arg || p_arg || + o_arg || u_arg || a_arg) { + die("the option -h is not compatible with " + "-r, -F, -t, -i, -p, -o, -u, -a"); + } + do_show_history(argc, &argv[0], use); + return; + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 'F': + if (F_arg) + die_optdup(option); + + F_arg = B_TRUE; + break; + case 't': + if 
(t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'a': + if (a_arg) + die_optdup(option); + + a_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the options -u and -p are not compatible"); + + if (F_arg && !r_arg) + die("-F must be used with -r"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (a_arg && + (p_arg || o_arg || u_arg || i_arg)) { + die("the option -a is not compatible with " + "-p, -o, -u, -i"); + } + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + usage(); + } + + if (a_arg) { + boolean_t stattype[DLADM_STAT_NUM_STATS]; + + bzero(&stattype, sizeof (stattype)); + + if (r_arg) { + if (F_arg) { + stattype[DLADM_STAT_RX_LANE_FOUT] = B_TRUE; + } else { + stattype[DLADM_STAT_RX_LANE] = B_TRUE; + } + } else if (t_arg) { + stattype[DLADM_STAT_TX_LANE] = B_TRUE; + } else { /* Display both Rx and Tx lanes */ + stattype[DLADM_STAT_RX_LANE] = B_TRUE; + stattype[DLADM_STAT_TX_LANE] = B_TRUE; + } + + 
dump_all_link_stats(linkid, stattype); + return; + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) { + if (F_arg) { + fields_str = rx_fanout_stat_fields; + oftemplate = rx_fanout_lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE_FOUT] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE_FOUT] = + print_fanout_stats; + } else { + fields_str = rx_lane_stat_fields; + oftemplate = rx_lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE] = + print_rx_lane_stats; + } + } else if (t_arg) { + fields_str = tx_lane_stat_fields; + oftemplate = tx_lane_s_fields; + state.ls_stattype[DLADM_STAT_TX_LANE] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TX_LANE] = print_tx_lane_stats; + } else { /* Display both Rx and Tx lanes */ + fields_str = lane_stat_fields; + oftemplate = lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE] = B_TRUE; + state.ls_stattype[DLADM_STAT_TX_LANE] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE] = + print_rx_generic_lane_stats; + state.ls_stats2str[DLADM_STAT_TX_LANE] = + print_tx_generic_lane_stats; + } + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? 
+ fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +static void +do_show_aggr(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *aggr_stat_fields = + "link,port,ipkts,rbytes,opkts,obytes"; + char *rx_aggr_stat_fields = "link,port,ipkts,rbytes"; + char *tx_aggr_stat_fields = "link,port,opkts,obytes"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtpi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + 
die("the options -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + usage(); + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + oftemplate = aggr_port_s_fields; + state.ls_stattype[DLADM_STAT_AGGR_PORT] = B_TRUE; + state.ls_stats2str[DLADM_STAT_AGGR_PORT] = print_aggr_port_stats; + + if (r_arg) + fields_str = rx_aggr_stat_fields; + else if (t_arg) + fields_str = tx_aggr_stat_fields; + else + fields_str = aggr_stat_fields; + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? + fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +/* PRINTFLIKE1 */ +static void +warn(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: warning: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); +} + +/* + * Also closes the dladm handle if it is not NULL. + */ +/* PRINTFLIKE2 */ +static void +die_dlerr(dladm_status_t err, const char *format, ...) 
+{ + va_list alist; + char errmsg[DLADM_STRSIZE]; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + (void) fprintf(stderr, ": %s\n", dladm_status2str(err, errmsg)); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + +/* PRINTFLIKE1 */ +static void +die(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + +static void +die_optdup(int opt) +{ + die("the option -%c cannot be specified more than once", opt); +} + +static void +die_opterr(int opt, int opterr, const char *usage) +{ + switch (opterr) { + case ':': + die("option '-%c' requires a value\nusage: %s", opt, + gettext(usage)); + break; + case '?': + default: + die("unrecognized option '-%c'\nusage: %s", opt, + gettext(usage)); + break; + } +} + +/* + * default output callback function that, when invoked, + * prints string which is offset by ofmt_arg->ofmt_id within buf. + */ +static boolean_t +print_default_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + char *value; + + value = (char *)ofarg->ofmt_cbarg + ofarg->ofmt_id; + (void) strlcpy(buf, value, bufsize); + return (B_TRUE); +} + +static void +dlstat_ofmt_check(ofmt_status_t oferr, boolean_t parsable, + ofmt_handle_t ofmt) +{ + char buf[OFMT_BUFSIZE]; + + if (oferr == OFMT_SUCCESS) + return; + (void) ofmt_strerror(ofmt, oferr, buf, sizeof (buf)); + /* + * All errors are considered fatal in parsable mode. + * NOMEM errors are always fatal, regardless of mode. + * For other errors, we print diagnostics in human-readable + * mode and processs what we can. 
+ */ + if (parsable || oferr == OFMT_ENOFIELDS) { + ofmt_close(ofmt); + die(buf); + } else { + warn(buf); + } +} diff --git a/usr/src/cmd/dlstat/dlstat.xcl b/usr/src/cmd/dlstat/dlstat.xcl new file mode 100644 index 0000000000..bc201f606f --- /dev/null +++ b/usr/src/cmd/dlstat/dlstat.xcl @@ -0,0 +1,110 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# + +msgid " %s \n" +msgid " %.2f" +msgid " %6.2lf%c" +msgid " %7.0lf%c" +msgid " %s" +msgid "---------------------------------------------------\n" +msgid "--" +msgid ": %s\n" +msgid ":hrtFapi:o:u:" +msgid ":rtaApi:o:u:" +msgid ":rtapi:o:u:" +msgid ":rtpi:o:u:" +msgid "" +msgid "\n" +msgid "\t\t%15llu\n" +msgid "\t%15s" +msgid "# Time" +msgid "%.0lf" +msgid "%llu" +msgid "%m/%d/%Y" +msgid "%s \n" +msgid "%s Mbps" +msgid "%s: " +msgid "%s: warning: " +msgid "%s" +msgid "%s\n" +msgid "%T" +msgid "all" +msgid "B" +msgid "BANDWIDTH" +msgid "bcast" +msgid "BLKCNT" +msgid "BYTES" +msgid "CH<10" +msgid "CH>50" +msgid "CH10-50" +msgid "das:e:o:f:F:" +msgid "DURATION" +msgid "END" +msgid "FOUT" +msgid "G" +msgid "gnuplot" +msgid "hw" +msgid "ID" +msgid "INDEX" +msgid "INTRS" +msgid "IPKTS" +msgid "K" +msgid "lcl/sw" +msgid "link,id,index,fout,ipkts,rbytes" +msgid "link,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50" +msgid "link,ipkts,rbytes,opkts,obytes" +msgid "link,opkts,obytes,blkcnt,ublkcnt" +msgid "link,port,ipkts,rbytes,opkts,obytes" +msgid "link,port,ipkts,rbytes" +msgid "link,port,opkts,obytes" +msgid "link,start,end,rbytes,obytes,bandwidth" +msgid "link,type,id,index,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50" +msgid "link,type,id,index,opkts,obytes,blkcnt,ublkcnt" +msgid "link,type,id,index,pkts,bytes" +msgid "link,type,index,ipkts,rbytes" +msgid "link,type,index,opkts,obytes" +msgid "link,type,index,pkts,bytes" +msgid "LINK" +msgid "local" +msgid "M" +msgid "OBYTES" +msgid "OPKTS" +msgid "P" +msgid "PKTS" +msgid "POLLS" +msgid "PORT" +msgid "RBYTES" +msgid "rx" +msgid "SDROPS" +msgid "show-aggr" +msgid "show-link -h" +msgid "show-link" +msgid "show-phys" +msgid "START" +msgid "sw" +msgid "T" +msgid "tx" +msgid "TYPE" +msgid "UBLKCNT" diff --git a/usr/src/cmd/flowadm/Makefile b/usr/src/cmd/flowadm/Makefile index aa057c1f2b..19a15a1b47 100644 --- a/usr/src/cmd/flowadm/Makefile +++ b/usr/src/cmd/flowadm/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# 
Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -34,7 +34,7 @@ include ../Makefile.cmd XGETFLAGS += -a -x $(PROG).xcl LDLIBS += -L$(ROOT)/lib -LDLIBS += -ldladm -lkstat -linetutil +LDLIBS += -ldladm -linetutil ROOTCFGDIR= $(ROOTETC)/dladm ROOTCFGFILES= $(CONFIGFILES:%=$(ROOTCFGDIR)/%) diff --git a/usr/src/cmd/flowadm/flowadm.c b/usr/src/cmd/flowadm/flowadm.c index 2950adcf48..374fa1675c 100644 --- a/usr/src/cmd/flowadm/flowadm.c +++ b/usr/src/cmd/flowadm/flowadm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,7 +31,6 @@ #include <string.h> #include <stropts.h> #include <errno.h> -#include <kstat.h> #include <strings.h> #include <getopt.h> #include <unistd.h> @@ -51,45 +50,22 @@ #include <stddef.h> #include <ofmt.h> -typedef struct show_usage_state_s { - boolean_t us_plot; - boolean_t us_parsable; - boolean_t us_printheader; - boolean_t us_first; - boolean_t us_showall; - ofmt_handle_t us_ofmt; -} show_usage_state_t; - typedef struct show_flow_state { - boolean_t fs_firstonly; - boolean_t fs_donefirst; - pktsum_t fs_prevstats; - uint32_t fs_flags; dladm_status_t fs_status; ofmt_handle_t fs_ofmt; const char *fs_flow; - const char *fs_link; boolean_t fs_parsable; boolean_t fs_persist; - boolean_t fs_stats; - uint64_t fs_mask; } show_flow_state_t; typedef void cmdfunc_t(int, char **); static cmdfunc_t do_add_flow, do_remove_flow, do_init_flow, do_show_flow; static cmdfunc_t do_show_flowprop, do_set_flowprop, do_reset_flowprop; -static cmdfunc_t do_show_usage; static int show_flow(dladm_handle_t, dladm_flow_attr_t *, void *); static int show_flows_onelink(dladm_handle_t, datalink_id_t, void *); -static void flow_stats(const char *, datalink_id_t, uint_t, char *, - show_flow_state_t *); -static 
void get_flow_stats(const char *, pktsum_t *); -static int show_flow_stats(dladm_handle_t, dladm_flow_attr_t *, void *); -static int show_link_flow_stats(dladm_handle_t, datalink_id_t, void *); - static int remove_flow(dladm_handle_t, dladm_flow_attr_t *, void *); static int show_flowprop(dladm_handle_t, dladm_flow_attr_t *, void *); @@ -104,7 +80,7 @@ static void warn(const char *, ...); static void warn_dlerr(dladm_status_t, const char *, ...); /* callback functions for printing output */ -static ofmt_cb_t print_flowprop_cb, print_default_cb, print_flow_stats_cb; +static ofmt_cb_t print_flowprop_cb, print_default_cb; static void flowadm_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t); typedef struct cmd { @@ -120,15 +96,12 @@ static cmd_t cmds[] = { { "reset-flowprop", do_reset_flowprop }, { "show-flow", do_show_flow }, { "init-flow", do_init_flow }, - { "show-usage", do_show_usage } }; static const struct option longopts[] = { {"link", required_argument, 0, 'l'}, {"parsable", no_argument, 0, 'p'}, {"parseable", no_argument, 0, 'p'}, - {"statistics", no_argument, 0, 's'}, - {"interval", required_argument, 0, 'i'}, {"temporary", no_argument, 0, 't'}, {"root-dir", required_argument, 0, 'R'}, { 0, 0, 0, 0 } @@ -236,104 +209,6 @@ typedef struct flowprop_args_s { char *fs_propname; char *fs_flowname; } flowprop_args_t; -/* - * structures for 'flowadm show-flow -s' (print statistics) - */ -typedef enum { - FLOW_S_FLOW, - FLOW_S_IPKTS, - FLOW_S_RBYTES, - FLOW_S_IERRORS, - FLOW_S_OPKTS, - FLOW_S_OBYTES, - FLOW_S_OERRORS -} flow_s_field_index_t; - -static ofmt_field_t flow_s_fields[] = { -/* name, field width, index, callback */ -{ "FLOW", 15, FLOW_S_FLOW, print_flow_stats_cb}, -{ "IPACKETS", 10, FLOW_S_IPKTS, print_flow_stats_cb}, -{ "RBYTES", 8, FLOW_S_RBYTES, print_flow_stats_cb}, -{ "IERRORS", 10, FLOW_S_IERRORS, print_flow_stats_cb}, -{ "OPACKETS", 12, FLOW_S_OPKTS, print_flow_stats_cb}, -{ "OBYTES", 12, FLOW_S_OBYTES, print_flow_stats_cb}, -{ "OERRORS", 8, 
FLOW_S_OERRORS, print_flow_stats_cb}, -NULL_OFMT} -; - -typedef struct flow_args_s { - char *flow_s_flow; - pktsum_t *flow_s_psum; -} flow_args_t; - -/* - * structures for 'flowadm show-usage' - */ -typedef struct usage_fields_buf_s { - char usage_flow[12]; - char usage_duration[10]; - char usage_ipackets[9]; - char usage_rbytes[10]; - char usage_opackets[9]; - char usage_obytes[10]; - char usage_bandwidth[14]; -} usage_fields_buf_t; - -static ofmt_field_t usage_fields[] = { -/* name, field width, offset */ -{ "FLOW", 13, - offsetof(usage_fields_buf_t, usage_flow), print_default_cb}, -{ "DURATION", 11, - offsetof(usage_fields_buf_t, usage_duration), print_default_cb}, -{ "IPACKETS", 10, - offsetof(usage_fields_buf_t, usage_ipackets), print_default_cb}, -{ "RBYTES", 11, - offsetof(usage_fields_buf_t, usage_rbytes), print_default_cb}, -{ "OPACKETS", 10, - offsetof(usage_fields_buf_t, usage_opackets), print_default_cb}, -{ "OBYTES", 11, - offsetof(usage_fields_buf_t, usage_obytes), print_default_cb}, -{ "BANDWIDTH", 15, - offsetof(usage_fields_buf_t, usage_bandwidth), print_default_cb}, -NULL_OFMT} -; - -/* - * structures for 'dladm show-usage link' - */ - -typedef struct usage_l_fields_buf_s { - char usage_l_flow[12]; - char usage_l_stime[13]; - char usage_l_etime[13]; - char usage_l_rbytes[8]; - char usage_l_obytes[8]; - char usage_l_bandwidth[14]; -} usage_l_fields_buf_t; - -static ofmt_field_t usage_l_fields[] = { -/* name, field width, offset */ -{ "FLOW", 13, - offsetof(usage_l_fields_buf_t, usage_l_flow), print_default_cb}, -{ "START", 14, - offsetof(usage_l_fields_buf_t, usage_l_stime), print_default_cb}, -{ "END", 14, - offsetof(usage_l_fields_buf_t, usage_l_etime), print_default_cb}, -{ "RBYTES", 9, - offsetof(usage_l_fields_buf_t, usage_l_rbytes), print_default_cb}, -{ "OBYTES", 9, - offsetof(usage_l_fields_buf_t, usage_l_obytes), print_default_cb}, -{ "BANDWIDTH", 15, - offsetof(usage_l_fields_buf_t, usage_l_bandwidth), print_default_cb}, -NULL_OFMT} -; - 
-#define PRI_HI 100 -#define PRI_LO 10 -#define PRI_NORM 50 - -#define FLOWADM_CONF "/etc/dladm/flowadm.conf" -#define BLANK_LINE(s) ((s[0] == '\0') || (s[0] == '#') || (s[0] == '\n')) static char *progname; @@ -360,15 +235,12 @@ usage(void) " add-flow [-t] -l <link> -a <attr>=<value>[,...]\n" "\t\t [-p <prop>=<value>,...] <flow>\n" " remove-flow [-t] {-l <link> | <flow>}\n" - " show-flow [-p] [-s [-i <interval>]] [-l <link>] " + " show-flow [-p] [-l <link>] " "[<flow>]\n\n" " set-flowprop [-t] -p <prop>=<value>[,...] <flow>\n" " reset-flowprop [-t] [-p <prop>,...] <flow>\n" " show-flowprop [-cP] [-l <link>] [-p <prop>,...] " - "[<flow>]\n\n" - " show-usage [-a] [-d | -F <format>] " - "[-s <DD/MM/YYYY,HH:MM:SS>]\n" - "\t\t [-e <DD/MM/YYYY,HH:MM:SS>] -f <logfile> [<flow>]\n")); + "[<flow>]\n")); /* close dladm handle if it was opened */ if (handle != NULL) @@ -446,275 +318,6 @@ do_init_flow(int argc, char *argv[]) die_dlerr(status, "flows initialization failed"); } -/* ARGSUSED */ -static int -show_usage_date(dladm_usage_t *usage, void *arg) -{ - show_usage_state_t *state = (show_usage_state_t *)arg; - time_t stime; - char timebuf[20]; - dladm_flow_attr_t attr; - dladm_status_t status; - - /* - * Only show usage information for existing flows unless '-a' - * is specified. - */ - if (!state->us_showall && ((status = dladm_flow_info(handle, - usage->du_name, &attr)) != DLADM_STATUS_OK)) { - return (status); - } - - stime = usage->du_stime; - (void) strftime(timebuf, sizeof (timebuf), "%m/%d/%Y", - localtime(&stime)); - (void) printf("%s\n", timebuf); - - return (DLADM_STATUS_OK); -} - -static int -show_usage_time(dladm_usage_t *usage, void *arg) -{ - show_usage_state_t *state = (show_usage_state_t *)arg; - char buf[DLADM_STRSIZE]; - usage_l_fields_buf_t ubuf; - time_t time; - double bw; - dladm_flow_attr_t attr; - dladm_status_t status; - - /* - * Only show usage information for existing flows unless '-a' - * is specified. 
- */ - if (!state->us_showall && ((status = dladm_flow_info(handle, - usage->du_name, &attr)) != DLADM_STATUS_OK)) { - return (status); - } - - if (state->us_plot) { - if (!state->us_printheader) { - if (state->us_first) { - (void) printf("# Time"); - state->us_first = B_FALSE; - } - (void) printf(" %s", usage->du_name); - if (usage->du_last) { - (void) printf("\n"); - state->us_first = B_TRUE; - state->us_printheader = B_TRUE; - } - } else { - if (state->us_first) { - time = usage->du_etime; - (void) strftime(buf, sizeof (buf), "%T", - localtime(&time)); - state->us_first = B_FALSE; - (void) printf("%s", buf); - } - bw = (double)usage->du_bandwidth/1000; - (void) printf(" %.2f", bw); - if (usage->du_last) { - (void) printf("\n"); - state->us_first = B_TRUE; - } - } - return (DLADM_STATUS_OK); - } - - bzero(&ubuf, sizeof (ubuf)); - - (void) snprintf(ubuf.usage_l_flow, sizeof (ubuf.usage_l_flow), "%s", - usage->du_name); - time = usage->du_stime; - (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); - (void) snprintf(ubuf.usage_l_stime, sizeof (ubuf.usage_l_stime), "%s", - buf); - time = usage->du_etime; - (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); - (void) snprintf(ubuf.usage_l_etime, sizeof (ubuf.usage_l_etime), "%s", - buf); - (void) snprintf(ubuf.usage_l_rbytes, sizeof (ubuf.usage_l_rbytes), - "%llu", usage->du_rbytes); - (void) snprintf(ubuf.usage_l_obytes, sizeof (ubuf.usage_l_obytes), - "%llu", usage->du_obytes); - (void) snprintf(ubuf.usage_l_bandwidth, sizeof (ubuf.usage_l_bandwidth), - "%s Mbps", dladm_bw2str(usage->du_bandwidth, buf)); - - ofmt_print(state->us_ofmt, (void *)&ubuf); - return (DLADM_STATUS_OK); -} - -static int -show_usage_res(dladm_usage_t *usage, void *arg) -{ - show_usage_state_t *state = (show_usage_state_t *)arg; - char buf[DLADM_STRSIZE]; - usage_fields_buf_t ubuf; - dladm_flow_attr_t attr; - dladm_status_t status; - - /* - * Only show usage information for existing flows unless '-a' - * is specified. 
- */ - if (!state->us_showall && ((status = dladm_flow_info(handle, - usage->du_name, &attr)) != DLADM_STATUS_OK)) { - return (status); - } - - bzero(&ubuf, sizeof (ubuf)); - - (void) snprintf(ubuf.usage_flow, sizeof (ubuf.usage_flow), "%s", - usage->du_name); - (void) snprintf(ubuf.usage_duration, sizeof (ubuf.usage_duration), - "%llu", usage->du_duration); - (void) snprintf(ubuf.usage_ipackets, sizeof (ubuf.usage_ipackets), - "%llu", usage->du_ipackets); - (void) snprintf(ubuf.usage_rbytes, sizeof (ubuf.usage_rbytes), - "%llu", usage->du_rbytes); - (void) snprintf(ubuf.usage_opackets, sizeof (ubuf.usage_opackets), - "%llu", usage->du_opackets); - (void) snprintf(ubuf.usage_obytes, sizeof (ubuf.usage_obytes), - "%llu", usage->du_obytes); - (void) snprintf(ubuf.usage_bandwidth, sizeof (ubuf.usage_bandwidth), - "%s Mbps", dladm_bw2str(usage->du_bandwidth, buf)); - - ofmt_print(state->us_ofmt, (void *)&ubuf); - - return (DLADM_STATUS_OK); -} - -static boolean_t -valid_formatspec(char *formatspec_str) -{ - if (strcmp(formatspec_str, "gnuplot") == 0) - return (B_TRUE); - return (B_FALSE); -} - -/* ARGSUSED */ -static void -do_show_usage(int argc, char *argv[]) -{ - char *file = NULL; - int opt; - dladm_status_t status; - boolean_t d_arg = B_FALSE; - char *stime = NULL; - char *etime = NULL; - char *resource = NULL; - show_usage_state_t state; - boolean_t o_arg = B_FALSE; - boolean_t F_arg = B_FALSE; - char *fields_str = NULL; - char *formatspec_str = NULL; - char *all_fields = - "flow,duration,ipackets,rbytes,opackets,obytes,bandwidth"; - char *all_l_fields = - "flow,start,end,rbytes,obytes,bandwidth"; - ofmt_handle_t ofmt; - ofmt_status_t oferr; - uint_t ofmtflags = 0; - - bzero(&state, sizeof (show_usage_state_t)); - state.us_parsable = B_FALSE; - state.us_printheader = B_FALSE; - state.us_plot = B_FALSE; - state.us_first = B_TRUE; - - while ((opt = getopt(argc, argv, "das:e:o:f:F:")) != -1) { - switch (opt) { - case 'd': - d_arg = B_TRUE; - break; - case 'a': - 
state.us_showall = B_TRUE; - break; - case 'f': - file = optarg; - break; - case 's': - stime = optarg; - break; - case 'e': - etime = optarg; - break; - case 'o': - o_arg = B_TRUE; - fields_str = optarg; - break; - case 'F': - state.us_plot = F_arg = B_TRUE; - formatspec_str = optarg; - break; - default: - die_opterr(optopt, opt); - } - } - - if (file == NULL) - die("show-usage requires a file"); - - if (optind == (argc-1)) { - dladm_flow_attr_t attr; - - if (!state.us_showall && - dladm_flow_info(handle, resource, &attr) != - DLADM_STATUS_OK) { - die("invalid flow: '%s'", resource); - } - resource = argv[optind]; - } - - if (state.us_parsable) - ofmtflags |= OFMT_PARSABLE; - if (resource == NULL && stime == NULL && etime == NULL) { - if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) - fields_str = all_fields; - oferr = ofmt_open(fields_str, usage_fields, ofmtflags, - 0, &ofmt); - } else { - if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) - fields_str = all_l_fields; - oferr = ofmt_open(fields_str, usage_l_fields, ofmtflags, - 0, &ofmt); - } - - flowadm_ofmt_check(oferr, state.us_parsable, ofmt); - state.us_ofmt = ofmt; - - if (F_arg && d_arg) - die("incompatible -d and -F options"); - - if (F_arg && valid_formatspec(formatspec_str) == B_FALSE) - die("Format specifier %s not supported", formatspec_str); - - if (d_arg) { - /* Print log dates */ - status = dladm_usage_dates(show_usage_date, - DLADM_LOGTYPE_FLOW, file, resource, &state); - } else if (resource == NULL && stime == NULL && etime == NULL && - !F_arg) { - /* Print summary */ - status = dladm_usage_summary(show_usage_res, - DLADM_LOGTYPE_FLOW, file, &state); - } else if (resource != NULL) { - /* Print log entries for named resource */ - status = dladm_walk_usage_res(show_usage_time, - DLADM_LOGTYPE_FLOW, file, resource, stime, etime, &state); - } else { - /* Print time and information for each link */ - status = dladm_walk_usage_time(show_usage_time, - DLADM_LOGTYPE_FLOW, file, 
stime, etime, &state); - } - - ofmt_close(ofmt); - if (status != DLADM_STATUS_OK) - die_dlerr(status, "show-usage"); -} - static void do_add_flow(int argc, char *argv[]) { @@ -981,176 +584,14 @@ show_flows_onelink(dladm_handle_t dh, datalink_id_t linkid, void *arg) } static void -get_flow_stats(const char *flowname, pktsum_t *stats) -{ - kstat_ctl_t *kcp; - kstat_t *ksp; - - bzero(stats, sizeof (*stats)); - - if ((kcp = kstat_open()) == NULL) { - warn("kstat open operation failed"); - return; - } - - ksp = dladm_kstat_lookup(kcp, NULL, -1, flowname, "flow"); - - if (ksp != NULL) - dladm_get_stats(kcp, ksp, stats); - - (void) kstat_close(kcp); -} - -static boolean_t -print_flow_stats_cb(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) -{ - flow_args_t *fargs = of_arg->ofmt_cbarg; - pktsum_t *diff_stats = fargs->flow_s_psum; - - switch (of_arg->ofmt_id) { - case FLOW_S_FLOW: - (void) snprintf(buf, bufsize, "%s", fargs->flow_s_flow); - break; - case FLOW_S_IPKTS: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->ipackets); - break; - case FLOW_S_RBYTES: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->rbytes); - break; - case FLOW_S_IERRORS: - (void) snprintf(buf, bufsize, "%u", - diff_stats->ierrors); - break; - case FLOW_S_OPKTS: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->opackets); - break; - case FLOW_S_OBYTES: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->obytes); - break; - case FLOW_S_OERRORS: - (void) snprintf(buf, bufsize, "%u", - diff_stats->oerrors); - break; - default: - die("invalid input"); - break; - } - return (B_TRUE); -} - -/* ARGSUSED */ -static int -show_flow_stats(dladm_handle_t handle, dladm_flow_attr_t *attr, void *arg) -{ - show_flow_state_t *state = (show_flow_state_t *)arg; - char *name = attr->fa_flowname; - pktsum_t stats, diff_stats; - flow_args_t fargs; - - if (state->fs_firstonly) { - if (state->fs_donefirst) - return (DLADM_WALK_TERMINATE); - state->fs_donefirst = B_TRUE; - } else { - 
bzero(&state->fs_prevstats, sizeof (state->fs_prevstats)); - } - - get_flow_stats(name, &stats); - dladm_stats_diff(&diff_stats, &stats, &state->fs_prevstats); - - fargs.flow_s_flow = name; - fargs.flow_s_psum = &diff_stats; - ofmt_print(state->fs_ofmt, (void *)&fargs); - state->fs_prevstats = stats; - - return (DLADM_WALK_CONTINUE); -} - -/* - * Wrapper of dladm_walk_flow(show_flow,...) to make it usable for - * dladm_walk_datalink_id(). Used for showing flow stats for - * all flows on all links. - */ -static int -show_link_flow_stats(dladm_handle_t dh, datalink_id_t linkid, void * arg) -{ - if (dladm_walk_flow(show_flow_stats, dh, linkid, arg, B_FALSE) - == DLADM_STATUS_OK) - return (DLADM_WALK_CONTINUE); - else - return (DLADM_WALK_TERMINATE); -} - -/* ARGSUSED */ -static void -flow_stats(const char *flow, datalink_id_t linkid, uint_t interval, - char *fields_str, show_flow_state_t *state) -{ - dladm_flow_attr_t attr; - ofmt_handle_t ofmt; - ofmt_status_t oferr; - uint_t ofmtflags = 0; - - oferr = ofmt_open(fields_str, flow_s_fields, ofmtflags, 0, &ofmt); - flowadm_ofmt_check(oferr, state->fs_parsable, ofmt); - state->fs_ofmt = ofmt; - - if (flow != NULL && - dladm_flow_info(handle, flow, &attr) != DLADM_STATUS_OK) - die("invalid flow %s", flow); - - /* - * If an interval is specified, continuously show the stats - * for only the first flow. 
- */ - state->fs_firstonly = (interval != 0); - - for (;;) { - state->fs_donefirst = B_FALSE; - - /* Show stats for named flow */ - if (flow != NULL) { - state->fs_flow = flow; - (void) show_flow_stats(handle, &attr, state); - - /* Show all stats on a link */ - } else if (linkid != DATALINK_INVALID_LINKID) { - (void) dladm_walk_flow(show_flow_stats, handle, linkid, - state, B_FALSE); - - /* Show all stats by datalink */ - } else { - (void) dladm_walk_datalink_id(show_link_flow_stats, - handle, state, DATALINK_CLASS_ALL, - DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); - } - - if (interval == 0) - break; - - (void) fflush(stdout); - (void) sleep(interval); - } - ofmt_close(ofmt); -} - -static void do_show_flow(int argc, char *argv[]) { char flowname[MAXFLOWNAMELEN]; char linkname[MAXLINKNAMELEN]; datalink_id_t linkid = DATALINK_ALL_LINKID; int option; - boolean_t s_arg = B_FALSE; - boolean_t S_arg = B_FALSE; - boolean_t i_arg = B_FALSE; boolean_t l_arg = B_FALSE; boolean_t o_arg = B_FALSE; - uint32_t interval = 0; show_flow_state_t state; char *fields_str = NULL; ofmt_handle_t ofmt; @@ -1160,7 +601,7 @@ do_show_flow(int argc, char *argv[]) bzero(&state, sizeof (state)); opterr = 0; - while ((option = getopt_long(argc, argv, ":pPsSi:l:o:", + while ((option = getopt_long(argc, argv, ":pPl:o:", longopts, NULL)) != -1) { switch (option) { case 'p': @@ -1170,18 +611,6 @@ do_show_flow(int argc, char *argv[]) case 'P': state.fs_persist = B_TRUE; break; - case 's': - if (s_arg) - die_optdup(option); - - s_arg = B_TRUE; - break; - case 'S': - if (S_arg) - die_optdup(option); - - S_arg = B_TRUE; - break; case 'o': if (o_arg) die_optdup(option); @@ -1189,15 +618,6 @@ do_show_flow(int argc, char *argv[]) o_arg = B_TRUE; fields_str = optarg; break; - case 'i': - if (i_arg) - die_optdup(option); - - i_arg = B_TRUE; - - if (!dladm_str2interval(optarg, &interval)) - die("invalid interval value '%s'", optarg); - break; case 'l': if (strlcpy(linkname, optarg, MAXLINKNAMELEN) >= 
MAXLINKNAMELEN) @@ -1212,11 +632,6 @@ do_show_flow(int argc, char *argv[]) break; } } - if (i_arg && !(s_arg || S_arg)) - die("the -i option can be used only with -s or -S"); - - if (s_arg && S_arg) - die("the -s option cannot be used with -S"); /* get flow name (optional last argument */ if (optind == (argc-1)) { @@ -1226,17 +641,6 @@ do_show_flow(int argc, char *argv[]) state.fs_flow = flowname; } - if (S_arg) { - dladm_continuous(handle, linkid, state.fs_flow, interval, - FLOW_REPORT); - return; - } - - if (s_arg) { - flow_stats(state.fs_flow, linkid, interval, fields_str, &state); - return; - } - oferr = ofmt_open(fields_str, flow_fields, ofmtflags, 0, &ofmt); flowadm_ofmt_check(oferr, state.fs_parsable, ofmt); state.fs_ofmt = ofmt; @@ -1471,7 +875,7 @@ warn(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); } /* PRINTFLIKE2 */ @@ -1503,7 +907,7 @@ die(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); /* close dladm handle if it was opened */ if (handle != NULL) diff --git a/usr/src/cmd/flowstat/Makefile b/usr/src/cmd/flowstat/Makefile new file mode 100644 index 0000000000..5cb3eb4025 --- /dev/null +++ b/usr/src/cmd/flowstat/Makefile @@ -0,0 +1,70 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +PROG=flowstat + +ROOTFS_PROG= $(PROG) + +POFILE= $(PROG).po + +include ../Makefile.cmd + +XGETFLAGS += -a -x $(PROG).xcl +LDLIBS += -L$(ROOT)/lib +LDLIBS += -ldladm -linetutil + +ROOTCFGDIR= $(ROOTETC)/dladm + +.KEEP_STATE: + +all: $(ROOTFS_PROG) + +# +# Message catalog +# +_msg: $(POFILE) + +$(POFILE): $(PROG).c + $(RM) $@ + $(COMPILE.cpp) $(PROG).c > $(POFILE).i + $(XGETTEXT) $(XGETFLAGS) $(POFILE).i + sed "/^domain/d" messages.po > $@ + $(RM) messages.po $(POFILE).i + +install: all $(ROOTSBINPROG) $(ROOTCFGDIR) + $(RM) $(ROOTUSRSBINPROG) + -$(SYMLINK) ../../sbin/$(PROG) $(ROOTUSRSBINPROG) + +clean: + +lint: lint_PROG + +$(ROOTCFGDIR): + $(INS.dir) + +$(ROOTCFGDIR)/%: $(ROOTCFGDIR) % + $(INS.file) + +include ../Makefile.targ diff --git a/usr/src/cmd/flowstat/flowstat.c b/usr/src/cmd/flowstat/flowstat.c new file mode 100644 index 0000000000..3ddff9e34f --- /dev/null +++ b/usr/src/cmd/flowstat/flowstat.c @@ -0,0 +1,1149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <locale.h> +#include <stdarg.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <stropts.h> +#include <errno.h> +#include <strings.h> +#include <getopt.h> +#include <unistd.h> +#include <priv.h> +#include <netdb.h> +#include <libintl.h> +#include <libdlflow.h> +#include <libdllink.h> +#include <libdlstat.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/ethernet.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <stddef.h> +#include <ofmt.h> + +typedef struct flow_chain_s { + char fc_flowname[MAXFLOWNAMELEN]; + boolean_t fc_visited; + flow_stat_t *fc_stat; + struct flow_chain_s *fc_next; +} flow_chain_t; + +typedef struct show_flow_state { + flow_chain_t *fs_flowchain; + ofmt_handle_t fs_ofmt; + char fs_unit; + boolean_t fs_parsable; +} show_flow_state_t; + +typedef struct show_history_state_s { + boolean_t us_plot; + boolean_t us_parsable; + boolean_t us_printheader; + boolean_t us_first; + boolean_t us_showall; + ofmt_handle_t us_ofmt; +} show_history_state_t; + +static void do_show_history(int, char **); + +static int query_flow_stats(dladm_handle_t, dladm_flow_attr_t *, void *); +static int query_link_flow_stats(dladm_handle_t, datalink_id_t, void *); + +static void die(const char *, ...); +static void die_optdup(int); +static void die_opterr(int, int, const char *); +static void die_dlerr(dladm_status_t, const char *, ...); +static void warn(const char *, ...); + +/* callback functions for printing output */ +static ofmt_cb_t print_default_cb, print_flow_stats_cb; +static void 
flowstat_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t); + +#define NULL_OFMT {NULL, 0, 0, NULL} + +/* + * structures for flowstat (printing live statistics) + */ +typedef enum { + FLOW_S_FLOW, + FLOW_S_IPKTS, + FLOW_S_RBYTES, + FLOW_S_IERRORS, + FLOW_S_OPKTS, + FLOW_S_OBYTES, + FLOW_S_OERRORS +} flow_s_field_index_t; + +static ofmt_field_t flow_s_fields[] = { +/* name, field width, index, callback */ +{ "FLOW", 15, FLOW_S_FLOW, print_flow_stats_cb}, +{ "IPKTS", 8, FLOW_S_IPKTS, print_flow_stats_cb}, +{ "RBYTES", 8, FLOW_S_RBYTES, print_flow_stats_cb}, +{ "IERRS", 8, FLOW_S_IERRORS, print_flow_stats_cb}, +{ "OPKTS", 8, FLOW_S_OPKTS, print_flow_stats_cb}, +{ "OBYTES", 8, FLOW_S_OBYTES, print_flow_stats_cb}, +{ "OERRS", 8, FLOW_S_OERRORS, print_flow_stats_cb}, +NULL_OFMT} +; + +typedef struct flow_args_s { + char *flow_s_flow; + flow_stat_t *flow_s_stat; + char flow_s_unit; + boolean_t flow_s_parsable; +} flow_args_t; + +/* + * structures for 'flowstat -h' + */ +typedef struct history_fields_buf_s { + char history_flow[12]; + char history_duration[10]; + char history_ipackets[9]; + char history_rbytes[10]; + char history_opackets[9]; + char history_obytes[10]; + char history_bandwidth[14]; +} history_fields_buf_t; + +static ofmt_field_t history_fields[] = { +/* name, field width, offset */ +{ "FLOW", 13, + offsetof(history_fields_buf_t, history_flow), print_default_cb}, +{ "DURATION", 11, + offsetof(history_fields_buf_t, history_duration), print_default_cb}, +{ "IPACKETS", 10, + offsetof(history_fields_buf_t, history_ipackets), print_default_cb}, +{ "RBYTES", 11, + offsetof(history_fields_buf_t, history_rbytes), print_default_cb}, +{ "OPACKETS", 10, + offsetof(history_fields_buf_t, history_opackets), print_default_cb}, +{ "OBYTES", 11, + offsetof(history_fields_buf_t, history_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_fields_buf_t, history_bandwidth), print_default_cb}, +NULL_OFMT} +; + +typedef struct history_l_fields_buf_s { + char 
history_l_flow[12]; + char history_l_stime[13]; + char history_l_etime[13]; + char history_l_rbytes[8]; + char history_l_obytes[8]; + char history_l_bandwidth[14]; +} history_l_fields_buf_t; + +static ofmt_field_t history_l_fields[] = { +/* name, field width, offset */ +{ "FLOW", 13, + offsetof(history_l_fields_buf_t, history_l_flow), print_default_cb}, +{ "START", 14, + offsetof(history_l_fields_buf_t, history_l_stime), print_default_cb}, +{ "END", 14, + offsetof(history_l_fields_buf_t, history_l_etime), print_default_cb}, +{ "RBYTES", 9, + offsetof(history_l_fields_buf_t, history_l_rbytes), print_default_cb}, +{ "OBYTES", 9, + offsetof(history_l_fields_buf_t, history_l_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_l_fields_buf_t, history_l_bandwidth), + print_default_cb}, +NULL_OFMT} +; + +static char *progname; + +/* + * Handle to libdladm. Opened in main() before the sub-command + * specific function is called. + */ +static dladm_handle_t handle = NULL; + +const char *usage_ermsg = "flowstat [-r | -t] [-i interval] " + "[-l link] [flow]\n" + " flowstat [-S] [-A] [-i interval] [-p] [ -o field[,...]]\n" + " [-u R|K|M|G|T|P] [-l link] [flow]\n" + " flowstat -h [-a] [-d] [-F format]" + " [-s <DD/MM/YYYY,HH:MM:SS>]\n" + " [-e <DD/MM/YYYY,HH:MM:SS>] -f <logfile> " + "[<flow>]"; + +static void +usage(void) +{ + (void) fprintf(stderr, "%s\n", gettext(usage_ermsg)); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(1); +} + +boolean_t +flowstat_unit(char *oarg, char *unit) +{ + if ((strcmp(oarg, "R") == 0) || (strcmp(oarg, "K") == 0) || + (strcmp(oarg, "M") == 0) || (strcmp(oarg, "G") == 0) || + (strcmp(oarg, "T") == 0) || (strcmp(oarg, "P") == 0)) { + *unit = oarg[0]; + return (B_TRUE); + } + + return (B_FALSE); +} + +void +map_to_units(char *buf, uint_t bufsize, double num, char unit, + boolean_t parsable) +{ + if (parsable) { + (void) snprintf(buf, bufsize, "%.0lf", num); + return; + } + + if 
(unit == '\0') { + int index; + + for (index = 0; (int)(num/1000) != 0; index++, num /= 1000) + ; + + switch (index) { + case 0: + unit = '\0'; + break; + case 1: + unit = 'K'; + break; + case 2: + unit = 'M'; + break; + case 3: + unit = 'G'; + break; + case 4: + unit = 'T'; + break; + case 5: + /* Largest unit supported */ + default: + unit = 'P'; + break; + } + } else { + switch (unit) { + case 'R': + /* Already raw numbers */ + unit = '\0'; + break; + case 'K': + num /= 1000; + break; + case 'M': + num /= (1000*1000); + break; + case 'G': + num /= (1000*1000*1000); + break; + case 'T': + num /= (1000.0*1000.0*1000.0*1000.0); + break; + case 'P': + /* Largest unit supported */ + default: + num /= (1000.0*1000.0*1000.0*1000.0*1000.0); + break; + } + } + + if (unit == '\0') + (void) snprintf(buf, bufsize, " %7.0lf%c", num, unit); + else + (void) snprintf(buf, bufsize, " %6.2lf%c", num, unit); +} + +flow_chain_t * +get_flow_prev_stat(const char *flowname, void *arg) +{ + show_flow_state_t *state = arg; + flow_chain_t *flow_curr = NULL; + + /* Scan prev flowname list and look for entry matching this entry */ + for (flow_curr = state->fs_flowchain; flow_curr; + flow_curr = flow_curr->fc_next) { + if (strcmp(flow_curr->fc_flowname, flowname) == 0) + break; + } + + /* New flow, add it */ + if (flow_curr == NULL) { + flow_curr = (flow_chain_t *)malloc(sizeof (flow_chain_t)); + if (flow_curr == NULL) + goto done; + (void) strncpy(flow_curr->fc_flowname, flowname, + MAXFLOWNAMELEN); + flow_curr->fc_stat = NULL; + flow_curr->fc_next = state->fs_flowchain; + state->fs_flowchain = flow_curr; + } +done: + return (flow_curr); +} + +/* + * Number of flows may change while flowstat -i is executing. + * Free memory allocated for flows that are no longer there. + * Prepare for next iteration by marking visited = false for + * existing stat entries. 
+ */ +static void +cleanup_removed_flows(show_flow_state_t *state) +{ + flow_chain_t *fcurr; + flow_chain_t *fprev; + flow_chain_t *tofree; + + /* Delete all nodes from the list that have fc_visited marked false */ + fcurr = state->fs_flowchain; + while (fcurr != NULL) { + if (fcurr->fc_visited) { + fcurr->fc_visited = B_FALSE; + fprev = fcurr; + fcurr = fcurr->fc_next; + continue; + } + + /* Is it head of the list? */ + if (fcurr == state->fs_flowchain) + state->fs_flowchain = fcurr->fc_next; + else + fprev->fc_next = fcurr->fc_next; + + /* fprev remains the same */ + tofree = fcurr; + fcurr = fcurr->fc_next; + + /* Free stats memory for the removed flow */ + dladm_flow_stat_free(tofree->fc_stat); + free(tofree); + } +} + +static boolean_t +print_flow_stats_cb(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + flow_args_t *fargs = of_arg->ofmt_cbarg; + flow_stat_t *diff_stats = fargs->flow_s_stat; + char unit = fargs->flow_s_unit; + boolean_t parsable = fargs->flow_s_parsable; + + switch (of_arg->ofmt_id) { + case FLOW_S_FLOW: + (void) snprintf(buf, bufsize, "%s", fargs->flow_s_flow); + break; + case FLOW_S_IPKTS: + map_to_units(buf, bufsize, diff_stats->fl_ipackets, unit, + parsable); + break; + case FLOW_S_RBYTES: + map_to_units(buf, bufsize, diff_stats->fl_rbytes, unit, + parsable); + break; + case FLOW_S_IERRORS: + map_to_units(buf, bufsize, diff_stats->fl_ierrors, unit, + parsable); + break; + case FLOW_S_OPKTS: + map_to_units(buf, bufsize, diff_stats->fl_opackets, unit, + parsable); + break; + case FLOW_S_OBYTES: + map_to_units(buf, bufsize, diff_stats->fl_obytes, unit, + parsable); + break; + case FLOW_S_OERRORS: + map_to_units(buf, bufsize, diff_stats->fl_oerrors, unit, + parsable); + break; + default: + die("invalid input"); + break; + } + return (B_TRUE); +} + +/* ARGSUSED */ +static int +query_flow_stats(dladm_handle_t handle, dladm_flow_attr_t *attr, void *arg) +{ + show_flow_state_t *state = arg; + flow_chain_t *flow_node; + flow_stat_t *curr_stat; + 
flow_stat_t *prev_stat; + flow_stat_t *diff_stat; + char *flowname = attr->fa_flowname; + flow_args_t fargs; + + /* Get previous stats for the flow */ + flow_node = get_flow_prev_stat(flowname, arg); + if (flow_node == NULL) + goto done; + + flow_node->fc_visited = B_TRUE; + prev_stat = flow_node->fc_stat; + + /* Query library for current stats */ + curr_stat = dladm_flow_stat_query(flowname); + if (curr_stat == NULL) + goto done; + + /* current stats - prev iteration stats */ + diff_stat = dladm_flow_stat_diff(curr_stat, prev_stat); + + /* Free prev stats */ + dladm_flow_stat_free(prev_stat); + + /* Prev <- curr stats */ + flow_node->fc_stat = curr_stat; + + if (diff_stat == NULL) + goto done; + + /* Print stats */ + fargs.flow_s_flow = flowname; + fargs.flow_s_stat = diff_stat; + fargs.flow_s_unit = state->fs_unit; + fargs.flow_s_parsable = state->fs_parsable; + ofmt_print(state->fs_ofmt, &fargs); + + /* Free diff stats */ + dladm_flow_stat_free(diff_stat); +done: + return (DLADM_WALK_CONTINUE); +} + +/* + * Wrapper of dladm_walk_flow(query_flow_stats,...) to make it usable for + * dladm_walk_datalink_id(). Used for showing flow stats for + * all flows on all links. 
+ */ +static int +query_link_flow_stats(dladm_handle_t dh, datalink_id_t linkid, void * arg) +{ + if (dladm_walk_flow(query_flow_stats, dh, linkid, arg, B_FALSE) + == DLADM_STATUS_OK) + return (DLADM_WALK_CONTINUE); + else + return (DLADM_WALK_TERMINATE); +} + +void +print_all_stats(name_value_stat_entry_t *stat_entry) +{ + name_value_stat_t *curr_stat; + + printf("%s\n", stat_entry->nve_header); + + for (curr_stat = stat_entry->nve_stats; curr_stat != NULL; + curr_stat = curr_stat->nv_nextstat) { + printf("\t%15s", curr_stat->nv_statname); + printf("\t%15llu\n", curr_stat->nv_statval); + } +} + +/* ARGSUSED */ +static int +dump_one_flow_stats(dladm_handle_t handle, dladm_flow_attr_t *attr, void *arg) +{ + char *flowname = attr->fa_flowname; + void *stat; + + stat = dladm_flow_stat_query_all(flowname); + if (stat == NULL) + goto done; + print_all_stats(stat); + dladm_flow_stat_query_all_free(stat); + +done: + return (DLADM_WALK_CONTINUE); +} + +/* + * Wrapper of dladm_walk_flow(dump_one_flow_stats,...) to make it usable for + * dladm_walk_datalink_id(). Used for dumping every statistic of + * all flows on all links. 
+ */ +static int +dump_link_flow_stats(dladm_handle_t dh, datalink_id_t linkid, void * arg) +{ + if (dladm_walk_flow(dump_one_flow_stats, dh, linkid, arg, B_FALSE) + == DLADM_STATUS_OK) + return (DLADM_WALK_CONTINUE); + else + return (DLADM_WALK_TERMINATE); +} + +static void +dump_all_flow_stats(dladm_flow_attr_t *attrp, void *arg, datalink_id_t linkid, + boolean_t flow_arg) +{ + /* Show stats for named flow */ + if (flow_arg) { + (void) dump_one_flow_stats(handle, attrp, arg); + + /* Show stats for flows on one link */ + } else if (linkid != DATALINK_INVALID_LINKID) { + (void) dladm_walk_flow(dump_one_flow_stats, handle, linkid, + arg, B_FALSE); + + /* Show stats for all flows on all links */ + } else { + (void) dladm_walk_datalink_id(dump_link_flow_stats, + handle, arg, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } +} + +int +main(int argc, char *argv[]) +{ + dladm_status_t status; + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t A_arg = B_FALSE; + boolean_t S_arg = B_FALSE; + boolean_t flow_arg = B_FALSE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + char linkname[MAXLINKNAMELEN]; + char flowname[MAXFLOWNAMELEN]; + uint32_t interval = 0; + char unit = '\0'; + show_flow_state_t state; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *total_stat_fields = + "flow,ipkts,rbytes,ierrs,opkts,obytes,oerrs"; + char *rx_stat_fields = + "flow,ipkts,rbytes,ierrs"; + char *tx_stat_fields = + "flow,opkts,obytes,oerrs"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + + dladm_flow_attr_t attr; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + progname = argv[0]; + + /* Open the libdladm handle */ + if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) + 
die_dlerr(status, "could not open /dev/dld"); + + bzero(&state, sizeof (state)); + + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtApSi:o:u:l:h", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'A': + if (A_arg) + die_optdup(option); + + A_arg = B_TRUE; + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'S': + if (S_arg) + die_optdup(option); + S_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!flowstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + case 'l': + if (strlcpy(linkname, optarg, MAXLINKNAMELEN) + >= MAXLINKNAMELEN) + die("link name too long\n"); + if (dladm_name2info(handle, linkname, &linkid, NULL, + NULL, NULL) != DLADM_STATUS_OK) + die("invalid link '%s'", linkname); + break; + case 'h': + if (r_arg || t_arg || p_arg || o_arg || u_arg || + i_arg || S_arg || A_arg) { + die("the option -h is not compatible with " + "-r, -t, -p, -o, -u, -i, -S, -A"); + } + do_show_history(argc, argv); + return (0); + break; + default: + die_opterr(optopt, option, usage_ermsg); + break; + } + } + + if (r_arg && t_arg) + die("the option -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the option -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (S_arg && + (r_arg || t_arg || p_arg || o_arg || u_arg)) + die("the option -S is not compatible with " + "-r, -t, -p, -o, -u"); + + if (A_arg && + 
(r_arg || t_arg || p_arg || o_arg || u_arg || i_arg)) + die("the option -A is not compatible with " + "-r, -t, -p, -o, -u, -i"); + + /* get flow name (optional last argument) */ + if (optind == (argc-1)) { + if (strlcpy(flowname, argv[optind], MAXFLOWNAMELEN) + >= MAXFLOWNAMELEN) + die("flow name too long"); + flow_arg = B_TRUE; + } else if (optind != argc) { + usage(); + } + + if (S_arg) { + dladm_continuous(handle, linkid, (flow_arg ? flowname : NULL), + interval, FLOW_REPORT); + return (0); + } + + if (flow_arg && + dladm_flow_info(handle, flowname, &attr) != DLADM_STATUS_OK) + die("invalid flow %s", flowname); + + if (A_arg) { + dump_all_flow_stats(&attr, &state, linkid, flow_arg); + return (0); + } + + state.fs_unit = unit; + state.fs_parsable = p_arg; + + if (state.fs_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) + fields_str = rx_stat_fields; + else if (t_arg) + fields_str = tx_stat_fields; + else + fields_str = total_stat_fields; + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? 
+ fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, flow_s_fields, ofmtflags, 0, &ofmt); + flowstat_ofmt_check(oferr, state.fs_parsable, ofmt); + state.fs_ofmt = ofmt; + + for (;;) { + /* Show stats for named flow */ + if (flow_arg) { + (void) query_flow_stats(handle, &attr, &state); + + /* Show stats for flows on one link */ + } else if (linkid != DATALINK_INVALID_LINKID) { + (void) dladm_walk_flow(query_flow_stats, handle, linkid, + &state, B_FALSE); + + /* Show stats for all flows on all links */ + } else { + (void) dladm_walk_datalink_id(query_link_flow_stats, + handle, &state, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } + + if (interval == 0) + break; + + (void) fflush(stdout); + cleanup_removed_flows(&state); + (void) sleep(interval); + } + ofmt_close(ofmt); + + dladm_close(handle); + return (0); +} + +/* ARGSUSED */ +static int +show_history_date(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = (show_history_state_t *)arg; + time_t stime; + char timebuf[20]; + dladm_flow_attr_t attr; + dladm_status_t status; + + /* + * Only show historical information for existing flows unless '-a' + * is specified. + */ + if (!state->us_showall && ((status = dladm_flow_info(handle, + history->du_name, &attr)) != DLADM_STATUS_OK)) { + return (status); + } + + stime = history->du_stime; + (void) strftime(timebuf, sizeof (timebuf), "%m/%d/%Y", + localtime(&stime)); + (void) printf("%s\n", timebuf); + + return (DLADM_STATUS_OK); +} + +static int +show_history_time(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = (show_history_state_t *)arg; + char buf[DLADM_STRSIZE]; + history_l_fields_buf_t ubuf; + time_t time; + double bw; + dladm_flow_attr_t attr; + dladm_status_t status; + + /* + * Only show historical information for existing flows unless '-a' + * is specified. 
+ */ + if (!state->us_showall && ((status = dladm_flow_info(handle, + history->du_name, &attr)) != DLADM_STATUS_OK)) { + return (status); + } + + if (state->us_plot) { + if (!state->us_printheader) { + if (state->us_first) { + (void) printf("# Time"); + state->us_first = B_FALSE; + } + (void) printf(" %s", history->du_name); + if (history->du_last) { + (void) printf("\n"); + state->us_first = B_TRUE; + state->us_printheader = B_TRUE; + } + } else { + if (state->us_first) { + time = history->du_etime; + (void) strftime(buf, sizeof (buf), "%T", + localtime(&time)); + state->us_first = B_FALSE; + (void) printf("%s", buf); + } + bw = (double)history->du_bandwidth/1000; + (void) printf(" %.2f", bw); + if (history->du_last) { + (void) printf("\n"); + state->us_first = B_TRUE; + } + } + return (DLADM_STATUS_OK); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.history_l_flow, sizeof (ubuf.history_l_flow), "%s", + history->du_name); + time = history->du_stime; + (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.history_l_stime, sizeof (ubuf.history_l_stime), + "%s", buf); + time = history->du_etime; + (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.history_l_etime, sizeof (ubuf.history_l_etime), + "%s", buf); + (void) snprintf(ubuf.history_l_rbytes, sizeof (ubuf.history_l_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.history_l_obytes, sizeof (ubuf.history_l_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.history_l_bandwidth, + sizeof (ubuf.history_l_bandwidth), "%s Mbps", + dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->us_ofmt, (void *)&ubuf); + return (DLADM_STATUS_OK); +} + +static int +show_history_res(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = (show_history_state_t *)arg; + char buf[DLADM_STRSIZE]; + history_fields_buf_t ubuf; + dladm_flow_attr_t attr; + dladm_status_t status; + + /* + * Only show historical 
information for existing flows unless '-a' + * is specified. + */ + if (!state->us_showall && ((status = dladm_flow_info(handle, + history->du_name, &attr)) != DLADM_STATUS_OK)) { + return (status); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.history_flow, sizeof (ubuf.history_flow), "%s", + history->du_name); + (void) snprintf(ubuf.history_duration, sizeof (ubuf.history_duration), + "%llu", history->du_duration); + (void) snprintf(ubuf.history_ipackets, sizeof (ubuf.history_ipackets), + "%llu", history->du_ipackets); + (void) snprintf(ubuf.history_rbytes, sizeof (ubuf.history_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.history_opackets, sizeof (ubuf.history_opackets), + "%llu", history->du_opackets); + (void) snprintf(ubuf.history_obytes, sizeof (ubuf.history_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.history_bandwidth, sizeof (ubuf.history_bandwidth), + "%s Mbps", dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->us_ofmt, (void *)&ubuf); + + return (DLADM_STATUS_OK); +} + +static boolean_t +valid_formatspec(char *formatspec_str) +{ + return (strcmp(formatspec_str, "gnuplot") == 0); +} + +/* ARGSUSED */ +static void +do_show_history(int argc, char *argv[]) +{ + char *file = NULL; + int opt; + dladm_status_t status; + boolean_t d_arg = B_FALSE; + char *stime = NULL; + char *etime = NULL; + char *resource = NULL; + show_history_state_t state; + boolean_t o_arg = B_FALSE; + boolean_t F_arg = B_FALSE; + char *fields_str = NULL; + char *formatspec_str = NULL; + char *all_fields = + "flow,duration,ipackets,rbytes,opackets,obytes,bandwidth"; + char *all_l_fields = + "flow,start,end,rbytes,obytes,bandwidth"; + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = 0; + + bzero(&state, sizeof (show_history_state_t)); + state.us_parsable = B_FALSE; + state.us_printheader = B_FALSE; + state.us_plot = B_FALSE; + state.us_first = B_TRUE; + + while ((opt = getopt(argc, argv, "das:e:o:f:F:")) != -1) 
{ + switch (opt) { + case 'd': + d_arg = B_TRUE; + break; + case 'a': + state.us_showall = B_TRUE; + break; + case 'f': + file = optarg; + break; + case 's': + stime = optarg; + break; + case 'e': + etime = optarg; + break; + case 'o': + o_arg = B_TRUE; + fields_str = optarg; + break; + case 'F': + state.us_plot = F_arg = B_TRUE; + formatspec_str = optarg; + break; + default: + die_opterr(optopt, opt, usage_ermsg); + } + } + + if (file == NULL) + die("-h requires a file"); + + if (optind == (argc-1)) { + dladm_flow_attr_t attr; + + resource = argv[optind]; + if (!state.us_showall && + dladm_flow_info(handle, resource, &attr) != + DLADM_STATUS_OK) { + die("invalid flow: '%s'", resource); + } + } + + if (state.us_parsable) + ofmtflags |= OFMT_PARSABLE; + if (resource == NULL && stime == NULL && etime == NULL) { + if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) + fields_str = all_fields; + oferr = ofmt_open(fields_str, history_fields, ofmtflags, + 0, &ofmt); + } else { + if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) + fields_str = all_l_fields; + oferr = ofmt_open(fields_str, history_l_fields, ofmtflags, + 0, &ofmt); + } + + flowstat_ofmt_check(oferr, state.us_parsable, ofmt); + state.us_ofmt = ofmt; + + if (F_arg && d_arg) + die("incompatible -d and -F options"); + + if (F_arg && !valid_formatspec(formatspec_str)) + die("Format specifier %s not supported", formatspec_str); + + if (d_arg) { + /* Print log dates */ + status = dladm_usage_dates(show_history_date, + DLADM_LOGTYPE_FLOW, file, resource, &state); + } else if (resource == NULL && stime == NULL && etime == NULL && + !F_arg) { + /* Print summary */ + status = dladm_usage_summary(show_history_res, + DLADM_LOGTYPE_FLOW, file, &state); + } else if (resource != NULL) { + /* Print log entries for named resource */ + status = dladm_walk_usage_res(show_history_time, + DLADM_LOGTYPE_FLOW, file, resource, stime, etime, &state); + } else { + /* Print time and information for each flow */ + 
status = dladm_walk_usage_time(show_history_time, + DLADM_LOGTYPE_FLOW, file, stime, etime, &state); + } + + ofmt_close(ofmt); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "-h"); + dladm_close(handle); +} + +static void +warn(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: warning: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); +} + +/* PRINTFLIKE1 */ +static void +die(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + +static void +die_optdup(int opt) +{ + die("the option -%c cannot be specified more than once", opt); +} + +static void +die_opterr(int opt, int opterr, const char *usage) +{ + switch (opterr) { + case ':': + die("option '-%c' requires a value\nusage: %s", opt, + gettext(usage)); + break; + case '?': + default: + die("unrecognized option '-%c'\nusage: %s", opt, + gettext(usage)); + break; + } +} + +/* PRINTFLIKE2 */ +static void +die_dlerr(dladm_status_t err, const char *format, ...) +{ + va_list alist; + char errmsg[DLADM_STRSIZE]; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + (void) fprintf(stderr, ": %s\n", dladm_status2str(err, errmsg)); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + + +/* + * Default output callback function that, when invoked through ofmt_print(), + * copies into buf the string found of_arg->ofmt_id bytes into the + * of_arg->ofmt_cbarg record. 
+ */ +static boolean_t +print_default_cb(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + char *value; + + value = (char *)of_arg->ofmt_cbarg + of_arg->ofmt_id; + (void) strlcpy(buf, value, bufsize); + return (B_TRUE); +} + +static void +flowstat_ofmt_check(ofmt_status_t oferr, boolean_t parsable, + ofmt_handle_t ofmt) +{ + char buf[OFMT_BUFSIZE]; + + if (oferr == OFMT_SUCCESS) + return; + (void) ofmt_strerror(ofmt, oferr, buf, sizeof (buf)); + /* + * All errors are considered fatal in parsable mode. + * OFMT_ENOFIELDS errors are always fatal, regardless of mode. + * For other errors, we print diagnostics in human-readable + * mode and process what we can. + */ + if (parsable || oferr == OFMT_ENOFIELDS) { + ofmt_close(ofmt); + die(buf); + } else { + warn(buf); + } +} diff --git a/usr/src/cmd/flowstat/flowstat.xcl b/usr/src/cmd/flowstat/flowstat.xcl new file mode 100644 index 0000000000..369608c062 --- /dev/null +++ b/usr/src/cmd/flowstat/flowstat.xcl @@ -0,0 +1,73 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# + +msgid " %.2f" +msgid " %6.2lf%c" +msgid " %7.0lf%c" +msgid " %s" +msgid "-h" +msgid ": %s\n" +msgid ":rtApSi:o:u:l:h" +msgid "" +msgid "\n" +msgid "\t%15llu\n" +msgid "\t%15s" +msgid "# Time" +msgid "%.0lf" +msgid "%llu" +msgid "%m/%d/%Y" +msgid "%s Mbps" +msgid "%s: " +msgid "%s: warning: " +msgid "%s" +msgid "%s\n" +msgid "%T" +msgid "all" +msgid "B" +msgid "BANDWIDTH" +msgid "das:e:o:f:F:" +msgid "DURATION" +msgid "END" +msgid "flow,duration,ipackets,rbytes,opackets,obytes,bandwidth" +msgid "flow,ipkts,rbytes,ierrs,opkts,obytes,oerrs" +msgid "flow,ipkts,rbytes,ierrs" +msgid "flow,opkts,obytes,oerrs" +msgid "flow,start,end,rbytes,obytes,bandwidth" +msgid "FLOW" +msgid "G" +msgid "gnuplot" +msgid "IERRS" +msgid "IPACKETS" +msgid "IPKTS" +msgid "K" +msgid "M" +msgid "OBYTES" +msgid "OERRS" +msgid "OPACKETS" +msgid "OPKTS" +msgid "P" +msgid "RBYTES" +msgid "START" +msgid "T" diff --git a/usr/src/cmd/mdb/common/modules/mac/mac.c b/usr/src/cmd/mdb/common/modules/mac/mac.c index 4a56960ca7..268d92ac2d 100644 --- a/usr/src/cmd/mdb/common/modules/mac/mac.c +++ b/usr/src/cmd/mdb/common/modules/mac/mac.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -34,6 +34,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_flow_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #define STRSIZE 64 #define MAC_RX_SRS_SIZE (MAX_RINGS_PER_GROUP * sizeof (uintptr_t)) @@ -59,12 +60,15 @@ #define MAC_SRS_STAT 0x04 #define MAC_SRS_CPU 0x08 #define MAC_SRS_VERBOSE 0x10 +#define MAC_SRS_INTR 0x20 #define MAC_SRS_RXSTAT (MAC_SRS_RX|MAC_SRS_STAT) #define MAC_SRS_TXSTAT (MAC_SRS_TX|MAC_SRS_STAT) #define MAC_SRS_RXCPU (MAC_SRS_RX|MAC_SRS_CPU) #define MAC_SRS_TXCPU (MAC_SRS_TX|MAC_SRS_CPU) #define MAC_SRS_RXCPUVERBOSE (MAC_SRS_RXCPU|MAC_SRS_VERBOSE) #define MAC_SRS_TXCPUVERBOSE (MAC_SRS_TXCPU|MAC_SRS_VERBOSE) +#define MAC_SRS_RXINTR (MAC_SRS_RX|MAC_SRS_INTR) +#define MAC_SRS_TXINTR (MAC_SRS_TX|MAC_SRS_INTR) static char * mac_flow_proto2str(uint8_t protocol) @@ -314,9 +318,28 @@ mac_flow_dcmd_output(uintptr_t addr, uint_t flags, uint_t args) break; } case MAC_FLOW_STATS: { + uint64_t totibytes = 0; + uint64_t totobytes = 0; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + + for (i = 0; i < fe.fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)(fe.fe_rx_srs[i]); + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + totibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + } + mac_srs = (mac_soft_ring_set_t *)(fe.fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + totobytes = mac_tx_stat->mts_obytes; + } mdb_printf("%?p %-32s %16llu %16llu\n", - addr, fe.fe_flow_name, fe.fe_flowstats.fs_rbytes, - fe.fe_flowstats.fs_obytes); + addr, fe.fe_flow_name, totibytes, totobytes); + break; } } @@ -444,6 +467,10 @@ mac_srs_txmode2str(mac_tx_srs_mode_t mode) return ("BW"); case SRS_TX_BW_FANOUT: return ("BWFO"); + case SRS_TX_AGGR: + return ("AG"); + case SRS_TX_BW_AGGR: + return ("BWAG"); } return ("--"); } @@ -460,6 +487,7 @@ mac_srs_help(void) "\t-s\tdisplay statistics for RX or TX side\n" 
"\t-c\tdisplay CPU binding for RX or TX side\n" "\t-v\tverbose flag for CPU binding to list cpus\n" + "\t-i\tdisplay mac_ring_t and interrupt information\n" "Note: use -r or -t (to specify RX or TX side respectively) along " "with -c or -s\n"); mdb_printf("\n%<u>Interpreting TX Modes%</u>\n"); @@ -468,6 +496,8 @@ mac_srs_help(void) mdb_printf("\t FO --> Fanout\n"); mdb_printf("\t BW --> Bandwidth\n"); mdb_printf("\tBWFO --> Bandwidth Fanout\n"); + mdb_printf("\t AG --> Aggr\n"); + mdb_printf("\tBWAG --> Bandwidth Aggr\n"); } /* @@ -520,6 +550,7 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 't', MDB_OPT_SETBITS, MAC_SRS_TX, &args, 'c', MDB_OPT_SETBITS, MAC_SRS_CPU, &args, 'v', MDB_OPT_SETBITS, MAC_SRS_VERBOSE, &args, + 'i', MDB_OPT_SETBITS, MAC_SRS_INTR, &args, 's', MDB_OPT_SETBITS, MAC_SRS_STAT, &args) != argc) { return (DCMD_USAGE); } @@ -576,7 +607,7 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) "%08x %08x %8d %8d %3d\n", addr, mci.mci_name, mac_srs_txmode2str(srs.srs_tx.st_mode), srs.srs_state, srs.srs_type, srs.srs_count, srs.srs_size, - srs.srs_oth_ring_count); + srs.srs_tx_ring_count); break; } case MAC_SRS_RXCPU: { @@ -596,30 +627,148 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); mdb_printf("%?p %-20s %-4d %-4d " "%-6d %-4d %-7d\n", - addr, mci.mci_name, mc.mc_ncpus, mc.mc_pollid, - mc.mc_workerid, mc.mc_intr_cpu, mc.mc_fanout_cnt); + addr, mci.mci_name, mc.mc_ncpus, mc.mc_rx_pollid, + mc.mc_rx_workerid, mc.mc_rx_intr_cpu, mc.mc_rx_fanout_cnt); break; } case MAC_SRS_TXCPU: { mac_cpus_t mc = srs.srs_cpu; + mac_soft_ring_t *s_ringp, s_ring; + boolean_t first = B_TRUE; + int i; if (DCMD_HDRSPEC(flags)) { - mdb_printf("%?s %-20s %-4s %-6s " - "%-4s %-7s\n", - "", "", "NUM", "WORKER", - "INTR", "FANOUT"); - mdb_printf("%<u>%?s %-20s %-4s %-6s " - "%-4s %-7s%</u>\n", - "ADDR", "LINK_NAME", "CPUS", "CPU", - "CPU", "CPU_CNT"); + mdb_printf("%?s %-12s 
%?s %8s %8s %8s\n", + "", "", "SOFT", "WORKER", "INTR", "RETARGETED"); + mdb_printf("%<u>%?s %-12s %?s %8s %8s %8s%</u>\n", + "ADDR", "LINK_NAME", "RING", "CPU", "CPU", "CPU"); } - if ((args & MAC_SRS_TX) && !(srs.srs_type & SRST_TX)) + if (!(srs.srs_type & SRST_TX)) return (DCMD_OK); - mdb_printf("%?p %-20s %-4d " - "%-6d %-4d %-7d\n", - addr, mci.mci_name, mc.mc_ncpus, - mc.mc_workerid, mc.mc_intr_cpu, mc.mc_fanout_cnt); + + mdb_printf("%?p %-12s ", addr, mci.mci_name); + + /* + * Case of no soft rings, print the info from + * mac_srs_tx_t. + */ + if (srs.srs_tx_ring_count == 0) { + mdb_printf("%?p %8d %8d %8d\n", + 0, mc.mc_tx_fanout_cpus[0], + mc.mc_tx_intr_cpu[0], + mc.mc_tx_retargeted_cpu[0]); + break; + } + + for (s_ringp = srs.srs_soft_ring_head, i = 0; s_ringp != NULL; + s_ringp = s_ring.s_ring_next, i++) { + (void) mdb_vread(&s_ring, sizeof (s_ring), + (uintptr_t)s_ringp); + if (first) { + mdb_printf("%?p %8d %8d %8d\n", + s_ringp, mc.mc_tx_fanout_cpus[i], + mc.mc_tx_intr_cpu[i], + mc.mc_tx_retargeted_cpu[i]); + first = B_FALSE; + continue; + } + mdb_printf("%?s %-12s %?p %8d %8d %8d\n", + "", "", s_ringp, mc.mc_tx_fanout_cpus[i], + mc.mc_tx_intr_cpu[i], mc.mc_tx_retargeted_cpu[i]); + } + break; + } + case MAC_SRS_TXINTR: { + mac_cpus_t mc = srs.srs_cpu; + mac_soft_ring_t *s_ringp, s_ring; + mac_ring_t *m_ringp, m_ring; + boolean_t first = B_TRUE; + int i; + + if (DCMD_HDRSPEC(flags)) { + mdb_printf("%?s %-12s %?s %8s %?s %6s %6s\n", + "", "", "SOFT", "WORKER", "MAC", "", "INTR"); + mdb_printf("%<u>%?s %-12s %?s %8s %?s %6s %6s%</u>\n", + "ADDR", "LINK_NAME", "RING", "CPU", "RING", + "SHARED", "CPU"); + } + if (!(srs.srs_type & SRST_TX)) + return (DCMD_OK); + + mdb_printf("%?p %-12s ", addr, mci.mci_name); + + /* + * Case of no soft rings, print the info from + * mac_srs_tx_t. 
+ */ + if (srs.srs_tx_ring_count == 0) { + m_ringp = srs.srs_tx.st_arg2; + if (m_ringp != NULL) { + (void) mdb_vread(&m_ring, sizeof (m_ring), + (uintptr_t)m_ringp); + mdb_printf("%?p %8d %?p %6d %6d\n", + 0, mc.mc_tx_fanout_cpus[0], m_ringp, + m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_tx_retargeted_cpu[0]); + } else { + mdb_printf("%?p %8d %?p %6d %6d\n", + 0, mc.mc_tx_fanout_cpus[0], 0, + 0, mc.mc_tx_retargeted_cpu[0]); + } + break; + } + + for (s_ringp = srs.srs_soft_ring_head, i = 0; s_ringp != NULL; + s_ringp = s_ring.s_ring_next, i++) { + (void) mdb_vread(&s_ring, sizeof (s_ring), + (uintptr_t)s_ringp); + m_ringp = s_ring.s_ring_tx_arg2; + (void) mdb_vread(&m_ring, sizeof (m_ring), + (uintptr_t)m_ringp); + if (first) { + mdb_printf("%?p %8d %?p %6d %6d\n", + s_ringp, mc.mc_tx_fanout_cpus[i], + m_ringp, + m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_tx_retargeted_cpu[i]); + first = B_FALSE; + continue; + } + mdb_printf("%?s %-12s %?p %8d %?p %6d %6d\n", + "", "", s_ringp, mc.mc_tx_fanout_cpus[i], + m_ringp, m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_tx_retargeted_cpu[i]); + } + break; + } + case MAC_SRS_RXINTR: { + mac_cpus_t mc = srs.srs_cpu; + mac_ring_t *m_ringp, m_ring; + + if (DCMD_HDRSPEC(flags)) { + mdb_printf("%?s %-12s %?s %8s %6s %6s\n", + "", "", "MAC", "", "POLL", "INTR"); + mdb_printf("%<u>%?s %-12s %?s %8s %6s %6s%</u>\n", + "ADDR", "LINK_NAME", "RING", "SHARED", "CPU", + "CPU"); + } + if ((args & MAC_SRS_RX) && (srs.srs_type & SRST_TX)) + return (DCMD_OK); + + mdb_printf("%?p %-12s ", addr, mci.mci_name); + + m_ringp = srs.srs_ring; + if (m_ringp != NULL) { + (void) mdb_vread(&m_ring, sizeof (m_ring), + (uintptr_t)m_ringp); + mdb_printf("%?p %8d %6d %6d\n", + m_ringp, m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_rx_pollid, mc.mc_rx_intr_cpu); + } else { + mdb_printf("%?p %8d %6d %6d\n", + 0, 0, mc.mc_rx_pollid, mc.mc_rx_intr_cpu); + } break; } case MAC_SRS_RXCPUVERBOSE: @@ -640,8 +789,8 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, 
int argc, const mdb_arg_t *argv) ((args & MAC_SRS_RX) && (srs.srs_type & SRST_TX))) return (DCMD_OK); mdb_printf("%?p %-20s %-20d %-20d\n", addr, mci.mci_name, - mc.mc_ncpus, mc.mc_fanout_cnt); - if (mc.mc_ncpus == 0 && mc.mc_fanout_cnt == 0) + mc.mc_ncpus, mc.mc_rx_fanout_cnt); + if (mc.mc_ncpus == 0 && mc.mc_rx_fanout_cnt == 0) break; /* print all cpus and cpus for soft rings */ while (!cpu_done || !fanout_done) { @@ -658,14 +807,15 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) else mdb_printf("%*s", len, ""); fanout_done = mac_srs_print_cpu(&fanout_index, - mc.mc_fanout_cnt, mc.mc_fanout_cpus, NULL); + mc.mc_rx_fanout_cnt, + mc.mc_rx_fanout_cpus, NULL); } mdb_printf("\n"); } break; } case MAC_SRS_RXSTAT: { - mac_srs_rx_t srs_rx = srs.srs_rx; + mac_rx_stats_t *mac_rx_stat = &srs.srs_rx.sr_stat; if (DCMD_HDRSPEC(flags)) { mdb_printf("%?s %-16s %8s %8s " @@ -682,13 +832,14 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("%?p %-16s %8d " "%8d %8d " "%8d %8d\n", - addr, mci.mci_name, srs_rx.sr_intr_count, - srs_rx.sr_poll_count, srs_rx.sr_chain_cnt_undr10, - srs_rx.sr_chain_cnt_10to50, srs_rx.sr_chain_cnt_over50); + addr, mci.mci_name, mac_rx_stat->mrs_intrcnt, + mac_rx_stat->mrs_pollcnt, mac_rx_stat->mrs_chaincntundr10, + mac_rx_stat->mrs_chaincnt10to50, + mac_rx_stat->mrs_chaincntover50); break; } case MAC_SRS_TXSTAT: { - mac_srs_tx_t srs_tx = srs.srs_tx; + mac_tx_stats_t *mac_tx_stat = &srs.srs_tx.st_stat; mac_soft_ring_t *s_ringp, s_ring; boolean_t first = B_TRUE; @@ -708,10 +859,11 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) * Case of no soft rings, print the info from * mac_srs_tx_t. 
*/ - if (srs.srs_oth_ring_count == 0) { + if (srs.srs_tx_ring_count == 0) { mdb_printf("%?p %8d %8d %8d\n", - 0, srs_tx.st_drop_count, srs_tx.st_blocked_cnt, - srs_tx.st_unblocked_cnt); + 0, mac_tx_stat->mts_sdrops, + mac_tx_stat->mts_blockcnt, + mac_tx_stat->mts_unblockcnt); break; } @@ -719,18 +871,19 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) s_ringp = s_ring.s_ring_next) { (void) mdb_vread(&s_ring, sizeof (s_ring), (uintptr_t)s_ringp); + mac_tx_stat = &s_ring.s_st_stat; if (first) { mdb_printf("%?p %8d %8d %8d\n", - s_ringp, s_ring.s_ring_drops, - s_ring.s_ring_blocked_cnt, - s_ring.s_ring_unblocked_cnt); + s_ringp, mac_tx_stat->mts_sdrops, + mac_tx_stat->mts_blockcnt, + mac_tx_stat->mts_unblockcnt); first = B_FALSE; continue; } mdb_printf("%?s %-20s %?p %8d %8d %8d\n", - "", "", s_ringp, s_ring.s_ring_drops, - s_ring.s_ring_blocked_cnt, - s_ring.s_ring_unblocked_cnt); + "", "", s_ringp, mac_tx_stat->mts_sdrops, + mac_tx_stat->mts_blockcnt, + mac_tx_stat->mts_unblockcnt); } break; } @@ -853,8 +1006,9 @@ mac_ring_help(void) static const mdb_dcmd_t dcmds[] = { {"mac_flow", "?[-u] [-aprtsm]", "display Flow Entry structures", mac_flow_dcmd, mac_flow_help}, - {"mac_srs", "?[ -r[s|c[v]] | -t[s|c[v]] ]", "display MAC Soft Ring Set" - " structures", mac_srs_dcmd, mac_srs_help}, + {"mac_srs", "?[ -r[i|s|c[v]] | -t[i|s|c[v]] ]", + "display MAC Soft Ring Set" " structures", mac_srs_dcmd, + mac_srs_help}, {"mac_ring", "?", "display MAC ring (hardware) structures", mac_ring_dcmd, mac_ring_help}, { NULL } diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index cb2eff9995..8dce6b20aa 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -2530,6 +2530,7 @@ static int add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname) { dladm_status_t err; + boolean_t cpuset, poolset; /* First check if it's in use by global zone. 
*/ if (zonecfg_ifname_exists(AF_INET, dlname) || @@ -2547,6 +2548,36 @@ add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname) "WARNING: unable to add network interface"); return (-1); } + + /* + * Set the pool of this link if the zone has a pool and + * neither the cpus nor the pool datalink property is + * already set. + */ + err = dladm_linkprop_is_set(dld_handle, linkid, DLADM_PROP_VAL_CURRENT, + "cpus", &cpuset); + if (err != DLADM_STATUS_OK) { + zdlerror(zlogp, err, dlname, + "WARNING: unable to check if cpus link property is set"); + } + err = dladm_linkprop_is_set(dld_handle, linkid, DLADM_PROP_VAL_CURRENT, + "pool", &poolset); + if (err != DLADM_STATUS_OK) { + zdlerror(zlogp, err, dlname, + "WARNING: unable to check if pool link property is set"); + } + + if ((strlen(pool_name) != 0) && !cpuset && !poolset) { + err = dladm_set_linkprop(dld_handle, linkid, "pool", + &pool_name, 1, DLADM_OPT_ACTIVE); + if (err != DLADM_STATUS_OK) { + zerror(zlogp, B_FALSE, "WARNING: unable to set " + "pool %s to datalink %s", pool_name, dlname); + bzero(pool_name, MAXPATHLEN); + } + } else { + bzero(pool_name, MAXPATHLEN); + } return (0); } @@ -2644,6 +2675,72 @@ configure_exclusive_network_interfaces(zlog_t *zlogp) } static int +remove_datalink_pool(zlog_t *zlogp, zoneid_t zoneid) +{ + ushort_t flags; + zone_iptype_t iptype; + int i, dlnum = 0; + datalink_id_t *dllink, *dllinks = NULL; + dladm_status_t err; + + if (strlen(pool_name) == 0) + return (0); + + if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, + sizeof (flags)) < 0) { + if (vplat_get_iptype(zlogp, &iptype) < 0) { + zerror(zlogp, B_TRUE, "unable to determine " + "ip-type"); + return (-1); + } + } else { + if (flags & ZF_NET_EXCL) + iptype = ZS_EXCLUSIVE; + else + iptype = ZS_SHARED; + } + + if (iptype == ZS_EXCLUSIVE) { + /* + * Get the datalink count and for each datalink, + * attempt to clear the pool property and clear + * the pool_name. 
+ */ + if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) { + zerror(zlogp, B_TRUE, "unable to count network " + "interfaces"); + return (-1); + } + + if (dlnum == 0) + return (0); + + if ((dllinks = malloc(dlnum * sizeof (datalink_id_t))) + == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + return (-1); + } + if (zone_list_datalink(zoneid, &dlnum, dllinks) != 0) { + zerror(zlogp, B_TRUE, "unable to list network " + "interfaces"); + return (-1); + } + + bzero(pool_name, MAXPATHLEN); + for (i = 0, dllink = dllinks; i < dlnum; i++, dllink++) { + err = dladm_set_linkprop(dld_handle, *dllink, "pool", + NULL, 0, DLADM_OPT_ACTIVE); + if (err != DLADM_STATUS_OK) { + zerror(zlogp, B_TRUE, + "WARNING: unable to clear pool"); + } + } + free(dllinks); + } + return (0); +} + +static int unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) { int dlnum = 0; @@ -4006,6 +4103,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) zerror(zlogp, B_FALSE, "WARNING: %s", zonecfg_strerror(res)); } + (void) zonecfg_get_poolname(handle, zone_name, pool_name, MAXPATHLEN); zonecfg_fini_handle(handle); return (Z_OK); @@ -4253,6 +4351,12 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) goto error; } + if ((pool_name = malloc(MAXPATHLEN)) == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + return (Z_NOMEM); + } + bzero(pool_name, MAXPATHLEN); + /* * The following actions are not performed when merely mounting a zone * for administrative use. 
@@ -4575,6 +4679,11 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) goto error; } + if (remove_datalink_pool(zlogp, zoneid) != 0) { + zerror(zlogp, B_FALSE, "unable clear datalink pool property"); + goto error; + } + if (zone_shutdown(zoneid) != 0) { zerror(zlogp, B_TRUE, "unable to shutdown zone"); goto error; @@ -4699,6 +4808,8 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) } } + free(pool_name); + remove_mlps(zlogp, zoneid); if (zone_destroy(zoneid) != 0) { diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index 90803d8770..743370c1ad 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -106,6 +106,7 @@ static char *progname; char *zone_name; /* zone which we are managing */ +char *pool_name; char default_brand[MAXNAMELEN]; char brand_name[MAXNAMELEN]; boolean_t zone_isnative; diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index 25ac7bf801..da6aa369ed 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -86,6 +86,7 @@ extern mutex_t msglock; extern boolean_t in_death_throes; extern boolean_t bringup_failure_recovery; extern char *zone_name; +extern char *pool_name; extern char brand_name[MAXNAMELEN]; extern char default_brand[MAXNAMELEN]; extern char boot_args[BOOTARGS_MAX]; diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h index 870a8350a4..f6c30e028d 100644 --- a/usr/src/head/libzonecfg.h +++ b/usr/src/head/libzonecfg.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -422,6 +422,7 @@ extern int zonecfg_destroy_tmp_pool(char *, char *, int); extern int zonecfg_bind_tmp_pool(zone_dochandle_t, zoneid_t, char *, int); extern int zonecfg_bind_pool(zone_dochandle_t, zoneid_t, char *, int); extern boolean_t zonecfg_warn_poold(zone_dochandle_t); +extern int zonecfg_get_poolname(zone_dochandle_t, char *, char *, size_t); /* * Miscellaneous utility functions. diff --git a/usr/src/lib/libdladm/Makefile.com b/usr/src/lib/libdladm/Makefile.com index 84d4f28fd4..791a1e65dc 100644 --- a/usr/src/lib/libdladm/Makefile.com +++ b/usr/src/lib/libdladm/Makefile.com @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -37,7 +37,7 @@ include ../../Makefile.rootfs LIBS = $(DYNLIB) $(LINTLIB) LDLIBS += -ldevinfo -lc -linetutil -lsocket -lscf -lrcm -lnvpair \ - -lexacct -lnsl -lkstat -lcurses + -lexacct -lnsl -lkstat -lcurses -lpool SRCDIR = ../common $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) diff --git a/usr/src/lib/libdladm/common/flowattr.c b/usr/src/lib/libdladm/common/flowattr.c index fd44c8bed9..33fd13de47 100644 --- a/usr/src/lib/libdladm/common/flowattr.c +++ b/usr/src/lib/libdladm/common/flowattr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,8 +41,6 @@ #include <libdlflow.h> #include <libdlflow_impl.h> -#define V4_PART_OF_V6(v6) ((v6)._S6_un._S6_u32[3]) - /* max port number for UDP, TCP & SCTP */ #define MAX_PORT 65535 diff --git a/usr/src/lib/libdladm/common/flowprop.c b/usr/src/lib/libdladm/common/flowprop.c index 25cb714176..a0531c8439 100644 --- a/usr/src/lib/libdladm/common/flowprop.c +++ b/usr/src/lib/libdladm/common/flowprop.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,7 +64,7 @@ static fprop_desc_t prop_table[] = { { "maxbw", { "", NULL }, NULL, 0, B_FALSE, do_set_maxbw, NULL, do_get_maxbw, do_check_maxbw}, - { "priority", { "", NULL }, NULL, 0, B_FALSE, + { "priority", { "", MPL_RESET }, NULL, 0, B_FALSE, do_set_priority, NULL, do_get_priority, do_check_priority} }; @@ -77,8 +77,8 @@ static prop_table_t prop_tbl = { }; static resource_prop_t rsrc_prop_table[] = { - {"maxbw", do_extract_maxbw}, - {"priority", do_extract_priority} + {"maxbw", extract_maxbw}, + {"priority", extract_priority} }; #define DLADM_MAX_RSRC_PROP (sizeof (rsrc_prop_table) / \ sizeof (resource_prop_t)) @@ -387,15 +387,14 @@ do_set_priority(dladm_handle_t handle, const char *flow, val_desc_t *vdp, { dld_ioc_modifyflow_t attr; mac_resource_props_t mrp; - void *val; if (val_cnt != 1) return (DLADM_STATUS_BADVALCNT); bzero(&mrp, sizeof (mrp)); - if (vdp != NULL && (val = (void *)vdp->vd_val) != NULL) { - bcopy(val, &mrp.mrp_priority, sizeof (mac_priority_level_t)); - free(val); + if (vdp != NULL) { + bcopy(&vdp->vd_val, &mrp.mrp_priority, + sizeof (mac_priority_level_t)); } else { mrp.mrp_priority = MPL_RESET; } @@ -416,35 +415,25 @@ static dladm_status_t do_check_priority(fprop_desc_t *pdp, char **prop_val, uint_t val_cnt, val_desc_t **vdpp) { - mac_priority_level_t *pri; + mac_priority_level_t pri; val_desc_t *vdp = NULL; dladm_status_t status = 
DLADM_STATUS_OK; if (val_cnt != 1) return (DLADM_STATUS_BADVALCNT); - pri = malloc(sizeof (mac_priority_level_t)); - if (pri == NULL) - return (DLADM_STATUS_NOMEM); - - status = dladm_str2pri(*prop_val, pri); - if (status != DLADM_STATUS_OK) { - free(pri); + status = dladm_str2pri(*prop_val, &pri); + if (status != DLADM_STATUS_OK) return (status); - } - if (*pri == -1) { - free(pri); + if (pri == -1) return (DLADM_STATUS_BADVAL); - } vdp = malloc(sizeof (val_desc_t)); - if (vdp == NULL) { - free(pri); + if (vdp == NULL) return (DLADM_STATUS_NOMEM); - } - vdp->vd_val = (uintptr_t)pri; + vdp->vd_val = (uint_t)pri; *vdpp = vdp; return (DLADM_STATUS_OK); } diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c index ad23a1a25b..7531a5c368 100644 --- a/usr/src/lib/libdladm/common/libdladm.c +++ b/usr/src/lib/libdladm/common/libdladm.c @@ -89,8 +89,9 @@ typedef struct { static link_protect_t link_protect_types[] = { { MPT_MACNOSPOOF, "mac-nospoof" }, + { MPT_RESTRICTED, "restricted" }, { MPT_IPNOSPOOF, "ip-nospoof" }, - { MPT_RESTRICTED, "restricted" } + { MPT_DHCPNOSPOOF, "dhcp-nospoof" } }; #define LPTYPES (sizeof (link_protect_types) / sizeof (link_protect_t)) @@ -381,6 +382,9 @@ dladm_status2str(dladm_status_t status, char *buf) case DLADM_STATUS_ADDRINUSE: s = "address already in use"; break; + case DLADM_STATUS_POOLCPU: + s = "pool and cpus property are mutually exclusive"; + break; default: s = "<unknown error>"; break; @@ -901,7 +905,7 @@ const char * dladm_ipv4addr2str(void *addr, char *buf) { if (inet_ntop(AF_INET, addr, buf, INET_ADDRSTRLEN) == NULL) - buf[0] = 0; + buf[0] = '\0'; return (buf); } @@ -913,6 +917,22 @@ dladm_str2ipv4addr(char *token, void *addr) DLADM_STATUS_OK : DLADM_STATUS_INVALID_IP); } +const char * +dladm_ipv6addr2str(void *addr, char *buf) +{ + if (inet_ntop(AF_INET6, addr, buf, INET6_ADDRSTRLEN) == NULL) + buf[0] = '\0'; + + return (buf); +} + +dladm_status_t +dladm_str2ipv6addr(char *token, void 
*addr) +{ + return (inet_pton(AF_INET6, token, addr) == 1 ? + DLADM_STATUS_OK : DLADM_STATUS_INVALID_IP); +} + /* * Find the set bits in a mask. * This is used for expanding a bitmask into individual sub-masks diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h index 1cddd9c66e..4e7cb32ba5 100644 --- a/usr/src/lib/libdladm/common/libdladm.h +++ b/usr/src/lib/libdladm/common/libdladm.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,11 +65,12 @@ extern "C" { * - DLADM_OPT_VLAN: * Signifies VLAN creation code path * - * - DLADM_OPT_HWRINGS: - * Requires a hardware group of rings when creating a vnic. - * * - DLADM_OPT_NOREFRESH: * Do not refresh the daemon after setting parameter (used by STP mcheck). + * + * - DLADM_OPT_BOOT: + * Bypass check functions during boot (used by pool property since pools + * can come up after link properties are set) */ #define DLADM_OPT_ACTIVE 0x00000001 #define DLADM_OPT_PERSIST 0x00000002 @@ -78,8 +79,8 @@ extern "C" { #define DLADM_OPT_PREFIX 0x00000010 #define DLADM_OPT_ANCHOR 0x00000020 #define DLADM_OPT_VLAN 0x00000040 -#define DLADM_OPT_HWRINGS 0x00000080 -#define DLADM_OPT_NOREFRESH 0x00000100 +#define DLADM_OPT_NOREFRESH 0x00000080 +#define DLADM_OPT_BOOT 0x00000100 #define DLADM_WALK_TERMINATE 0 #define DLADM_WALK_CONTINUE -1 @@ -160,7 +161,8 @@ typedef enum { DLADM_STATUS_MINMAXBW, DLADM_STATUS_NO_HWRINGS, DLADM_STATUS_PERMONLY, - DLADM_STATUS_OPTMISSING + DLADM_STATUS_OPTMISSING, + DLADM_STATUS_POOLCPU } dladm_status_t; typedef enum { @@ -231,6 +233,8 @@ extern dladm_status_t dladm_str2protect(char *, uint32_t *); extern const char *dladm_protect2str(uint32_t, char *); extern dladm_status_t dladm_str2ipv4addr(char *, void *); extern const char *dladm_ipv4addr2str(void *, char *); +extern dladm_status_t 
dladm_str2ipv6addr(char *, void *); +extern const char *dladm_ipv6addr2str(void *, char *); extern dladm_status_t dladm_parse_flow_props(char *, dladm_arg_list_t **, boolean_t); diff --git a/usr/src/lib/libdladm/common/libdladm_impl.h b/usr/src/lib/libdladm/common/libdladm_impl.h index 88d695a3c7..47bb94b5f7 100644 --- a/usr/src/lib/libdladm/common/libdladm_impl.h +++ b/usr/src/lib/libdladm/common/libdladm_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +38,7 @@ extern "C" { #define MAXLINELEN 1024 #define BUFLEN(lim, ptr) (((lim) > (ptr)) ? ((lim) - (ptr)) : 0) +#define V4_PART_OF_V6(v6) ((v6)._S6_un._S6_u32[3]) /* * The handle contains file descriptors to DLD_CONTROL_DEV and @@ -57,7 +58,7 @@ extern dladm_status_t dladm_errno2status(int); extern dladm_status_t i_dladm_rw_db(dladm_handle_t, const char *, mode_t, dladm_status_t (*)(dladm_handle_t, void *, FILE *, FILE *), void *, boolean_t); -extern dladm_status_t i_dladm_get_state(dladm_handle_t, datalink_id_t, +extern dladm_status_t dladm_get_state(dladm_handle_t, datalink_id_t, link_state_t *); extern void dladm_find_setbits32(uint32_t, uint32_t *, uint32_t *); extern dladm_status_t dladm_parse_args(char *, dladm_arg_list_t **, @@ -92,7 +93,6 @@ extern void dladm_free_args(dladm_arg_list_t *); #define FMADDRLEN "maddrlen" /* uint64_t */ #define FMADDRSLOT "maddrslot" /* uint64_t */ #define FMADDRPREFIXLEN "maddrpreflen" /* uint64_t */ -#define FHWRINGS "hwrings" /* boolean_t */ #define FVRID "vrid" /* uint64_t */ #define FVRAF "vraf" /* uint64_t */ @@ -114,7 +114,7 @@ extern void dladm_free_args(dladm_arg_list_t *); FKEY, FNPORTS, FPORTS, FPOLICY, \ FFIXMACADDR, FFORCE, FLACPMODE, FLACPTIMER, \ FMADDRTYPE, FMADDRLEN, FMADDRSLOT, \ - FMADDRPREFIXLEN, FHWRINGS, \ + FMADDRPREFIXLEN, \ FMACADDR, FSIMNETTYPE, FSIMNETPEER /* @@ -129,7 +129,8 @@ 
typedef struct val_desc { #define VALCNT(vals) (sizeof ((vals)) / sizeof (val_desc_t)) extern dladm_status_t dladm_link_proplist_extract(dladm_handle_t, - dladm_arg_list_t *, mac_resource_props_t *); + dladm_arg_list_t *, mac_resource_props_t *, + uint_t); extern dladm_status_t dladm_flow_proplist_extract(dladm_arg_list_t *, mac_resource_props_t *); @@ -141,9 +142,10 @@ extern dladm_status_t dladm_flow_proplist_extract(dladm_arg_list_t *, * by the pd_check function. */ typedef dladm_status_t rp_extractf_t(val_desc_t *, uint_t, void *); -extern rp_extractf_t do_extract_maxbw, do_extract_priority, - do_extract_cpus, do_extract_protection, - do_extract_allowedips; +extern rp_extractf_t extract_maxbw, extract_priority, + extract_cpus, extract_protection, + extract_allowedips, extract_allowedcids, + extract_rxrings, extract_txrings, extract_pool; typedef struct resource_prop_s { /* diff --git a/usr/src/lib/libdladm/common/libdlaggr.c b/usr/src/lib/libdladm/common/libdlaggr.c index d715ff013c..8c91c41ce1 100644 --- a/usr/src/lib/libdladm/common/libdlaggr.c +++ b/usr/src/lib/libdladm/common/libdlaggr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,7 +57,7 @@ static uchar_t zero_mac[] = {0, 0, 0, 0, 0, 0}; #define VALID_PORT_MAC(mac) \ (((mac) != NULL) && (bcmp(zero_mac, (mac), ETHERADDRL) != 0) && \ - (!(mac)[0] & 0x01)) + (!((mac)[0] & 0x01))) #define PORT_DELIMITER ":" diff --git a/usr/src/lib/libdladm/common/libdlether.c b/usr/src/lib/libdladm/common/libdlether.c index ef89439ae1..4855dc234d 100644 --- a/usr/src/lib/libdladm/common/libdlether.c +++ b/usr/src/lib/libdladm/common/libdlether.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -299,7 +299,7 @@ dladm_ether_info(dladm_handle_t handle, datalink_id_t linkid, eattr->lei_attr[CURRENT].le_spdx->lesd_speed = (int)(sp64/1000000ull); eattr->lei_attr[CURRENT].le_spdx->lesd_duplex = link_duplex; - status = i_dladm_get_state(handle, linkid, &eattr->lei_state); + status = dladm_get_state(handle, linkid, &eattr->lei_state); if (status != DLADM_STATUS_OK) goto bail; diff --git a/usr/src/lib/libdladm/common/libdlflow.c b/usr/src/lib/libdladm/common/libdlflow.c index 235b948504..9e2131ac0c 100644 --- a/usr/src/lib/libdladm/common/libdlflow.c +++ b/usr/src/lib/libdladm/common/libdlflow.c @@ -63,8 +63,6 @@ #define MAXLINELEN 1024 #define MAXPATHLEN 1024 -#define V4_PART_OF_V6(v6) ((v6)._S6_un._S6_u32[3]) - /* database file parameters */ static const char *BW_LIMIT = "bw_limit"; static const char *PRIORITY = "priority"; diff --git a/usr/src/lib/libdladm/common/libdllink.c b/usr/src/lib/libdladm/common/libdllink.c index 4c2ca93c8c..7bf18ceed5 100644 --- a/usr/src/lib/libdladm/common/libdllink.c +++ b/usr/src/lib/libdladm/common/libdllink.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -39,6 +39,7 @@ #include <libdevinfo.h> #include <libdlaggr.h> #include <libdlvlan.h> +#include <libdlvnic.h> #include <libdllink.h> #include <libdlmgmt.h> #include <libdladm_impl.h> @@ -156,9 +157,10 @@ dladm_walk_hwgrp(dladm_handle_t handle, datalink_id_t linkid, void *arg, ret = ioctl(dladm_dld_fd(handle), DLDIOC_GETHWGRP, iomp); if (ret == 0) { - int i; - dld_hwgrpinfo_t *dhip; - dladm_hwgrp_attr_t attr; + int i; + int j; + dld_hwgrpinfo_t *dhip; + dladm_hwgrp_attr_t attr; dhip = (dld_hwgrpinfo_t *)(iomp + 1); for (i = 0; i < iomp->dih_n_groups; i++) { @@ -169,6 +171,9 @@ dladm_walk_hwgrp(dladm_handle_t handle, datalink_id_t linkid, void *arg, attr.hg_grp_num = dhip->dhi_grp_num; attr.hg_grp_type = dhip->dhi_grp_type; attr.hg_n_rings = dhip->dhi_n_rings; + for (j = 0; j < dhip->dhi_n_rings; j++) + attr.hg_rings[j] = dhip->dhi_rings[j]; + dladm_sort_index_list(attr.hg_rings, attr.hg_n_rings); attr.hg_n_clnts = dhip->dhi_n_clnts; (void) strlcpy(attr.hg_client_names, dhip->dhi_clnts, sizeof (attr.hg_client_names)); diff --git a/usr/src/lib/libdladm/common/libdllink.h b/usr/src/lib/libdladm/common/libdllink.h index d47059e1d1..9d5d19a898 100644 --- a/usr/src/lib/libdladm/common/libdllink.h +++ b/usr/src/lib/libdladm/common/libdllink.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -101,6 +101,7 @@ typedef struct dladm_hwgrp_attr { uint_t hg_grp_num; dladm_hwgrp_type_t hg_grp_type; uint_t hg_n_rings; + uint_t hg_rings[MAX_RINGS_PER_GROUP]; uint_t hg_n_clnts; char hg_client_names[MAXCLIENTNAMELEN]; } dladm_hwgrp_attr_t; @@ -134,6 +135,8 @@ extern dladm_status_t dladm_walk_linkprop(dladm_handle_t, datalink_id_t, void *, int (*)(dladm_handle_t, datalink_id_t, const char *, void *)); extern boolean_t dladm_attr_is_linkprop(const char *name); +extern dladm_status_t dladm_linkprop_is_set(dladm_handle_t, datalink_id_t, + dladm_prop_type_t, const char *, boolean_t *); extern dladm_status_t dladm_set_secobj(dladm_handle_t, const char *, dladm_secobj_class_t, uint8_t *, uint_t, uint_t); @@ -208,6 +211,8 @@ extern int dladm_walk_macaddr(dladm_handle_t, datalink_id_t, extern int dladm_walk_hwgrp(dladm_handle_t, datalink_id_t, void *, boolean_t (*)(void *, dladm_hwgrp_attr_t *)); +extern void dladm_sort_index_list(uint_t [], uint_t); + extern dladm_status_t dladm_link_get_proplist(dladm_handle_t, datalink_id_t, dladm_arg_list_t **); diff --git a/usr/src/lib/libdladm/common/libdlstat.c b/usr/src/lib/libdladm/common/libdlstat.c index e69c9d8934..264c5f179f 100644 --- a/usr/src/lib/libdladm/common/libdlstat.c +++ b/usr/src/lib/libdladm/common/libdlstat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,7 @@ #include <libdllink.h> #include <libdlflow.h> #include <libdlstat.h> +#include <libdlaggr.h> /* * x86 <sys/regs> ERR conflicts with <curses.h> ERR. @@ -72,7 +73,7 @@ static int statentry = -1, maxstatentries = 0; /* * Search for flowlist entry in stattable which matches - * the flowname and linkide. If no match is found, use + * the flowname and linkid. If no match is found, use * next available slot. If no slots are available, * reallocate table with more slots. 
* @@ -347,7 +348,8 @@ closedevnet() struct flowlist *flist; /* Close all open /dev/net/ files */ - for (flist = stattable; index <= maxstatentries; index++, flist++) { + + for (flist = stattable; index < maxstatentries; index++, flist++) { if (flist->linkid == DATALINK_INVALID_LINKID) break; if (flist->fd != -1 && flist->fd != INT32_MAX) @@ -711,7 +713,7 @@ dladm_stats_total(pktsum_t *s1, pktsum_t *s2, pktsum_t *s3) s1->snaptime = s2->snaptime; } -#define DIFF_STAT(s2, s3) ((s2) > (s3) ? (s2 - s3) : 0) +#define DIFF_STAT(s2, s3) ((s2) > (s3) ? ((s2) - (s3)) : 0) /* Compute differences between 2 pktsums (s1 = s2 - s3) */ @@ -726,3 +728,2361 @@ dladm_stats_diff(pktsum_t *s1, pktsum_t *s2, pktsum_t *s3) s1->oerrors = DIFF_STAT(s2->oerrors, s3->oerrors); s1->snaptime = DIFF_STAT(s2->snaptime, s3->snaptime); } + +#define DLSTAT_MAC_RX_SWLANE "mac_rx_swlane" +#define DLSTAT_MAC_RX_HWLANE "mac_rx_hwlane" +#define DLSTAT_MAC_TX_SWLANE "mac_tx_swlane" +#define DLSTAT_MAC_TX_HWLANE "mac_tx_hwlane" +#define DLSTAT_MAC_MISC_STAT "mac_misc_stat" +#define DLSTAT_MAC_RX_RING "mac_rx_ring" +#define DLSTAT_MAC_TX_RING "mac_tx_ring" +#define DLSTAT_MAC_FANOUT "mac_rx_swlane0_fanout" + +typedef struct { + const char *si_name; + uint_t si_offset; +} stat_info_t; + +#define A_CNT(arr) (sizeof (arr) / sizeof (arr[0])) + +/* Definitions for rx lane stats */ +#define RL_OFF(f) (offsetof(rx_lane_stat_t, f)) + +static stat_info_t rx_hwlane_stats_list[] = { + {"ipackets", RL_OFF(rl_ipackets)}, + {"rbytes", RL_OFF(rl_rbytes)}, + {"intrs", RL_OFF(rl_intrs)}, + {"intrbytes", RL_OFF(rl_intrbytes)}, + {"polls", RL_OFF(rl_polls)}, + {"pollbytes", RL_OFF(rl_pollbytes)}, + {"rxsdrops", RL_OFF(rl_sdrops)}, + {"chainunder10", RL_OFF(rl_chl10)}, + {"chain10to50", RL_OFF(rl_ch10_50)}, + {"chainover50", RL_OFF(rl_chg50)} +}; +#define RX_HWLANE_STAT_SIZE A_CNT(rx_hwlane_stats_list) + +static stat_info_t rx_swlane_stats_list[] = { + {"ipackets", RL_OFF(rl_ipackets)}, + {"rbytes", RL_OFF(rl_rbytes)}, + 
{"local", RL_OFF(rl_lclpackets)}, + {"localbytes", RL_OFF(rl_lclbytes)}, + {"intrs", RL_OFF(rl_intrs)}, + {"intrbytes", RL_OFF(rl_intrbytes)}, + {"rxsdrops", RL_OFF(rl_sdrops)} +}; +#define RX_SWLANE_STAT_SIZE A_CNT(rx_swlane_stats_list) + +static stat_info_t rx_lane_stats_list[] = { + {"ipackets", RL_OFF(rl_ipackets)}, + {"rbytes", RL_OFF(rl_rbytes)}, + {"local", RL_OFF(rl_lclpackets)}, + {"localbytes", RL_OFF(rl_lclbytes)}, + {"intrs", RL_OFF(rl_intrs)}, + {"intrbytes", RL_OFF(rl_intrbytes)}, + {"polls", RL_OFF(rl_polls)}, + {"rxsdrops", RL_OFF(rl_sdrops)}, + {"pollbytes", RL_OFF(rl_pollbytes)}, + {"chainunder10", RL_OFF(rl_chl10)}, + {"chain10to50", RL_OFF(rl_ch10_50)}, + {"chainover50", RL_OFF(rl_chg50)} +}; +#define RX_LANE_STAT_SIZE A_CNT(rx_lane_stats_list) + +/* Definitions for tx lane stats */ +#define TL_OFF(f) (offsetof(tx_lane_stat_t, f)) + +static stat_info_t tx_lane_stats_list[] = { + {"opackets", TL_OFF(tl_opackets)}, + {"obytes", TL_OFF(tl_obytes)}, + {"blockcnt", TL_OFF(tl_blockcnt)}, + {"unblockcnt", TL_OFF(tl_unblockcnt)}, + {"txsdrops", TL_OFF(tl_sdrops)} +}; +#define TX_LANE_STAT_SIZE A_CNT(tx_lane_stats_list) + +/* Definitions for tx/rx misc stats */ +#define M_OFF(f) (offsetof(misc_stat_t, f)) + +static stat_info_t misc_stats_list[] = { + {"multircv", M_OFF(ms_multircv)}, + {"brdcstrcv", M_OFF(ms_brdcstrcv)}, + {"multixmt", M_OFF(ms_multixmt)}, + {"brdcstxmt", M_OFF(ms_brdcstxmt)}, + {"multircvbytes", M_OFF(ms_multircvbytes)}, + {"brdcstrcvbytes", M_OFF(ms_brdcstrcvbytes)}, + {"multixmtbytes", M_OFF(ms_multixmtbytes)}, + {"brdcstxmtbytes", M_OFF(ms_brdcstxmtbytes)}, + {"txerrors", M_OFF(ms_txerrors)}, + {"macspoofed", M_OFF(ms_macspoofed)}, + {"ipspoofed", M_OFF(ms_ipspoofed)}, + {"dhcpspoofed", M_OFF(ms_dhcpspoofed)}, + {"restricted", M_OFF(ms_restricted)}, + {"ipackets", M_OFF(ms_ipackets)}, + {"rbytes", M_OFF(ms_rbytes)}, + {"local", M_OFF(ms_local)}, + {"localbytes", M_OFF(ms_localbytes)}, + {"intrs", M_OFF(ms_intrs)}, + {"intrbytes", 
M_OFF(ms_intrbytes)}, + {"polls", M_OFF(ms_polls)}, + {"pollbytes", M_OFF(ms_pollbytes)}, + {"rxsdrops", M_OFF(ms_rxsdrops)}, + {"chainunder10", M_OFF(ms_chainunder10)}, + {"chain10to50", M_OFF(ms_chain10to50)}, + {"chainover50", M_OFF(ms_chainover50)}, + {"obytes", M_OFF(ms_obytes)}, + {"opackets", M_OFF(ms_opackets)}, + {"blockcnt", M_OFF(ms_blockcnt)}, + {"unblockcnt", M_OFF(ms_unblockcnt)}, + {"txsdrops", M_OFF(ms_txsdrops)} +}; +#define MISC_STAT_SIZE A_CNT(misc_stats_list) + +/* Definitions for rx ring stats */ +#define R_OFF(f) (offsetof(ring_stat_t, f)) + +static stat_info_t rx_ring_stats_list[] = { + {"ipackets", R_OFF(r_packets)}, + {"rbytes", R_OFF(r_bytes)} +}; +#define RX_RING_STAT_SIZE A_CNT(rx_ring_stats_list) + +/* Definitions for tx ring stats */ +static stat_info_t tx_ring_stats_list[] = { + {"opackets", R_OFF(r_packets)}, + {"obytes", R_OFF(r_bytes)} +}; +#define TX_RING_STAT_SIZE A_CNT(tx_ring_stats_list) + +/* Definitions for fanout stats */ +#define F_OFF(f) (offsetof(fanout_stat_t, f)) + +static stat_info_t fanout_stats_list[] = { + {"ipackets", F_OFF(f_ipackets)}, + {"rbytes", F_OFF(f_rbytes)}, +}; +#define FANOUT_STAT_SIZE A_CNT(fanout_stats_list) + +/* Definitions for total stats */ +#define T_OFF(f) (offsetof(total_stat_t, f)) + +static stat_info_t total_stats_list[] = { + {"ipackets", T_OFF(ts_ipackets)}, + {"rbytes", T_OFF(ts_rbytes)}, + {"opackets", T_OFF(ts_opackets)}, + {"obytes", T_OFF(ts_obytes)} +}; +#define TOTAL_STAT_SIZE A_CNT(total_stats_list) + +/* Definitions for aggr stats */ +#define AP_OFF(f) (offsetof(aggr_port_stat_t, f)) + +static stat_info_t aggr_port_stats_list[] = { + {"ipackets64", AP_OFF(ap_ipackets)}, + {"rbytes64", AP_OFF(ap_rbytes)}, + {"opackets64", AP_OFF(ap_opackets)}, + {"obytes64", AP_OFF(ap_obytes)} +}; +#define AGGR_PORT_STAT_SIZE A_CNT(aggr_port_stats_list) + +/* Definitions for flow stats */ +#define FL_OFF(f) (offsetof(flow_stat_t, f)) + +static stat_info_t flow_stats_list[] = { + {"ipackets", 
FL_OFF(fl_ipackets)}, + {"rbytes", FL_OFF(fl_rbytes)}, + {"opackets", FL_OFF(fl_opackets)}, + {"obytes", FL_OFF(fl_obytes)} +}; +#define FLOW_STAT_SIZE A_CNT(flow_stats_list) + +/* Rx lane specific functions */ +void * dlstat_rx_lane_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_rx_lane_match(void *, void *); +static void * i_dlstat_rx_lane_stat_entry_diff(void *, void *); + +/* Tx lane specific functions */ +void * dlstat_tx_lane_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_tx_lane_match(void *, void *); +static void * i_dlstat_tx_lane_stat_entry_diff(void *, void *); + +/* Rx lane total specific functions */ +void * dlstat_rx_lane_total_stats(dladm_handle_t, + datalink_id_t); + +/* Tx lane total specific functions */ +void * dlstat_tx_lane_total_stats(dladm_handle_t, + datalink_id_t); + +/* Fanout specific functions */ +void * dlstat_fanout_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_fanout_match(void *, void *); +static void * i_dlstat_fanout_stat_entry_diff(void *, void *); + +/* Rx ring specific functions */ +void * dlstat_rx_ring_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_rx_ring_match(void *, void *); +static void * i_dlstat_rx_ring_stat_entry_diff(void *, void *); + +/* Tx ring specific functions */ +void * dlstat_tx_ring_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_tx_ring_match(void *, void *); +static void * i_dlstat_tx_ring_stat_entry_diff(void *, void *); + +/* Rx ring total specific functions */ +void * dlstat_rx_ring_total_stats(dladm_handle_t, + datalink_id_t); + +/* Tx ring total specific functions */ +void * dlstat_tx_ring_total_stats(dladm_handle_t, + datalink_id_t); + +/* Summary specific functions */ +void * dlstat_total_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_total_match(void *, void *); +static void * i_dlstat_total_stat_entry_diff(void *, void *); + +/* Aggr port specific functions */ +void * 
dlstat_aggr_port_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_aggr_port_match(void *, void *); +static void * i_dlstat_aggr_port_stat_entry_diff(void *, void *); + +/* Misc stat specific functions */ +void * dlstat_misc_stats(dladm_handle_t, datalink_id_t); + +typedef void * dladm_stat_query_t(dladm_handle_t, datalink_id_t); +typedef boolean_t dladm_stat_match_t(void *, void *); +typedef void * dladm_stat_diff_t(void *, void *); + +typedef struct dladm_stat_desc_s { + dladm_stat_type_t ds_stattype; + dladm_stat_query_t *ds_querystat; + dladm_stat_match_t *ds_matchstat; + dladm_stat_diff_t *ds_diffstat; + uint_t ds_offset; + stat_info_t *ds_statlist; + uint_t ds_statsize; +} dladm_stat_desc_t; + +/* + * dladm_stat_table has one entry for each supported stat. ds_querystat returns + * a chain of 'stat entries' for the queried stat. + * Each stat entry has set of identifiers (ids) and an object containing actual + * stat values. These stat entry objects are chained together in a linked list + * of datatype dladm_stat_chain_t. Head of this list is returned to the caller + * of dladm_link_stat_query. + * + * One node in the chain is shown below: + * + * ------------------------- + * | dc_statentry | + * | -------------- | + * | | ids | | + * | -------------- | + * | | stat fields | | + * | -------------- | + * ------------------------- + * | dc_next ---------|------> to next stat entry + * ------------------------- + * + * In particular, for query DLADM_STAT_RX_LANE, dc_statentry carries pointer to + * object of type rx_lane_stat_entry_t. + * + * dladm_link_stat_query_all returns similar chain. However, instead of storing + * stat fields as raw numbers, it stores those as chain of <name, value> pairs. 
+ * The resulting structure is depicted below: + * + * ------------------------- + * | dc_statentry | + * | -------------- | --------------- + * | | nv_header | | | name, val | + * | -------------- | --------------- + * | | nve_stats---|----|-->| nv_nextstat--|---> to next name, val pair + * | -------------- | --------------- + * ------------------------- + * | dc_next ---------|------> to next stat entry + * ------------------------- + */ +static dladm_stat_desc_t dladm_stat_table[] = { +{ DLADM_STAT_RX_LANE, dlstat_rx_lane_stats, + i_dlstat_rx_lane_match, i_dlstat_rx_lane_stat_entry_diff, + offsetof(rx_lane_stat_entry_t, rle_stats), + rx_lane_stats_list, RX_LANE_STAT_SIZE}, + +{ DLADM_STAT_TX_LANE, dlstat_tx_lane_stats, + i_dlstat_tx_lane_match, i_dlstat_tx_lane_stat_entry_diff, + offsetof(tx_lane_stat_entry_t, tle_stats), + tx_lane_stats_list, TX_LANE_STAT_SIZE}, + +{ DLADM_STAT_RX_LANE_TOTAL, dlstat_rx_lane_total_stats, + i_dlstat_rx_lane_match, i_dlstat_rx_lane_stat_entry_diff, + offsetof(rx_lane_stat_entry_t, rle_stats), + rx_lane_stats_list, RX_LANE_STAT_SIZE}, + +{ DLADM_STAT_TX_LANE_TOTAL, dlstat_tx_lane_total_stats, + i_dlstat_tx_lane_match, i_dlstat_tx_lane_stat_entry_diff, + offsetof(tx_lane_stat_entry_t, tle_stats), + tx_lane_stats_list, TX_LANE_STAT_SIZE}, + +{ DLADM_STAT_RX_LANE_FOUT, dlstat_fanout_stats, + i_dlstat_fanout_match, i_dlstat_fanout_stat_entry_diff, + offsetof(fanout_stat_entry_t, fe_stats), + fanout_stats_list, FANOUT_STAT_SIZE}, + +{ DLADM_STAT_RX_RING, dlstat_rx_ring_stats, + i_dlstat_rx_ring_match, i_dlstat_rx_ring_stat_entry_diff, + offsetof(ring_stat_entry_t, re_stats), + rx_ring_stats_list, RX_RING_STAT_SIZE}, + +{ DLADM_STAT_TX_RING, dlstat_tx_ring_stats, + i_dlstat_tx_ring_match, i_dlstat_tx_ring_stat_entry_diff, + offsetof(ring_stat_entry_t, re_stats), + tx_ring_stats_list, TX_RING_STAT_SIZE}, + +{ DLADM_STAT_RX_RING_TOTAL, dlstat_rx_ring_total_stats, + i_dlstat_rx_ring_match, i_dlstat_rx_ring_stat_entry_diff, + 
offsetof(ring_stat_entry_t, re_stats), + rx_ring_stats_list, RX_RING_STAT_SIZE}, + +{ DLADM_STAT_TX_RING_TOTAL, dlstat_tx_ring_total_stats, + i_dlstat_tx_ring_match, i_dlstat_tx_ring_stat_entry_diff, + offsetof(ring_stat_entry_t, re_stats), + tx_ring_stats_list, TX_RING_STAT_SIZE}, + +{ DLADM_STAT_TOTAL, dlstat_total_stats, + i_dlstat_total_match, i_dlstat_total_stat_entry_diff, + offsetof(total_stat_entry_t, tse_stats), + total_stats_list, TOTAL_STAT_SIZE}, + +{ DLADM_STAT_AGGR_PORT, dlstat_aggr_port_stats, + i_dlstat_aggr_port_match, i_dlstat_aggr_port_stat_entry_diff, + offsetof(aggr_port_stat_entry_t, ape_stats), + aggr_port_stats_list, AGGR_PORT_STAT_SIZE}, +/* + * We don't support -i <interval> query with misc stats. Several table fields + * are left uninitialized thus. + */ +{ DLADM_STAT_MISC, dlstat_misc_stats, + NULL, NULL, + 0, + misc_stats_list, MISC_STAT_SIZE} +}; + +/* Internal functions */ +static void * +dlstat_diff_stats(void *arg1, void *arg2, dladm_stat_type_t stattype) +{ + return (dladm_stat_table[stattype].ds_diffstat(arg1, arg2)); +} + +static boolean_t +dlstat_match_stats(void *arg1, void *arg2, dladm_stat_type_t stattype) +{ + return (dladm_stat_table[stattype].ds_matchstat(arg1, arg2)); +} + +/* Diff between two stats */ +static void +i_dlstat_diff_stats(void *diff, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (void *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (void *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *diff_val = (void *) + ((uchar_t *)diff + stats_list[i].si_offset); + + *diff_val = DIFF_STAT(*op1_val, *op2_val); + } +} + +/* + * Perform diff = s1 - s2, where diff, s1, s2 are structure objects of same + * datatype. slist is list of offsets of the fields within the structure. 
+ */ +#define DLSTAT_DIFF_STAT(s1, s2, diff, f, slist, sz) { \ + if (s2 == NULL) { \ + bcopy(&s1->f, &diff->f, sizeof (s1->f)); \ + } else { \ + i_dlstat_diff_stats(&diff->f, &s1->f, \ + &s2->f, slist, sz); \ + } \ +} + +/* Sum two stats */ +static void +i_dlstat_sum_stats(void *sum, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (void *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (void *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *sum_val = (void *) + ((uchar_t *)sum + stats_list[i].si_offset); + + *sum_val = *op1_val + *op2_val; + } +} + +/* Look up kstat value */ +static void +i_dlstat_get_stats(kstat_ctl_t *kcp, kstat_t *ksp, void *stats, + stat_info_t stats_list[], uint_t size) +{ + int i; + + if (kstat_read(kcp, ksp, NULL) == -1) + return; + + for (i = 0; i < size; i++) { + uint64_t *val = (void *) + ((uchar_t *)stats + stats_list[i].si_offset); + + if (dladm_kstat_value(ksp, stats_list[i].si_name, + KSTAT_DATA_UINT64, val) < 0) + return; + } +} + +/* Append linked list list1 to linked list list2 and return resulting list */ +static dladm_stat_chain_t * +i_dlstat_join_lists(dladm_stat_chain_t *list1, dladm_stat_chain_t *list2) +{ + dladm_stat_chain_t *curr; + + if (list1 == NULL) + return (list2); + + /* list1 has at least one element, find last element in list1 */ + curr = list1; + while (curr->dc_next != NULL) + curr = curr->dc_next; + + curr->dc_next = list2; + return (list1); +} + +uint_t default_idlist[] = {0}; +uint_t default_idlist_size = 1; + +typedef enum { + DLSTAT_RX_RING_IDLIST, + DLSTAT_TX_RING_IDLIST, + DLSTAT_RX_HWLANE_IDLIST, + DLSTAT_TX_HWLANE_IDLIST, + DLSTAT_FANOUT_IDLIST +} dlstat_idlist_type_t; + +void +dladm_sort_index_list(uint_t idlist[], uint_t size) +{ + int i, j; + + for (j = 1; j < size; j++) { + int key = idlist[j]; + for (i = j - 1; (i >= 0) && (idlist[i] > key); i--) + idlist[i + 1] = idlist[i]; + idlist[i + 
1] = key; + } +} + +/* Support for legacy drivers */ +void +i_query_legacy_stats(const char *linkname, pktsum_t *stats) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + + bzero(stats, sizeof (*stats)); + + if ((kcp = kstat_open()) == NULL) + return; + + ksp = dladm_kstat_lookup(kcp, "link", 0, linkname, NULL); + + if (ksp != NULL) + dladm_get_stats(kcp, ksp, stats); + + (void) kstat_close(kcp); +} + +void * +i_dlstat_legacy_rx_lane_stats(const char *linkname) +{ + dladm_stat_chain_t *head = NULL; + pktsum_t stats; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + bzero(&stats, sizeof (pktsum_t)); + + /* Query for dls stats */ + i_query_legacy_stats(linkname, &stats); + + /* Convert to desired data type */ + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_SWLANE; + + rx_lane_stat_entry->rle_stats.rl_ipackets = stats.ipackets; + rx_lane_stat_entry->rle_stats.rl_intrs = stats.ipackets; + rx_lane_stat_entry->rle_stats.rl_rbytes = stats.rbytes; + + /* Allocate memory for wrapper */ + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(rx_lane_stat_entry); + goto done; + } + + head->dc_statentry = rx_lane_stat_entry; + head->dc_next = NULL; +done: + return (head); +} + +void * +i_dlstat_legacy_tx_lane_stats(const char *linkname) +{ + dladm_stat_chain_t *head = NULL; + pktsum_t stats; + tx_lane_stat_entry_t *tx_lane_stat_entry; + + bzero(&stats, sizeof (pktsum_t)); + + /* Query for dls stats */ + i_query_legacy_stats(linkname, &stats); + + /* Convert to desired data type */ + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_SWLANE; + + tx_lane_stat_entry->tle_stats.tl_opackets = stats.opackets; + tx_lane_stat_entry->tle_stats.tl_obytes = stats.obytes; + + /* 
Allocate memory for wrapper */ + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(tx_lane_stat_entry); + goto done; + } + + head->dc_statentry = tx_lane_stat_entry; + head->dc_next = NULL; +done: + return (head); +} + +/* + * Ideally, we would want an ioctl to return list of ring-ids (or lane-ids) + * for a given data-link (or mac client). We could then query for specific + * kstats based on these ring-ids (lane-ids). + * Ring-ids (or lane-ids) could be returned like any other link properties + * queried by dladm show-linkprop. However, non-global zones do not have + * access to this information today. + * We thus opt for an implementation that relies heavily on kstat internals: + * i_dlstat_*search routines and i_dlstat_get_idlist. + */ +/* rx hwlane specific */ +static boolean_t +i_dlstat_rx_hwlane_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_rx") != 0 && + strstr(ksp->ks_name, "hwlane") != 0 && + strstr(ksp->ks_name, "fanout") == 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* tx hwlane specific */ +static boolean_t +i_dlstat_tx_hwlane_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_tx") != 0 && + strstr(ksp->ks_name, "hwlane") != 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* rx fanout specific */ +static boolean_t +i_dlstat_fanout_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_rx") != 0 && + strstr(ksp->ks_name, "swlane") != 0 && + strstr(ksp->ks_name, "fanout") != 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* rx ring specific */ +static boolean_t +i_dlstat_rx_ring_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_rx") != 0 && + strstr(ksp->ks_name, "ring") != 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* tx ring specific */ +static boolean_t +i_dlstat_tx_ring_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0) && + strstr(ksp->ks_name, "mac_tx") != 0 && + 
strstr(ksp->ks_name, "ring") != 0 && + strcmp(ksp->ks_class, "net") == 0; +} + +typedef boolean_t dladm_search_kstat_t(kstat_t *); +typedef struct dladm_extract_idlist_s { + dlstat_idlist_type_t di_type; + char *di_prefix; + dladm_search_kstat_t *di_searchkstat; +} dladm_extract_idlist_t; + +static dladm_extract_idlist_t dladm_extract_idlist[] = { +{ DLSTAT_RX_RING_IDLIST, DLSTAT_MAC_RX_RING, + i_dlstat_rx_ring_search}, +{ DLSTAT_TX_RING_IDLIST, DLSTAT_MAC_TX_RING, + i_dlstat_tx_ring_search}, +{ DLSTAT_RX_HWLANE_IDLIST, DLSTAT_MAC_RX_HWLANE, + i_dlstat_rx_hwlane_search}, +{ DLSTAT_TX_HWLANE_IDLIST, DLSTAT_MAC_TX_HWLANE, + i_dlstat_tx_hwlane_search}, +{ DLSTAT_FANOUT_IDLIST, DLSTAT_MAC_FANOUT, + i_dlstat_fanout_search} +}; + +static void +i_dlstat_get_idlist(const char *modname, dlstat_idlist_type_t idlist_type, + uint_t idlist[], uint_t *size) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + char *prefix; + int prefixlen; + boolean_t (*fptr_searchkstat)(kstat_t *); + + *size = 0; + + if ((kcp = kstat_open()) == NULL) { + warn("kstat_open operation failed"); + goto done; + } + + prefix = dladm_extract_idlist[idlist_type].di_prefix; + fptr_searchkstat = dladm_extract_idlist[idlist_type].di_searchkstat; + prefixlen = strlen(prefix); + for (ksp = kcp->kc_chain; ksp != NULL; ksp = ksp->ks_next) { + if ((strcmp(ksp->ks_module, modname) == 0) && + fptr_searchkstat(ksp)) { + idlist[(*size)++] = atoi(&ksp->ks_name[prefixlen]); + } + } + dladm_sort_index_list(idlist, *size); + +done: + (void) kstat_close(kcp); +} + +static dladm_stat_chain_t * +i_dlstat_query_stats(const char *modname, const char *prefix, + uint_t idlist[], uint_t idlist_size, + void * (*fn)(kstat_ctl_t *, kstat_t *, int)) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + char statname[MAXLINKNAMELEN]; + int i = 0; + dladm_stat_chain_t *head = NULL, *prev = NULL; + dladm_stat_chain_t *curr; + + if ((kcp = kstat_open()) == NULL) { + warn("kstat_open operation failed"); + return (NULL); + } + + for (i = 0; i < idlist_size; 
i++) { + uint_t index = idlist[i]; + + (void) snprintf(statname, sizeof (statname), "%s%d", prefix, + index); + + ksp = dladm_kstat_lookup(kcp, modname, 0, statname, NULL); + if (ksp == NULL) + continue; + + curr = malloc(sizeof (dladm_stat_chain_t)); + if (curr == NULL) + break; + + curr->dc_statentry = fn(kcp, ksp, index); + if (curr->dc_statentry == NULL) { + free(curr); + break; + } + + (void) strlcpy(curr->dc_statheader, statname, + sizeof (curr->dc_statheader)); + curr->dc_next = NULL; + + if (head == NULL) /* First node */ + head = curr; + else + prev->dc_next = curr; + + prev = curr; + } +done: + (void) kstat_close(kcp); + return (head); +} + +static misc_stat_entry_t * +i_dlstat_misc_stats(const char *linkname) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + misc_stat_entry_t *misc_stat_entry = NULL; + + if ((kcp = kstat_open()) == NULL) + return (NULL); + + ksp = dladm_kstat_lookup(kcp, linkname, 0, DLSTAT_MAC_MISC_STAT, NULL); + if (ksp == NULL) + goto done; + + misc_stat_entry = calloc(1, sizeof (misc_stat_entry_t)); + if (misc_stat_entry == NULL) + goto done; + + i_dlstat_get_stats(kcp, ksp, &misc_stat_entry->mse_stats, + misc_stats_list, MISC_STAT_SIZE); +done: + (void) kstat_close(kcp); + return (misc_stat_entry); +} + +/* Rx lane statistic specific functions */ +static boolean_t +i_dlstat_rx_lane_match(void *arg1, void *arg2) +{ + rx_lane_stat_entry_t *s1 = arg1; + rx_lane_stat_entry_t *s2 = arg2; + + return (s1->rle_index == s2->rle_index && + s1->rle_id == s2->rle_id); +} + +static void * +i_dlstat_rx_lane_stat_entry_diff(void *arg1, void *arg2) +{ + rx_lane_stat_entry_t *s1 = arg1; + rx_lane_stat_entry_t *s2 = arg2; + rx_lane_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (rx_lane_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->rle_index = s1->rle_index; + diff_entry->rle_id = s1->rle_id; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, rle_stats, rx_lane_stats_list, + RX_LANE_STAT_SIZE); + +done: + return (diff_entry); +} + 
+static void * +i_dlstat_rx_hwlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + rx_lane_stat_entry_t *rx_lane_stat_entry; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = i; + rx_lane_stat_entry->rle_id = L_HWLANE; + + i_dlstat_get_stats(kcp, ksp, &rx_lane_stat_entry->rle_stats, + rx_hwlane_stats_list, RX_HWLANE_STAT_SIZE); + +done: + return (rx_lane_stat_entry); +} + +/*ARGSUSED*/ +static void * +i_dlstat_rx_swlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + rx_lane_stat_entry_t *rx_lane_stat_entry; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_SWLANE; + + i_dlstat_get_stats(kcp, ksp, &rx_lane_stat_entry->rle_stats, + rx_swlane_stats_list, RX_SWLANE_STAT_SIZE); + + rx_lane_stat_entry->rle_stats.rl_ipackets = + rx_lane_stat_entry->rle_stats.rl_intrs; + rx_lane_stat_entry->rle_stats.rl_rbytes = + rx_lane_stat_entry->rle_stats.rl_intrbytes; +done: + return (rx_lane_stat_entry); +} + +/*ARGSUSED*/ +static void * +i_dlstat_rx_local_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + rx_lane_stat_entry_t *local_stat_entry; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + local_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (local_stat_entry == NULL) + goto done; + + local_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + local_stat_entry->rle_id = L_LOCAL; + + i_dlstat_get_stats(kcp, ksp, &rx_lane_stat_entry->rle_stats, + rx_swlane_stats_list, RX_SWLANE_STAT_SIZE); + + local_stat_entry->rle_stats.rl_ipackets = + rx_lane_stat_entry->rle_stats.rl_lclpackets; + local_stat_entry->rle_stats.rl_rbytes = + rx_lane_stat_entry->rle_stats.rl_lclbytes; + +done: + 
free(rx_lane_stat_entry); + return (local_stat_entry); +} + +static dladm_stat_chain_t * +i_dlstat_rx_local_stats(const char *linkname) +{ + dladm_stat_chain_t *local_stats = NULL; + + local_stats = i_dlstat_query_stats(linkname, DLSTAT_MAC_RX_SWLANE, + default_idlist, default_idlist_size, + i_dlstat_rx_local_retrieve_stat); + + if (local_stats != NULL) { + (void) strlcpy(local_stats->dc_statheader, "mac_rx_local", + sizeof (local_stats->dc_statheader)); + } + return (local_stats); +} + +static dladm_stat_chain_t * +i_dlstat_rx_bcast_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_BCAST; + + rx_lane_stat_entry->rle_stats.rl_ipackets = + misc_stat_entry->mse_stats.ms_brdcstrcv + + misc_stat_entry->mse_stats.ms_multircv; + rx_lane_stat_entry->rle_stats.rl_intrs = + misc_stat_entry->mse_stats.ms_brdcstrcv + + misc_stat_entry->mse_stats.ms_multircv; + rx_lane_stat_entry->rle_stats.rl_rbytes = + misc_stat_entry->mse_stats.ms_brdcstrcvbytes + + misc_stat_entry->mse_stats.ms_multircvbytes; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(rx_lane_stat_entry); + goto done; + } + + head->dc_statentry = rx_lane_stat_entry; + head->dc_next = NULL; + + free(misc_stat_entry); +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_rx_defunctlane_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + rx_lane_stat_entry = calloc(1, sizeof 
(rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_DFNCT; + + rx_lane_stat_entry->rle_stats.rl_ipackets = + misc_stat_entry->mse_stats.ms_ipackets; + rx_lane_stat_entry->rle_stats.rl_rbytes = + misc_stat_entry->mse_stats.ms_rbytes; + rx_lane_stat_entry->rle_stats.rl_intrs = + misc_stat_entry->mse_stats.ms_intrs; + rx_lane_stat_entry->rle_stats.rl_polls = + misc_stat_entry->mse_stats.ms_polls; + rx_lane_stat_entry->rle_stats.rl_sdrops = + misc_stat_entry->mse_stats.ms_rxsdrops; + rx_lane_stat_entry->rle_stats.rl_chl10 = + misc_stat_entry->mse_stats.ms_chainunder10; + rx_lane_stat_entry->rle_stats.rl_ch10_50 = + misc_stat_entry->mse_stats.ms_chain10to50; + rx_lane_stat_entry->rle_stats.rl_chg50 = + misc_stat_entry->mse_stats.ms_chainover50; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(rx_lane_stat_entry); + goto done; + } + + head->dc_statentry = rx_lane_stat_entry; + head->dc_next = NULL; + +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_rx_hwlane_stats(const char *linkname) +{ + uint_t rx_hwlane_idlist[MAX_RINGS_PER_GROUP]; + uint_t rx_hwlane_idlist_size; + + i_dlstat_get_idlist(linkname, DLSTAT_RX_HWLANE_IDLIST, + rx_hwlane_idlist, &rx_hwlane_idlist_size); + + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_RX_HWLANE, + rx_hwlane_idlist, rx_hwlane_idlist_size, + i_dlstat_rx_hwlane_retrieve_stat)); +} + +/*ARGSUSED*/ +static dladm_stat_chain_t * +i_dlstat_rx_swlane_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_RX_SWLANE, + default_idlist, default_idlist_size, + i_dlstat_rx_swlane_retrieve_stat)); +} + +void * +dlstat_rx_lane_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *local_stats = NULL; + dladm_stat_chain_t *bcast_stats = NULL; + dladm_stat_chain_t 
*defunctlane_stats = NULL; + dladm_stat_chain_t *lane_stats = NULL; + char linkname[MAXLINKNAMELEN]; + boolean_t is_legacy_driver; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + /* Check if it is legacy driver */ + if (dladm_linkprop_is_set(dh, linkid, DLADM_PROP_VAL_CURRENT, + "_softmac", &is_legacy_driver) != DLADM_STATUS_OK) { + goto done; + } + + if (is_legacy_driver) { + head = i_dlstat_legacy_rx_lane_stats(linkname); + goto done; + } + + local_stats = i_dlstat_rx_local_stats(linkname); + bcast_stats = i_dlstat_rx_bcast_stats(linkname); + defunctlane_stats = i_dlstat_rx_defunctlane_stats(linkname); + lane_stats = i_dlstat_rx_hwlane_stats(linkname); + if (lane_stats == NULL) + lane_stats = i_dlstat_rx_swlane_stats(dh, linkid, linkname); + + head = i_dlstat_join_lists(local_stats, bcast_stats); + head = i_dlstat_join_lists(head, defunctlane_stats); + head = i_dlstat_join_lists(head, lane_stats); +done: + return (head); +} + +/* Tx lane statistic specific functions */ +static boolean_t +i_dlstat_tx_lane_match(void *arg1, void *arg2) +{ + tx_lane_stat_entry_t *s1 = arg1; + tx_lane_stat_entry_t *s2 = arg2; + + return (s1->tle_index == s2->tle_index && + s1->tle_id == s2->tle_id); +} + +static void * +i_dlstat_tx_lane_stat_entry_diff(void *arg1, void *arg2) +{ + tx_lane_stat_entry_t *s1 = arg1; + tx_lane_stat_entry_t *s2 = arg2; + tx_lane_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (tx_lane_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->tle_index = s1->tle_index; + diff_entry->tle_id = s1->tle_id; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, tle_stats, tx_lane_stats_list, + TX_LANE_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_tx_hwlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + tx_lane_stat_entry_t *tx_lane_stat_entry; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if 
(tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = i; + tx_lane_stat_entry->tle_id = L_HWLANE; + + i_dlstat_get_stats(kcp, ksp, &tx_lane_stat_entry->tle_stats, + tx_lane_stats_list, TX_LANE_STAT_SIZE); + +done: + return (tx_lane_stat_entry); +} + +/*ARGSUSED*/ +static void * +i_dlstat_tx_swlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + tx_lane_stat_entry_t *tx_lane_stat_entry; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_SWLANE; + + i_dlstat_get_stats(kcp, ksp, &tx_lane_stat_entry->tle_stats, + tx_lane_stats_list, TX_LANE_STAT_SIZE); + +done: + return (tx_lane_stat_entry); +} + +static dladm_stat_chain_t * +i_dlstat_tx_bcast_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + tx_lane_stat_entry_t *tx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_BCAST; + + tx_lane_stat_entry->tle_stats.tl_opackets = + misc_stat_entry->mse_stats.ms_brdcstxmt + + misc_stat_entry->mse_stats.ms_multixmt; + + tx_lane_stat_entry->tle_stats.tl_obytes = + misc_stat_entry->mse_stats.ms_brdcstxmtbytes + + misc_stat_entry->mse_stats.ms_multixmtbytes; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(tx_lane_stat_entry); + goto done; + } + + head->dc_statentry = tx_lane_stat_entry; + head->dc_next = NULL; + + free(misc_stat_entry); +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_tx_defunctlane_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + tx_lane_stat_entry_t 
*tx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_DFNCT; + + tx_lane_stat_entry->tle_stats.tl_opackets = + misc_stat_entry->mse_stats.ms_opackets; + tx_lane_stat_entry->tle_stats.tl_obytes = + misc_stat_entry->mse_stats.ms_obytes; + tx_lane_stat_entry->tle_stats.tl_sdrops = + misc_stat_entry->mse_stats.ms_txsdrops; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(tx_lane_stat_entry); + goto done; + } + + head->dc_statentry = tx_lane_stat_entry; + head->dc_next = NULL; + +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_tx_hwlane_stats(const char *linkname) +{ + uint_t tx_hwlane_idlist[MAX_RINGS_PER_GROUP]; + uint_t tx_hwlane_idlist_size; + + i_dlstat_get_idlist(linkname, DLSTAT_TX_HWLANE_IDLIST, + tx_hwlane_idlist, &tx_hwlane_idlist_size); + + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_TX_HWLANE, + tx_hwlane_idlist, tx_hwlane_idlist_size, + i_dlstat_tx_hwlane_retrieve_stat)); +} + +/*ARGSUSED*/ +static dladm_stat_chain_t * +i_dlstat_tx_swlane_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_TX_SWLANE, + default_idlist, default_idlist_size, + i_dlstat_tx_swlane_retrieve_stat)); +} + +void * +dlstat_tx_lane_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *bcast_stats = NULL; + dladm_stat_chain_t *defunctlane_stats = NULL; + dladm_stat_chain_t *lane_stats; + char linkname[MAXLINKNAMELEN]; + boolean_t is_legacy_driver; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + /* Check if it is legacy driver */ + if (dladm_linkprop_is_set(dh, 
linkid, DLADM_PROP_VAL_CURRENT, + "_softmac", &is_legacy_driver) != DLADM_STATUS_OK) { + goto done; + } + + if (is_legacy_driver) { + head = i_dlstat_legacy_tx_lane_stats(linkname); + goto done; + } + + bcast_stats = i_dlstat_tx_bcast_stats(linkname); + defunctlane_stats = i_dlstat_tx_defunctlane_stats(linkname); + lane_stats = i_dlstat_tx_hwlane_stats(linkname); + if (lane_stats == NULL) + lane_stats = i_dlstat_tx_swlane_stats(dh, linkid, linkname); + + head = i_dlstat_join_lists(bcast_stats, defunctlane_stats); + head = i_dlstat_join_lists(head, lane_stats); + +done: + return (head); +} + +/* Rx lane total statistic specific functions */ +void * +dlstat_rx_lane_total_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *total_head = NULL; + dladm_stat_chain_t *rx_lane_head, *curr; + rx_lane_stat_entry_t *total_stats; + + /* Get per rx lane stats */ + rx_lane_head = dlstat_rx_lane_stats(dh, linkid); + if (rx_lane_head == NULL) + goto done; + + total_stats = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (total_stats == NULL) + goto done; + + total_stats->rle_index = DLSTAT_INVALID_ENTRY; + total_stats->rle_id = DLSTAT_INVALID_ENTRY; + + for (curr = rx_lane_head; curr != NULL; curr = curr->dc_next) { + rx_lane_stat_entry_t *curr_lane_stats = curr->dc_statentry; + + i_dlstat_sum_stats(&total_stats->rle_stats, + &curr_lane_stats->rle_stats, &total_stats->rle_stats, + rx_lane_stats_list, RX_LANE_STAT_SIZE); + } + + total_head = malloc(sizeof (dladm_stat_chain_t)); + if (total_head == NULL) { + free(total_stats); + goto done; + } + + total_head->dc_statentry = total_stats; + (void) strlcpy(total_head->dc_statheader, "mac_rx_lane_total", + sizeof (total_head->dc_statheader)); + total_head->dc_next = NULL; + free(rx_lane_head); + +done: + return (total_head); +} + +/* Tx lane total statistic specific functions */ +void * +dlstat_tx_lane_total_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *total_head = NULL; + dladm_stat_chain_t 
*tx_lane_head, *curr; + tx_lane_stat_entry_t *total_stats; + + /* Get per tx lane stats */ + tx_lane_head = dlstat_tx_lane_stats(dh, linkid); + if (tx_lane_head == NULL) + goto done; + + total_stats = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (total_stats == NULL) + goto done; + + total_stats->tle_index = DLSTAT_INVALID_ENTRY; + total_stats->tle_id = DLSTAT_INVALID_ENTRY; + + for (curr = tx_lane_head; curr != NULL; curr = curr->dc_next) { + tx_lane_stat_entry_t *curr_lane_stats = curr->dc_statentry; + + i_dlstat_sum_stats(&total_stats->tle_stats, + &curr_lane_stats->tle_stats, &total_stats->tle_stats, + tx_lane_stats_list, TX_LANE_STAT_SIZE); + } + + total_head = malloc(sizeof (dladm_stat_chain_t)); + if (total_head == NULL) { + free(total_stats); + goto done; + } + + total_head->dc_statentry = total_stats; + (void) strlcpy(total_head->dc_statheader, "mac_tx_lane_total", + sizeof (total_head->dc_statheader)); + total_head->dc_next = NULL; + free(tx_lane_head); + +done: + return (total_head); +} + +/* Fanout specific functions */ +static boolean_t +i_dlstat_fanout_match(void *arg1, void *arg2) +{ + fanout_stat_entry_t *s1 = arg1; + fanout_stat_entry_t *s2 = arg2; + + return (s1->fe_index == s2->fe_index && + s1->fe_id == s2->fe_id && + s1->fe_foutindex == s2->fe_foutindex); +} + +static void * +i_dlstat_fanout_stat_entry_diff(void *arg1, void *arg2) +{ + fanout_stat_entry_t *s1 = arg1; + fanout_stat_entry_t *s2 = arg2; + fanout_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (fanout_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->fe_index = s1->fe_index; + diff_entry->fe_id = s1->fe_id; + diff_entry->fe_foutindex = s1->fe_foutindex; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, fe_stats, fanout_stats_list, + FANOUT_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_fanout_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + fanout_stat_entry_t *fanout_stat_entry; + + fanout_stat_entry = calloc(1, sizeof 
(fanout_stat_entry_t)); + if (fanout_stat_entry == NULL) + goto done; + + /* Set by the caller later */ + fanout_stat_entry->fe_index = DLSTAT_INVALID_ENTRY; + fanout_stat_entry->fe_id = DLSTAT_INVALID_ENTRY; + + fanout_stat_entry->fe_foutindex = i; + + i_dlstat_get_stats(kcp, ksp, &fanout_stat_entry->fe_stats, + fanout_stats_list, FANOUT_STAT_SIZE); + +done: + return (fanout_stat_entry); +} + +static void * +i_dlstat_query_fanout_stats(dladm_handle_t dh, datalink_id_t linkid, + uint_t idlist[], uint_t idlist_size, + const char *modname, const char *prefix) +{ + int i; + char statprefix[MAXLINKNAMELEN]; + char linkname[MAXLINKNAMELEN]; + dladm_stat_chain_t *curr, *curr_head; + dladm_stat_chain_t *head = NULL, *prev = NULL; + uint_t fanout_idlist[MAX_RINGS_PER_GROUP]; + uint_t fanout_idlist_size; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + return (NULL); + } + + i_dlstat_get_idlist(linkname, DLSTAT_FANOUT_IDLIST, + fanout_idlist, &fanout_idlist_size); + + for (i = 0; i < idlist_size; i++) { + uint_t index = idlist[i]; + + (void) snprintf(statprefix, sizeof (statprefix), "%s%d_fanout", + prefix, index); + + curr_head = i_dlstat_query_stats(modname, statprefix, + fanout_idlist, fanout_idlist_size, + i_dlstat_fanout_retrieve_stat); + + if (curr_head == NULL) /* Last lane */ + break; + + if (head == NULL) /* First lane */ + head = curr_head; + else /* Link new lane list to end of previous lane list */ + prev->dc_next = curr_head; + + /* Walk new lane list and set ids */ + for (curr = curr_head; curr != NULL; curr = curr->dc_next) { + fanout_stat_entry_t *curr_stats = curr->dc_statentry; + + curr_stats->fe_index = index; + curr_stats->fe_id = L_HWLANE; + /* + * Save last pointer of previous linked list. + * This pointer is used to chain linked lists + * generated in each iteration. 
+ */ + prev = curr; + } + } + + return (head); +} + +void * +dlstat_fanout_swlane_and_local_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + return (i_dlstat_query_fanout_stats(dh, linkid, + default_idlist, default_idlist_size, linkname, + DLSTAT_MAC_RX_SWLANE)); +} + +void * +dlstat_fanout_hwlane_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + uint_t rx_hwlane_idlist[MAX_RINGS_PER_GROUP]; + uint_t rx_hwlane_idlist_size; + + i_dlstat_get_idlist(linkname, DLSTAT_RX_HWLANE_IDLIST, + rx_hwlane_idlist, &rx_hwlane_idlist_size); + + return (i_dlstat_query_fanout_stats(dh, linkid, rx_hwlane_idlist, + rx_hwlane_idlist_size, linkname, DLSTAT_MAC_RX_HWLANE)); +} + +void * +dlstat_fanout_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *fout_hwlane_stats; + dladm_stat_chain_t *fout_swlane_and_local_stats; + fanout_stat_entry_t *fout_stats; + char linkname[MAXLINKNAMELEN]; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + fout_swlane_and_local_stats = + dlstat_fanout_swlane_and_local_stats(dh, linkid, linkname); + fout_hwlane_stats = dlstat_fanout_hwlane_stats(dh, linkid, linkname); + + if (fout_swlane_and_local_stats == NULL) { + head = fout_hwlane_stats; + goto done; + } + + fout_stats = fout_swlane_and_local_stats->dc_statentry; + + if (fout_hwlane_stats != NULL) { /* hwlane(s), only local traffic */ + fout_stats->fe_id = L_LOCAL; + fout_stats->fe_index = DLSTAT_INVALID_ENTRY; + } else { /* no hwlane, mix of local+sw classified */ + fout_stats->fe_id = L_LCLSWLANE; + fout_stats->fe_index = DLSTAT_INVALID_ENTRY; + } + + fout_swlane_and_local_stats->dc_next = fout_hwlane_stats; + head = fout_swlane_and_local_stats; + +done: + return (head); +} + +/* Rx ring statistic specific functions */ +static boolean_t +i_dlstat_rx_ring_match(void *arg1, void *arg2) +{ + rx_lane_stat_entry_t *s1 
= arg1; + rx_lane_stat_entry_t *s2 = arg2; + + return (s1->rle_index == s2->rle_index); +} + +static void * +i_dlstat_rx_ring_stat_entry_diff(void *arg1, void *arg2) +{ + ring_stat_entry_t *s1 = arg1; + ring_stat_entry_t *s2 = arg2; + ring_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (ring_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->re_index = s1->re_index; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, re_stats, rx_ring_stats_list, + RX_RING_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_rx_ring_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + ring_stat_entry_t *rx_ring_stat_entry; + + rx_ring_stat_entry = calloc(1, sizeof (ring_stat_entry_t)); + if (rx_ring_stat_entry == NULL) + goto done; + + rx_ring_stat_entry->re_index = i; + + i_dlstat_get_stats(kcp, ksp, &rx_ring_stat_entry->re_stats, + rx_ring_stats_list, RX_RING_STAT_SIZE); + +done: + return (rx_ring_stat_entry); +} + +void * +dlstat_rx_ring_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + uint_t rx_ring_idlist[MAX_RINGS_PER_GROUP]; + uint_t rx_ring_idlist_size; + dladm_phys_attr_t dpa; + char linkname[MAXLINKNAMELEN]; + char *modname; + datalink_class_t class; + + /* + * kstats corresponding to physical device rings continue to use + * device names even if the link is renamed using dladm rename-link. + * Thus, given a linkid, we lookup the physical device name. + * However, if an aggr is renamed, kstats corresponding to its + * pseudo rings are renamed as well. 
+ */ + if (dladm_datalink_id2info(dh, linkid, NULL, &class, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + return (NULL); + } + + if (class != DATALINK_CLASS_AGGR) { + if (dladm_phys_info(dh, linkid, &dpa, DLADM_OPT_ACTIVE) != + DLADM_STATUS_OK) { + return (NULL); + } + modname = dpa.dp_dev; + } else + modname = linkname; + + i_dlstat_get_idlist(modname, DLSTAT_RX_RING_IDLIST, + rx_ring_idlist, &rx_ring_idlist_size); + + return (i_dlstat_query_stats(modname, DLSTAT_MAC_RX_RING, + rx_ring_idlist, rx_ring_idlist_size, + i_dlstat_rx_ring_retrieve_stat)); +} + +/* Tx ring statistic specific functions */ +static boolean_t +i_dlstat_tx_ring_match(void *arg1, void *arg2) +{ + tx_lane_stat_entry_t *s1 = arg1; + tx_lane_stat_entry_t *s2 = arg2; + + return (s1->tle_index == s2->tle_index); +} + +static void * +i_dlstat_tx_ring_stat_entry_diff(void *arg1, void *arg2) +{ + ring_stat_entry_t *s1 = arg1; + ring_stat_entry_t *s2 = arg2; + ring_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (ring_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->re_index = s1->re_index; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, re_stats, tx_ring_stats_list, + TX_RING_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_tx_ring_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + ring_stat_entry_t *tx_ring_stat_entry; + + tx_ring_stat_entry = calloc(1, sizeof (ring_stat_entry_t)); + if (tx_ring_stat_entry == NULL) + goto done; + + tx_ring_stat_entry->re_index = i; + + i_dlstat_get_stats(kcp, ksp, &tx_ring_stat_entry->re_stats, + tx_ring_stats_list, TX_RING_STAT_SIZE); + +done: + return (tx_ring_stat_entry); +} + +void * +dlstat_tx_ring_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + uint_t tx_ring_idlist[MAX_RINGS_PER_GROUP]; + uint_t tx_ring_idlist_size; + dladm_phys_attr_t dpa; + char linkname[MAXLINKNAMELEN]; + char *modname; + datalink_class_t class; + + /* + * kstats corresponding to physical device rings continue to use 
+ * device names even if the link is renamed using dladm rename-link. + * Thus, given a linkid, we lookup the physical device name. + * However, if an aggr is renamed, kstats corresponding to its + * pseudo rings are renamed as well. + */ + if (dladm_datalink_id2info(dh, linkid, NULL, &class, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + return (NULL); + } + + if (class != DATALINK_CLASS_AGGR) { + if (dladm_phys_info(dh, linkid, &dpa, DLADM_OPT_ACTIVE) != + DLADM_STATUS_OK) { + return (NULL); + } + modname = dpa.dp_dev; + } else + modname = linkname; + + i_dlstat_get_idlist(modname, DLSTAT_TX_RING_IDLIST, + tx_ring_idlist, &tx_ring_idlist_size); + + return (i_dlstat_query_stats(modname, DLSTAT_MAC_TX_RING, + tx_ring_idlist, tx_ring_idlist_size, + i_dlstat_tx_ring_retrieve_stat)); +} + +/* Rx ring total statistic specific functions */ +void * +dlstat_rx_ring_total_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *total_head = NULL; + dladm_stat_chain_t *rx_ring_head, *curr; + ring_stat_entry_t *total_stats; + + /* Get per rx ring stats */ + rx_ring_head = dlstat_rx_ring_stats(dh, linkid); + if (rx_ring_head == NULL) + goto done; + + total_stats = calloc(1, sizeof (ring_stat_entry_t)); + if (total_stats == NULL) + goto done; + + total_stats->re_index = DLSTAT_INVALID_ENTRY; + + for (curr = rx_ring_head; curr != NULL; curr = curr->dc_next) { + ring_stat_entry_t *curr_ring_stats = curr->dc_statentry; + + i_dlstat_sum_stats(&total_stats->re_stats, + &curr_ring_stats->re_stats, &total_stats->re_stats, + rx_ring_stats_list, RX_RING_STAT_SIZE); + } + + total_head = malloc(sizeof (dladm_stat_chain_t)); + if (total_head == NULL) { + free(total_stats); + goto done; + } + + total_head->dc_statentry = total_stats; + (void) strlcpy(total_head->dc_statheader, "mac_rx_ring_total", + sizeof (total_head->dc_statheader)); + total_head->dc_next = NULL; + free(rx_ring_head); + +done: + return (total_head); +} + +/* Tx ring total statistic specific 
functions */ +void * +dlstat_tx_ring_total_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *total_head = NULL; + dladm_stat_chain_t *tx_ring_head, *curr; + ring_stat_entry_t *total_stats; + + /* Get per tx ring stats */ + tx_ring_head = dlstat_tx_ring_stats(dh, linkid); + if (tx_ring_head == NULL) + goto done; + + total_stats = calloc(1, sizeof (ring_stat_entry_t)); + if (total_stats == NULL) + goto done; + + total_stats->re_index = DLSTAT_INVALID_ENTRY; + + for (curr = tx_ring_head; curr != NULL; curr = curr->dc_next) { + ring_stat_entry_t *curr_ring_stats = curr->dc_statentry; + + i_dlstat_sum_stats(&total_stats->re_stats, + &curr_ring_stats->re_stats, &total_stats->re_stats, + tx_ring_stats_list, TX_RING_STAT_SIZE); + } + + total_head = malloc(sizeof (dladm_stat_chain_t)); + if (total_head == NULL) { + free(total_stats); + goto done; + } + + total_head->dc_statentry = total_stats; + (void) strlcpy(total_head->dc_statheader, "mac_tx_ring_total", + sizeof (total_head->dc_statheader)); + total_head->dc_next = NULL; + free(tx_ring_head); + +done: + return (total_head); +} + +/* Summary statistic specific functions */ +/*ARGSUSED*/ +static boolean_t +i_dlstat_total_match(void *arg1, void *arg2) +{ /* Always single entry for total */ + return (B_TRUE); +} + +static void * +i_dlstat_total_stat_entry_diff(void *arg1, void *arg2) +{ + total_stat_entry_t *s1 = arg1; + total_stat_entry_t *s2 = arg2; + total_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (total_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, tse_stats, total_stats_list, + TOTAL_STAT_SIZE); + +done: + return (diff_entry); +} + +void * +dlstat_total_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *rx_total; + dladm_stat_chain_t *tx_total; + total_stat_entry_t *total_stat_entry; + rx_lane_stat_entry_t *rx_lane_stat_entry; + tx_lane_stat_entry_t *tx_lane_stat_entry; + + /* 
Get total rx lane stats */ + rx_total = dlstat_rx_lane_total_stats(dh, linkid); + if (rx_total == NULL) + goto done; + + /* Get total tx lane stats */ + tx_total = dlstat_tx_lane_total_stats(dh, linkid); + if (tx_total == NULL) + goto done; + + /* Build total stat */ + total_stat_entry = calloc(1, sizeof (total_stat_entry_t)); + if (total_stat_entry == NULL) + goto done; + + rx_lane_stat_entry = rx_total->dc_statentry; + tx_lane_stat_entry = tx_total->dc_statentry; + + /* Extract total rx ipackets, rbytes */ + total_stat_entry->tse_stats.ts_ipackets = + rx_lane_stat_entry->rle_stats.rl_ipackets; + total_stat_entry->tse_stats.ts_rbytes = + rx_lane_stat_entry->rle_stats.rl_rbytes; + + /* Extract total tx opackets, obytes */ + total_stat_entry->tse_stats.ts_opackets = + tx_lane_stat_entry->tle_stats.tl_opackets; + total_stat_entry->tse_stats.ts_obytes = + tx_lane_stat_entry->tle_stats.tl_obytes; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(total_stat_entry); + goto done; + } + + head->dc_statentry = total_stat_entry; + (void) strlcpy(head->dc_statheader, "mac_lane_total", + sizeof (head->dc_statheader)); + head->dc_next = NULL; + free(rx_total); + free(tx_total); + +done: + return (head); +} + +/* Aggr total statistic(summed across all component ports) specific functions */ +void * +dlstat_aggr_total_stats(dladm_stat_chain_t *head) +{ + dladm_stat_chain_t *curr; + dladm_stat_chain_t *total_head; + aggr_port_stat_entry_t *total_stats; + + total_stats = calloc(1, sizeof (aggr_port_stat_entry_t)); + if (total_stats == NULL) + goto done; + + total_stats->ape_portlinkid = DATALINK_INVALID_LINKID; + + for (curr = head; curr != NULL; curr = curr->dc_next) { + aggr_port_stat_entry_t *curr_aggr_port_stats; + + curr_aggr_port_stats = curr->dc_statentry; + + i_dlstat_sum_stats(&total_stats->ape_stats, + &curr_aggr_port_stats->ape_stats, &total_stats->ape_stats, + aggr_port_stats_list, AGGR_PORT_STAT_SIZE); + } + + total_head = malloc(sizeof 
(dladm_stat_chain_t)); + if (total_head == NULL) { + free(total_stats); + goto done; + } + + total_head->dc_statentry = total_stats; + total_head->dc_next = NULL; + +done: + return (total_head); +} + +/* Aggr port statistic specific functions */ +static boolean_t +i_dlstat_aggr_port_match(void *arg1, void *arg2) +{ + aggr_port_stat_entry_t *s1 = arg1; + aggr_port_stat_entry_t *s2 = arg2; + + return (s1->ape_portlinkid == s2->ape_portlinkid); +} + +static void * +i_dlstat_aggr_port_stat_entry_diff(void *arg1, void *arg2) +{ + aggr_port_stat_entry_t *s1 = arg1; + aggr_port_stat_entry_t *s2 = arg2; + aggr_port_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (aggr_port_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->ape_portlinkid = s1->ape_portlinkid; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, ape_stats, aggr_port_stats_list, + AGGR_PORT_STAT_SIZE); + +done: + return (diff_entry); +} + +/* + * Query dls stats for the aggr port. This results in query for stats into + * the corresponding device driver. 
+ */ +static aggr_port_stat_entry_t * +i_dlstat_single_port_stats(const char *portname, datalink_id_t linkid) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + char module[DLPI_LINKNAME_MAX]; + uint_t instance; + aggr_port_stat_entry_t *aggr_port_stat_entry = NULL; + + if (dladm_parselink(portname, module, &instance) != DLADM_STATUS_OK) + goto done; + + if ((kcp = kstat_open()) == NULL) { + warn("kstat open operation failed"); + return (NULL); + } + + ksp = dladm_kstat_lookup(kcp, module, instance, "mac", NULL); + if (ksp == NULL) + goto done; + + aggr_port_stat_entry = calloc(1, sizeof (aggr_port_stat_entry_t)); + if (aggr_port_stat_entry == NULL) + goto done; + + /* Save port's linkid */ + aggr_port_stat_entry->ape_portlinkid = linkid; + + i_dlstat_get_stats(kcp, ksp, &aggr_port_stat_entry->ape_stats, + aggr_port_stats_list, AGGR_PORT_STAT_SIZE); +done: + (void) kstat_close(kcp); + return (aggr_port_stat_entry); +} + +void * +dlstat_aggr_port_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_aggr_grp_attr_t ginfo; + int i; + dladm_aggr_port_attr_t *portp; + dladm_phys_attr_t dpa; + aggr_port_stat_entry_t *aggr_port_stat_entry; + dladm_stat_chain_t *head = NULL, *prev = NULL, *curr; + dladm_stat_chain_t *total_stats; + + /* Get aggr info */ + bzero(&ginfo, sizeof (dladm_aggr_grp_attr_t)); + if (dladm_aggr_info(dh, linkid, &ginfo, DLADM_OPT_ACTIVE) + != DLADM_STATUS_OK) + goto done; + /* For every port that is member of this aggr do */ + for (i = 0; i < ginfo.lg_nports; i++) { + portp = &(ginfo.lg_ports[i]); + if (dladm_phys_info(dh, portp->lp_linkid, &dpa, + DLADM_OPT_ACTIVE) != DLADM_STATUS_OK) { + goto done; + } + + aggr_port_stat_entry = i_dlstat_single_port_stats(dpa.dp_dev, + portp->lp_linkid); + + /* Create dladm_stat_chain_t object for this stat */ + curr = malloc(sizeof (dladm_stat_chain_t)); + if (curr == NULL) { + free(aggr_port_stat_entry); + goto done; + } + (void) strlcpy(curr->dc_statheader, dpa.dp_dev, + sizeof (curr->dc_statheader)); + 
curr->dc_statentry = aggr_port_stat_entry; + curr->dc_next = NULL; + + /* Chain this aggr port stat entry */ + /* head of the stat list */ + if (prev == NULL) + head = curr; + else + prev->dc_next = curr; + prev = curr; + } + + /* + * Prepend the stat list with cumulative aggr stats i.e. summed over all + * component ports + */ + total_stats = dlstat_aggr_total_stats(head); + if (total_stats != NULL) { + total_stats->dc_next = head; + head = total_stats; + } + +done: + free(ginfo.lg_ports); + return (head); +} + +/* Misc stat specific functions */ +void * +dlstat_misc_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + char linkname[MAXLINKNAMELEN]; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(misc_stat_entry); + goto done; + } + + head->dc_statentry = misc_stat_entry; + (void) strlcpy(head->dc_statheader, "mac_misc_stat", + sizeof (head->dc_statheader)); + head->dc_next = NULL; + +done: + return (head); +} + +/* Exported functions */ +dladm_stat_chain_t * +dladm_link_stat_query(dladm_handle_t dh, datalink_id_t linkid, + dladm_stat_type_t stattype) +{ + return (dladm_stat_table[stattype].ds_querystat(dh, linkid)); +} + +dladm_stat_chain_t * +dladm_link_stat_diffchain(dladm_stat_chain_t *op1, dladm_stat_chain_t *op2, + dladm_stat_type_t stattype) +{ + dladm_stat_chain_t *op1_curr, *op2_curr; + dladm_stat_chain_t *diff_curr; + dladm_stat_chain_t *diff_prev = NULL, *diff_head = NULL; + + /* Perform op1 - op2, store result in diff */ + for (op1_curr = op1; op1_curr != NULL; op1_curr = op1_curr->dc_next) { + for (op2_curr = op2; op2_curr != NULL; + op2_curr = op2_curr->dc_next) { + if (dlstat_match_stats(op1_curr->dc_statentry, + 
op2_curr->dc_statentry, stattype)) { + break; + } + } + diff_curr = malloc(sizeof (dladm_stat_chain_t)); + if (diff_curr == NULL) + goto done; + + diff_curr->dc_next = NULL; + + if (op2_curr == NULL) { + /* prev iteration did not have this stat entry */ + diff_curr->dc_statentry = + dlstat_diff_stats(op1_curr->dc_statentry, + NULL, stattype); + } else { + diff_curr->dc_statentry = + dlstat_diff_stats(op1_curr->dc_statentry, + op2_curr->dc_statentry, stattype); + } + + if (diff_curr->dc_statentry == NULL) { + free(diff_curr); + goto done; + } + + if (diff_prev == NULL) /* head of the diff stat list */ + diff_head = diff_curr; + else + diff_prev->dc_next = diff_curr; + diff_prev = diff_curr; + } +done: + return (diff_head); +} + +void +dladm_link_stat_free(dladm_stat_chain_t *curr) +{ + while (curr != NULL) { + dladm_stat_chain_t *tofree = curr; + + curr = curr->dc_next; + free(tofree->dc_statentry); + free(tofree); + } +} + +/* Query all link stats */ +static name_value_stat_t * +i_dlstat_convert_stats(void *stats, stat_info_t stats_list[], uint_t size) +{ + int i; + name_value_stat_t *head_stat = NULL, *prev_stat = NULL; + name_value_stat_t *curr_stat; + + for (i = 0; i < size; i++) { + uint64_t *val = (void *) + ((uchar_t *)stats + stats_list[i].si_offset); + + curr_stat = calloc(1, sizeof (name_value_stat_t)); + if (curr_stat == NULL) + break; + + (void) strlcpy(curr_stat->nv_statname, stats_list[i].si_name, + sizeof (curr_stat->nv_statname)); + curr_stat->nv_statval = *val; + curr_stat->nv_nextstat = NULL; + + if (head_stat == NULL) /* First node */ + head_stat = curr_stat; + else + prev_stat->nv_nextstat = curr_stat; + + prev_stat = curr_stat; + } + return (head_stat); +} + +void * +build_nvs_entry(char *statheader, void *statentry, dladm_stat_type_t stattype) +{ + name_value_stat_entry_t *name_value_stat_entry; + dladm_stat_desc_t *stattbl_ptr; + void *statfields; + + stattbl_ptr = &dladm_stat_table[stattype]; + + /* Allocate memory for query all stat entry */ 
+ name_value_stat_entry = calloc(1, sizeof (name_value_stat_entry_t)); + if (name_value_stat_entry == NULL) + goto done; + + /* Header for these stat fields */ + (void) strlcpy(name_value_stat_entry->nve_header, statheader, + sizeof (name_value_stat_entry->nve_header)); + + /* Extract stat fields from the statentry */ + statfields = (uchar_t *)statentry + + dladm_stat_table[stattype].ds_offset; + + /* Convert curr_stat to <statname, statval> pair */ + name_value_stat_entry->nve_stats = + i_dlstat_convert_stats(statfields, + stattbl_ptr->ds_statlist, stattbl_ptr->ds_statsize); +done: + return (name_value_stat_entry); +} + +void * +i_walk_dlstat_chain(dladm_stat_chain_t *stat_head, dladm_stat_type_t stattype) +{ + dladm_stat_chain_t *curr; + dladm_stat_chain_t *nvstat_head = NULL, *nvstat_prev = NULL; + dladm_stat_chain_t *nvstat_curr; + + /* + * For every stat in the chain, build header and convert all + * its stat fields + */ + for (curr = stat_head; curr != NULL; curr = curr->dc_next) { + nvstat_curr = malloc(sizeof (dladm_stat_chain_t)); + if (nvstat_curr == NULL) + break; + + nvstat_curr->dc_statentry = build_nvs_entry(curr->dc_statheader, + curr->dc_statentry, stattype); + + if (nvstat_curr->dc_statentry == NULL) { + free(nvstat_curr); + break; + } + + nvstat_curr->dc_next = NULL; + + if (nvstat_head == NULL) /* First node */ + nvstat_head = nvstat_curr; + else + nvstat_prev->dc_next = nvstat_curr; + + nvstat_prev = nvstat_curr; + } +done: + return (nvstat_head); +} + +dladm_stat_chain_t * +dladm_link_stat_query_all(dladm_handle_t dh, datalink_id_t linkid, + dladm_stat_type_t stattype) +{ + dladm_stat_chain_t *stat_head; + dladm_stat_chain_t *nvstat_head = NULL; + + /* Query the requested stat */ + stat_head = dladm_link_stat_query(dh, linkid, stattype); + if (stat_head == NULL) + goto done; + + /* + * Convert every statfield in every stat-entry of stat chain to + * <statname, statval> pair + */ + nvstat_head = i_walk_dlstat_chain(stat_head, stattype); + + /* 
Free stat_head */ + dladm_link_stat_free(stat_head); + +done: + return (nvstat_head); +} + +void +dladm_link_stat_query_all_free(dladm_stat_chain_t *curr) +{ + while (curr != NULL) { + dladm_stat_chain_t *tofree = curr; + name_value_stat_entry_t *nv_entry = curr->dc_statentry; + name_value_stat_t *nv_curr = nv_entry->nve_stats; + + while (nv_curr != NULL) { + name_value_stat_t *nv_tofree = nv_curr; + + nv_curr = nv_curr->nv_nextstat; + free(nv_tofree); + } + + curr = curr->dc_next; + free(nv_entry); + free(tofree); + } +} + +/* flow stats specific routines */ +flow_stat_t * +dladm_flow_stat_query(const char *flowname) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + flow_stat_t *flow_stat = NULL; + + if ((kcp = kstat_open()) == NULL) + return (NULL); + + flow_stat = calloc(1, sizeof (flow_stat_t)); + if (flow_stat == NULL) + goto done; + + ksp = dladm_kstat_lookup(kcp, NULL, -1, flowname, "flow"); + + if (ksp != NULL) { + i_dlstat_get_stats(kcp, ksp, flow_stat, flow_stats_list, + FLOW_STAT_SIZE); + } + +done: + (void) kstat_close(kcp); + return (flow_stat); +} + +flow_stat_t * +dladm_flow_stat_diff(flow_stat_t *op1, flow_stat_t *op2) +{ + flow_stat_t *diff_stat; + + diff_stat = calloc(1, sizeof (flow_stat_t)); + if (diff_stat == NULL) + goto done; + + if (op2 == NULL) { + bcopy(op1, diff_stat, sizeof (flow_stat_t)); + } else { + i_dlstat_diff_stats(diff_stat, op1, op2, flow_stats_list, + FLOW_STAT_SIZE); + } +done: + return (diff_stat); +} + +void +dladm_flow_stat_free(flow_stat_t *curr) +{ + free(curr); +} + +/* Query all flow stats */ +name_value_stat_entry_t * +dladm_flow_stat_query_all(const char *flowname) +{ + flow_stat_t *flow_stat; + name_value_stat_entry_t *name_value_stat_entry = NULL; + + /* Query flow stats */ + flow_stat = dladm_flow_stat_query(flowname); + if (flow_stat == NULL) + goto done; + + /* Allocate memory for query all stat entry */ + name_value_stat_entry = calloc(1, sizeof (name_value_stat_entry_t)); + if (name_value_stat_entry == NULL) { + 
dladm_flow_stat_free(flow_stat); + goto done; + } + + /* Header for these stat fields */ + (void) strncpy(name_value_stat_entry->nve_header, flowname, + MAXFLOWNAMELEN); + + /* Convert every statfield in flow_stat to <statname, statval> pair */ + name_value_stat_entry->nve_stats = + i_dlstat_convert_stats(flow_stat, flow_stats_list, FLOW_STAT_SIZE); + + /* Free flow_stat */ + dladm_flow_stat_free(flow_stat); + +done: + return (name_value_stat_entry); +} + +void +dladm_flow_stat_query_all_free(name_value_stat_entry_t *curr) +{ + name_value_stat_t *nv_curr = curr->nve_stats; + + while (nv_curr != NULL) { + name_value_stat_t *nv_tofree = nv_curr; + + nv_curr = nv_curr->nv_nextstat; + free(nv_tofree); + } +} diff --git a/usr/src/lib/libdladm/common/libdlstat.h b/usr/src/lib/libdladm/common/libdlstat.h index 51e28627a9..381dafe22d 100644 --- a/usr/src/lib/libdladm/common/libdlstat.h +++ b/usr/src/lib/libdladm/common/libdlstat.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/
@@ -41,6 +41,216 @@ extern "C" {
#define	LINK_REPORT	1
#define	FLOW_REPORT	2

/*
 * Marker stored in index/id fields that do not apply to an entry.
 * Parenthesized so the negative constant is safe in any expression.
 */
#define	DLSTAT_INVALID_ENTRY	(-1)
/* Size of stat header and stat name buffers, including the terminator */
#define	MAXSTATNAMELEN	256
/*
 * Definitions common to all stats
 */
typedef struct dladm_stat_chain_s {
	char				dc_statheader[MAXSTATNAMELEN];
	void				*dc_statentry;	/* type depends on stat */
	struct dladm_stat_chain_s	*dc_next;
} dladm_stat_chain_t;

typedef enum {
	DLADM_STAT_RX_LANE = 0,		/* Per lane rx stats */
	DLADM_STAT_TX_LANE,		/* Per lane tx stats */
	DLADM_STAT_RX_LANE_TOTAL,	/* Stats summed across all rx lanes */
	DLADM_STAT_TX_LANE_TOTAL,	/* Stats summed across all tx lanes */
	DLADM_STAT_RX_LANE_FOUT,	/* Per fanout (rx lane) stats */
	DLADM_STAT_RX_RING,		/* Per ring rx stats */
	DLADM_STAT_TX_RING,		/* Per ring tx stats  */
	DLADM_STAT_RX_RING_TOTAL,	/* Stats summed across all rx rings */
	DLADM_STAT_TX_RING_TOTAL,	/* Stats summed across all tx rings */
	DLADM_STAT_TOTAL,		/* Summary view */
	DLADM_STAT_AGGR_PORT,		/* Aggr port stats */
	DLADM_STAT_MISC,		/* Misc stats */
	DLADM_STAT_NUM_STATS		/* This must always be the last entry */
} dladm_stat_type_t;

/*
 * Definitions for rx lane stats
 */
typedef struct rx_lane_stat_s {
	uint64_t	rl_ipackets;
	uint64_t	rl_rbytes;
	uint64_t	rl_lclpackets;
	uint64_t	rl_lclbytes;
	uint64_t	rl_intrs;
	uint64_t	rl_intrbytes;
	uint64_t	rl_pollbytes;
	uint64_t	rl_polls;
	uint64_t	rl_sdrops;
	uint64_t	rl_chl10;
	uint64_t	rl_ch10_50;
	uint64_t	rl_chg50;
} rx_lane_stat_t;

/* Kind of lane a lane/fanout stat entry describes */
typedef enum {
	L_HWLANE,
	L_SWLANE,
	L_LOCAL,
	L_LCLSWLANE,
	L_BCAST,
	L_DFNCT
} lane_type_t;

typedef struct rx_lane_stat_entry_s {
	int64_t		rle_index;
	lane_type_t	rle_id;
	rx_lane_stat_t	rle_stats;
} rx_lane_stat_entry_t;

/*
 * Definitions for tx lane stats
 */
typedef struct tx_lane_stat_s {
	uint64_t	tl_opackets;
	uint64_t	tl_obytes;
	uint64_t	tl_blockcnt;
	uint64_t	tl_unblockcnt;
	uint64_t	tl_sdrops;
} tx_lane_stat_t;

typedef struct tx_lane_stat_entry_s {
	int64_t		tle_index;
	lane_type_t	tle_id;
tx_lane_stat_t tle_stats; +} tx_lane_stat_entry_t; + +/* + * Definitions for tx/rx misc stats + */ +typedef struct misc_stat_s { + uint64_t ms_multircv; + uint64_t ms_brdcstrcv; + uint64_t ms_multixmt; + uint64_t ms_brdcstxmt; + uint64_t ms_multircvbytes; + uint64_t ms_brdcstrcvbytes; + uint64_t ms_multixmtbytes; + uint64_t ms_brdcstxmtbytes; + uint64_t ms_txerrors; + uint64_t ms_macspoofed; + uint64_t ms_ipspoofed; + uint64_t ms_dhcpspoofed; + uint64_t ms_restricted; + uint64_t ms_dhcpdropped; + uint64_t ms_ipackets; + uint64_t ms_rbytes; + uint64_t ms_local; + uint64_t ms_localbytes; + uint64_t ms_intrs; + uint64_t ms_intrbytes; + uint64_t ms_polls; + uint64_t ms_pollbytes; + uint64_t ms_rxsdrops; + uint64_t ms_chainunder10; + uint64_t ms_chain10to50; + uint64_t ms_chainover50; + uint64_t ms_obytes; + uint64_t ms_opackets; + uint64_t ms_blockcnt; + uint64_t ms_unblockcnt; + uint64_t ms_txsdrops; +} misc_stat_t; + +/* + * To be consistent with other stat entries, misc stat + * is wrapped in stat entry + */ +typedef struct misc_stat_entry_s { + misc_stat_t mse_stats; +} misc_stat_entry_t; + +/* + * Definitions for ring stats: used by rx as well as tx + */ +typedef struct ring_stat_s { + uint64_t r_packets; + uint64_t r_bytes; +} ring_stat_t; + +typedef struct ring_stat_entry_s { + int64_t re_index; + ring_stat_t re_stats; +} ring_stat_entry_t; + +/* + * Definitions for fanout stats + */ +typedef struct fanout_stat_s { + uint64_t f_ipackets; + uint64_t f_rbytes; +} fanout_stat_t; + +typedef struct fanout_stat_entry_s { + int64_t fe_index; + lane_type_t fe_id; /* hw, sw, local */ + int64_t fe_foutindex; /* fanout index */ + fanout_stat_t fe_stats; +} fanout_stat_entry_t; + +/* + * Definitions for total stats + */ +typedef struct total_stat_s { + uint64_t ts_ipackets; + uint64_t ts_rbytes; + uint64_t ts_opackets; + uint64_t ts_obytes; +} total_stat_t; + +/* + * To be consistent with other stat entries, total stat + * is wrapped in stat entry + */ +typedef struct 
total_stat_entry_s { + total_stat_t tse_stats; +} total_stat_entry_t; + +/* + * Definitions for aggr stats + */ +typedef struct aggr_port_stat_s { + uint64_t ap_ipackets; + uint64_t ap_rbytes; + uint64_t ap_opackets; + uint64_t ap_obytes; +} aggr_port_stat_t; + +typedef struct aggr_port_stat_entry_s { + datalink_id_t ape_portlinkid; + aggr_port_stat_t ape_stats; +} aggr_port_stat_entry_t; + +/* + * Definitions for query all stats + */ +typedef struct name_value_stat_s { + char nv_statname[MAXSTATNAMELEN]; + uint64_t nv_statval; + struct name_value_stat_s *nv_nextstat; +} name_value_stat_t; + +typedef struct name_value_stat_entry_s { + char nve_header[MAXSTATNAMELEN]; + name_value_stat_t *nve_stats; +} name_value_stat_entry_t; + +/* + * Definitions for flow stats + */ +typedef struct flow_stat_s { + uint64_t fl_ipackets; + uint64_t fl_rbytes; + uint64_t fl_ierrors; + uint64_t fl_opackets; + uint64_t fl_obytes; + uint64_t fl_oerrors; + uint64_t fl_sdrops; +} flow_stat_t; + typedef struct pktsum_s { hrtime_t snaptime; uint64_t ipackets; @@ -65,6 +275,18 @@ extern dladm_status_t dladm_get_single_mac_stat(dladm_handle_t, datalink_id_t, extern void dladm_stats_total(pktsum_t *, pktsum_t *, pktsum_t *); extern void dladm_stats_diff(pktsum_t *, pktsum_t *, pktsum_t *); +extern dladm_stat_chain_t *dladm_link_stat_query(dladm_handle_t, + datalink_id_t, dladm_stat_type_t); +extern dladm_stat_chain_t *dladm_link_stat_diffchain(dladm_stat_chain_t *, + dladm_stat_chain_t *, dladm_stat_type_t); +extern dladm_stat_chain_t *dladm_link_stat_query_all(dladm_handle_t, + datalink_id_t, dladm_stat_type_t); + +extern flow_stat_t *dladm_flow_stat_query(const char *); +extern flow_stat_t *dladm_flow_stat_diff(flow_stat_t *, + flow_stat_t *); +extern name_value_stat_entry_t *dladm_flow_stat_query_all(const char *); + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libdladm/common/libdlvnic.c b/usr/src/lib/libdladm/common/libdlvnic.c index a49f51a6ca..7ff9563e1a 100644 --- 
a/usr/src/lib/libdladm/common/libdlvnic.c +++ b/usr/src/lib/libdladm/common/libdlvnic.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -117,7 +117,6 @@ i_dladm_vnic_create_sys(dladm_handle_t handle, dladm_vnic_attr_t *attr) ioc.vc_vrid = attr->va_vrid; ioc.vc_af = attr->va_af; ioc.vc_flags = attr->va_force ? VNIC_IOC_CREATE_FORCE : 0; - ioc.vc_flags |= attr->va_hwrings ? VNIC_IOC_CREATE_REQ_HWRINGS : 0; if (attr->va_mac_len > 0 || ioc.vc_mac_prefix_len > 0) bcopy(attr->va_mac_addr, ioc.vc_mac_addr, MAXMACADDRLEN); @@ -218,14 +217,6 @@ i_dladm_vnic_info_persist(dladm_handle_t handle, datalink_id_t linkid, goto done; } - status = dladm_get_conf_field(handle, conf, FHWRINGS, - &attrp->va_hwrings, sizeof (boolean_t)); - - if (status != DLADM_STATUS_OK && status != DLADM_STATUS_NOTFOUND) - goto done; - if (status == DLADM_STATUS_NOTFOUND) - attrp->va_hwrings = B_FALSE; - if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, NULL, 0)) != DLADM_STATUS_OK) goto done; @@ -521,7 +512,7 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid, /* Extract resource_ctl and cpu_list from proplist */ if (proplist != NULL) { status = dladm_link_proplist_extract(handle, proplist, - &attr.va_resource_props); + &attr.va_resource_props, 0); if (status != DLADM_STATUS_OK) goto done; } @@ -541,7 +532,6 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid, attr.va_vrid = vrid; attr.va_af = af; attr.va_force = (flags & DLADM_OPT_FORCE) != 0; - attr.va_hwrings = (flags & DLADM_OPT_HWRINGS) != 0; status = i_dladm_vnic_create_sys(handle, &attr); if (status != DLADM_STATUS_OK) @@ -745,14 +735,6 @@ dladm_vnic_persist_conf(dladm_handle_t handle, const char *name, goto done; } - if (attrp->va_hwrings) { - boolean_t hwrings = attrp->va_hwrings; - status = 
dladm_set_conf_field(handle, conf, FHWRINGS, - DLADM_TYPE_BOOLEAN, &hwrings); - if (status != DLADM_STATUS_OK) - goto done; - } - if (attrp->va_vid != 0) { u64 = attrp->va_vid; status = dladm_set_conf_field(handle, conf, FVLANID, @@ -776,9 +758,6 @@ typedef struct dladm_vnic_up_arg_s { dladm_status_t status; } dladm_vnic_up_arg_t; -#define DLADM_VNIC_UP_FIRST_WALK 0x1 -#define DLADM_VNIC_UP_SECOND_WALK 0x2 - static int i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) { @@ -786,7 +765,6 @@ i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) dladm_vnic_attr_t attr; dladm_status_t status; dladm_arg_list_t *proplist; - uint32_t flags = ((dladm_vnic_up_arg_t *)arg)->flags; bzero(&attr, sizeof (attr)); @@ -794,14 +772,6 @@ i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) if (status != DLADM_STATUS_OK) goto done; - /* - * Create the vnics that request hardware group first - * Create the vnics that don't request hardware group in the second walk - */ - if ((flags == DLADM_VNIC_UP_FIRST_WALK && !attr.va_hwrings) || - (flags == DLADM_VNIC_UP_SECOND_WALK && attr.va_hwrings)) - goto done; - /* Get all properties for this vnic */ status = dladm_link_get_proplist(handle, linkid, &proplist); if (status != DLADM_STATUS_OK) @@ -809,7 +779,7 @@ i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) if (proplist != NULL) { status = dladm_link_proplist_extract(handle, proplist, - &attr.va_resource_props); + &attr.va_resource_props, DLADM_OPT_BOOT); } status = i_dladm_vnic_create_sys(handle, &attr); @@ -834,11 +804,6 @@ dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) (DATALINK_CLASS_VNIC | DATALINK_CLASS_ETHERSTUB); if (linkid == DATALINK_ALL_LINKID) { - vnic_arg.flags = DLADM_VNIC_UP_FIRST_WALK; - (void) dladm_walk_datalink_id(i_dladm_vnic_up, handle, - &vnic_arg, class, DATALINK_ANY_MEDIATYPE, - DLADM_OPT_PERSIST); - vnic_arg.flags = DLADM_VNIC_UP_SECOND_WALK; (void) 
dladm_walk_datalink_id(i_dladm_vnic_up, handle, &vnic_arg, class, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST); diff --git a/usr/src/lib/libdladm/common/libdlvnic.h b/usr/src/lib/libdladm/common/libdlvnic.h index 8b859f9e04..94b656aadf 100644 --- a/usr/src/lib/libdladm/common/libdlvnic.h +++ b/usr/src/lib/libdladm/common/libdlvnic.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,7 +47,6 @@ typedef struct dladm_vnic_attr { uint_t va_mac_prefix_len; uint16_t va_vid; boolean_t va_force; - boolean_t va_hwrings; vrid_t va_vrid; int va_af; mac_resource_props_t va_resource_props; diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c index bd4190b159..50e79616c0 100644 --- a/usr/src/lib/libdladm/common/linkprop.c +++ b/usr/src/lib/libdladm/common/linkprop.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -59,6 +59,11 @@ #include <sys/vlan.h> #include <libdlbridge.h> #include <stp_in.h> +#include <netinet/dhcp.h> +#include <netinet/dhcp6.h> +#include <net/if_types.h> +#include <libinetutil.h> +#include <pool.h> /* * The linkprop get() callback. 
@@ -106,7 +111,8 @@ typedef dladm_status_t pd_setf_t(dladm_handle_t, prop_desc_t *, datalink_id_t, */ typedef dladm_status_t pd_checkf_t(dladm_handle_t, prop_desc_t *pdp, datalink_id_t, char **propstrp, uint_t cnt, - val_desc_t *propval, datalink_media_t); + uint_t flags, val_desc_t *propval, + datalink_media_t); typedef struct link_attr_s { mac_prop_id_t pp_id; @@ -114,50 +120,47 @@ typedef struct link_attr_s { char *pp_name; } link_attr_t; +typedef struct dladm_linkprop_args_s { + dladm_status_t dla_status; + uint_t dla_flags; +} dladm_linkprop_args_t; + static dld_ioc_macprop_t *i_dladm_buf_alloc_by_name(size_t, datalink_id_t, const char *, uint_t, dladm_status_t *); static dld_ioc_macprop_t *i_dladm_buf_alloc_by_id(size_t, datalink_id_t, mac_prop_id_t, uint_t, dladm_status_t *); -static dld_ioc_macprop_t *i_dladm_get_public_prop(dladm_handle_t, datalink_id_t, - char *, uint_t, dladm_status_t *, uint_t *); +static dladm_status_t i_dladm_get_public_prop(dladm_handle_t, datalink_id_t, + char *, uint_t, uint_t *, void *, size_t); -static dladm_status_t i_dladm_set_private_prop(dladm_handle_t, datalink_id_t, +static dladm_status_t i_dladm_set_private_prop(dladm_handle_t, datalink_id_t, const char *, char **, uint_t, uint_t); -static dladm_status_t i_dladm_get_priv_prop(dladm_handle_t, datalink_id_t, +static dladm_status_t i_dladm_get_priv_prop(dladm_handle_t, datalink_id_t, const char *, char **, uint_t *, dladm_prop_type_t, uint_t); -static link_attr_t *dladm_name2prop(const char *); -static link_attr_t *dladm_id2prop(mac_prop_id_t); - -static pd_getf_t do_get_zone, do_get_autopush, do_get_rate_mod, - do_get_rate_prop, do_get_channel_prop, - do_get_powermode_prop, do_get_radio_prop, - i_dladm_duplex_get, i_dladm_status_get, - i_dladm_binary_get, i_dladm_uint32_get, - i_dladm_flowctl_get, i_dladm_maxbw_get, - i_dladm_cpus_get, i_dladm_priority_get, - i_dladm_tagmode_get, i_dladm_range_get, - get_stp_prop, get_bridge_forward, - get_bridge_pvid, - /* the above need to 
be renamed to "do_get_xxx" */ - do_get_protection; - -static pd_setf_t do_set_zone, do_set_rate_prop, - do_set_powermode_prop, do_set_radio_prop, - i_dladm_set_public_prop, do_set_res, do_set_cpus, - set_stp_prop, set_bridge_forward, set_bridge_pvid, - do_set_protection; - -static pd_checkf_t do_check_zone, do_check_autopush, do_check_rate, - do_check_hoplimit, do_check_encaplim, - i_dladm_uint32_check, do_check_maxbw, do_check_cpus, - do_check_priority, check_stp_prop, check_bridge_pvid, - do_check_allowedips, do_check_prop; - -static dladm_status_t i_dladm_speed_get(dladm_handle_t, prop_desc_t *, - datalink_id_t, char **, uint_t *, uint_t, uint_t *); static dladm_status_t i_dladm_macprop(dladm_handle_t, void *, boolean_t); static const char *dladm_perm2str(uint_t, char *); +static link_attr_t *dladm_name2prop(const char *); +static link_attr_t *dladm_id2prop(mac_prop_id_t); + +static pd_getf_t get_zone, get_autopush, get_rate_mod, get_rate, + get_speed, get_channel, get_powermode, get_radio, + get_duplex, get_link_state, get_binary, get_uint32, + get_flowctl, get_maxbw, get_cpus, get_priority, + get_tagmode, get_range, get_stp, get_bridge_forward, + get_bridge_pvid, get_protection, get_rxrings, + get_txrings, get_cntavail, + get_allowedips, get_allowedcids, get_pool, + get_rings_range; + +static pd_setf_t set_zone, set_rate, set_powermode, set_radio, + set_public_prop, set_resource, set_stp_prop, + set_bridge_forward, set_bridge_pvid; + +static pd_checkf_t check_zone, check_autopush, check_rate, check_hoplimit, + check_encaplim, check_uint32, check_maxbw, check_cpus, + check_stp_prop, check_bridge_pvid, check_allowedips, + check_allowedcids, check_rings, + check_pool, check_prop; struct prop_desc { /* @@ -326,12 +329,6 @@ static link_attr_t link_attr[] = { { MAC_PROP_WL_MLME, sizeof (wl_mlme_t), "mlme"}, - { MAC_PROP_MAXBW, sizeof (mac_resource_props_t), "maxbw"}, - - { MAC_PROP_PRIO, sizeof (mac_resource_props_t), "priority"}, - - { MAC_PROP_BIND_CPU, sizeof 
(mac_resource_props_t), "cpus"}, - { MAC_PROP_TAGMODE, sizeof (link_tagmode_t), "tagmode"}, { MAC_PROP_IPTUN_HOPLIMIT, sizeof (uint32_t), "hoplimit"}, @@ -344,7 +341,24 @@ static link_attr_t link_attr[] = { { MAC_PROP_LDECAY, sizeof (uint32_t), "learn_decay"}, - { MAC_PROP_PROTECT, sizeof (mac_resource_props_t), "protection"}, + { MAC_PROP_RESOURCE, sizeof (mac_resource_props_t), "resource"}, + + { MAC_PROP_RESOURCE_EFF, sizeof (mac_resource_props_t), + "resource-effective"}, + + { MAC_PROP_RXRINGSRANGE, sizeof (mac_propval_range_t), "rxrings"}, + + { MAC_PROP_TXRINGSRANGE, sizeof (mac_propval_range_t), "txrings"}, + + { MAC_PROP_MAX_TX_RINGS_AVAIL, sizeof (uint_t), + "txrings-available"}, + + { MAC_PROP_MAX_RX_RINGS_AVAIL, sizeof (uint_t), + "rxrings-available"}, + + { MAC_PROP_MAX_RXHWCLNT_AVAIL, sizeof (uint_t), "rxhwclnt-available"}, + + { MAC_PROP_MAX_TXHWCLNT_AVAIL, sizeof (uint_t), "txhwclnt-available"}, { MAC_PROP_PRIVATE, 0, "driver-private"} }; @@ -395,8 +409,9 @@ static val_desc_t link_tagmode_vals[] = { static val_desc_t link_protect_vals[] = { { "mac-nospoof", MPT_MACNOSPOOF }, + { "restricted", MPT_RESTRICTED }, { "ip-nospoof", MPT_IPNOSPOOF }, - { "restricted", MPT_RESTRICTED } + { "dhcp-nospoof", MPT_DHCPNOSPOOF }, }; static val_desc_t dladm_wlan_radio_vals[] = { @@ -418,164 +433,180 @@ static val_desc_t stp_p2p_vals[] = { #define VALCNT(vals) (sizeof ((vals)) / sizeof (val_desc_t)) #define RESET_VAL ((uintptr_t)-1) +#define UNSPEC_VAL ((uintptr_t)-2) static prop_desc_t prop_table[] = { { "channel", { NULL, 0 }, NULL, 0, NULL, NULL, - do_get_channel_prop, NULL, 0, + get_channel, NULL, 0, DATALINK_CLASS_PHYS, DL_WIFI }, { "powermode", { "off", DLADM_WLAN_PM_OFF }, dladm_wlan_powermode_vals, VALCNT(dladm_wlan_powermode_vals), - do_set_powermode_prop, NULL, - do_get_powermode_prop, NULL, 0, + set_powermode, NULL, + get_powermode, NULL, 0, DATALINK_CLASS_PHYS, DL_WIFI }, { "radio", { "on", DLADM_WLAN_RADIO_ON }, dladm_wlan_radio_vals, 
VALCNT(dladm_wlan_radio_vals), - do_set_radio_prop, NULL, - do_get_radio_prop, NULL, 0, + set_radio, NULL, + get_radio, NULL, 0, DATALINK_CLASS_PHYS, DL_WIFI }, { "speed", { "", 0 }, NULL, 0, - do_set_rate_prop, do_get_rate_mod, - do_get_rate_prop, do_check_rate, 0, + set_rate, get_rate_mod, + get_rate, check_rate, 0, DATALINK_CLASS_PHYS, DATALINK_ANY_MEDIATYPE }, { "autopush", { "", 0 }, NULL, 0, - i_dladm_set_public_prop, NULL, - do_get_autopush, do_check_autopush, PD_CHECK_ALLOC, + set_public_prop, NULL, + get_autopush, check_autopush, PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "zone", { "", 0 }, NULL, 0, - do_set_zone, NULL, - do_get_zone, do_check_zone, PD_TEMPONLY|PD_CHECK_ALLOC, + set_zone, NULL, + get_zone, check_zone, PD_TEMPONLY|PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "duplex", { "", 0 }, link_duplex_vals, VALCNT(link_duplex_vals), - NULL, NULL, i_dladm_duplex_get, NULL, + NULL, NULL, get_duplex, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "state", { "up", LINK_STATE_UP }, link_status_vals, VALCNT(link_status_vals), - NULL, NULL, i_dladm_status_get, NULL, + NULL, NULL, get_link_state, NULL, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "adv_autoneg_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "mtu", { "", 0 }, NULL, 0, - i_dladm_set_public_prop, i_dladm_range_get, - i_dladm_uint32_get, i_dladm_uint32_check, 0, DATALINK_CLASS_ALL, + set_public_prop, get_range, + get_uint32, check_uint32, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "flowctrl", { "", 0 }, link_flow_vals, VALCNT(link_flow_vals), - i_dladm_set_public_prop, NULL, i_dladm_flowctl_get, NULL, + set_public_prop, NULL, get_flowctl, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_10gfdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, 
get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_10gfdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_1000fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_1000fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_1000hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_1000hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_100fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_100fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_100hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_100hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_10fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_10fdx_cap", { "", 0 }, link_01_vals, 
VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_10hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_10hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "maxbw", { "--", RESET_VAL }, NULL, 0, - do_set_res, NULL, - i_dladm_maxbw_get, do_check_maxbw, PD_CHECK_ALLOC, + set_resource, NULL, + get_maxbw, check_maxbw, PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "cpus", { "--", RESET_VAL }, NULL, 0, - do_set_cpus, NULL, - i_dladm_cpus_get, do_check_cpus, 0, + set_resource, NULL, + get_cpus, check_cpus, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, - { "priority", { "high", RESET_VAL }, - link_priority_vals, VALCNT(link_priority_vals), do_set_res, NULL, - i_dladm_priority_get, do_check_priority, PD_CHECK_ALLOC, + { "cpus-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_cpus, 0, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "pool", { "--", RESET_VAL }, NULL, 0, + set_resource, NULL, + get_pool, check_pool, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "pool-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_pool, 0, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "priority", { "high", MPL_RESET }, + link_priority_vals, VALCNT(link_priority_vals), set_resource, + NULL, get_priority, check_prop, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "tagmode", { "vlanonly", LINK_TAGMODE_VLANONLY }, link_tagmode_vals, VALCNT(link_tagmode_vals), - i_dladm_set_public_prop, NULL, i_dladm_tagmode_get, + set_public_prop, NULL, get_tagmode, NULL, 0, DATALINK_CLASS_PHYS | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC, DL_ETHER }, { "hoplimit", { "", 0 
}, NULL, 0, - i_dladm_set_public_prop, i_dladm_range_get, i_dladm_uint32_get, - do_check_hoplimit, 0, DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE}, + set_public_prop, get_range, get_uint32, + check_hoplimit, 0, DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE}, { "encaplimit", { "", 0 }, NULL, 0, - i_dladm_set_public_prop, i_dladm_range_get, i_dladm_uint32_get, - do_check_encaplim, 0, DATALINK_CLASS_IPTUN, DL_IPV6}, + set_public_prop, get_range, get_uint32, + check_encaplim, 0, DATALINK_CLASS_IPTUN, DL_IPV6}, { "forward", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), @@ -588,70 +619,118 @@ static prop_desc_t prop_table[] = { DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "learn_limit", { "1000", 1000 }, NULL, 0, - i_dladm_set_public_prop, NULL, i_dladm_uint32_get, - i_dladm_uint32_check, 0, + set_public_prop, NULL, get_uint32, + check_uint32, 0, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "learn_decay", { "200", 200 }, NULL, 0, - i_dladm_set_public_prop, NULL, i_dladm_uint32_get, - i_dladm_uint32_check, 0, + set_public_prop, NULL, get_uint32, + check_uint32, 0, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), - set_stp_prop, NULL, get_stp_prop, NULL, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, NULL, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_priority", { "128", 128 }, NULL, 0, - set_stp_prop, NULL, get_stp_prop, check_stp_prop, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, check_stp_prop, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_cost", { "auto", 0 }, NULL, 0, - set_stp_prop, NULL, get_stp_prop, check_stp_prop, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, check_stp_prop, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| 
DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_edge", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), - set_stp_prop, NULL, get_stp_prop, NULL, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, NULL, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_p2p", { "auto", P2P_AUTO }, stp_p2p_vals, VALCNT(stp_p2p_vals), - set_stp_prop, NULL, get_stp_prop, NULL, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, NULL, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_mcheck", { "0", 0 }, link_01_vals, VALCNT(link_01_vals), - set_stp_prop, NULL, get_stp_prop, check_stp_prop, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, check_stp_prop, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "protection", { "--", RESET_VAL }, link_protect_vals, VALCNT(link_protect_vals), - do_set_protection, NULL, do_get_protection, do_check_prop, 0, + set_resource, NULL, get_protection, check_prop, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "allowed-ips", { "--", 0 }, - NULL, 0, do_set_protection, NULL, - do_get_protection, do_check_allowedips, 0, + NULL, 0, set_resource, NULL, + get_allowedips, check_allowedips, PD_CHECK_ALLOC, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "allowed-dhcp-cids", { "--", 0 }, + NULL, 0, set_resource, NULL, + get_allowedcids, check_allowedcids, PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxrings", { "--", RESET_VAL }, NULL, 0, + set_resource, get_rings_range, get_rxrings, check_rings, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxrings-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_rxrings, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "txrings", { "--", RESET_VAL }, NULL, 0, + set_resource, get_rings_range, get_txrings, check_rings, 0, + DATALINK_CLASS_ALL, 
DATALINK_ANY_MEDIATYPE }, + + { "txrings-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_txrings, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "txrings-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxrings-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxhwclnt-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "txhwclnt-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + }; #define DLADM_MAX_PROPS (sizeof (prop_table) / sizeof (prop_desc_t)) static resource_prop_t rsrc_prop_table[] = { - {"maxbw", do_extract_maxbw}, - {"priority", do_extract_priority}, - {"cpus", do_extract_cpus}, - {"protection", do_extract_protection}, - {"allowed-ips", do_extract_allowedips} + {"maxbw", extract_maxbw}, + {"priority", extract_priority}, + {"cpus", extract_cpus}, + {"cpus-effective", extract_cpus}, + {"pool", extract_pool}, + {"pool-effective", extract_pool}, + {"protection", extract_protection}, + {"allowed-ips", extract_allowedips}, + {"allowed-dhcp-cids", extract_allowedcids}, + {"rxrings", extract_rxrings}, + {"rxrings-effective", extract_rxrings}, + {"txrings", extract_txrings}, + {"txrings-effective", extract_txrings} }; #define DLADM_MAX_RSRC_PROP (sizeof (rsrc_prop_table) / \ sizeof (resource_prop_t)) @@ -691,8 +770,9 @@ static dladm_status_t i_dladm_getset_defval(dladm_handle_t, prop_desc_t *, /* ARGSUSED */ static dladm_status_t -do_check_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { int i, j; @@ 
-737,17 +817,17 @@ i_dladm_set_single_prop(dladm_handle_t handle, datalink_id_t linkid, return (DLADM_STATUS_PROPRDONLY); if (prop_val != NULL) { - vdp = malloc(sizeof (val_desc_t) * val_cnt); + vdp = calloc(val_cnt, sizeof (val_desc_t)); if (vdp == NULL) return (DLADM_STATUS_NOMEM); if (pdp->pd_check != NULL) { needfree = ((pdp->pd_flags & PD_CHECK_ALLOC) != 0); status = pdp->pd_check(handle, pdp, linkid, prop_val, - val_cnt, vdp, media); + val_cnt, flags, vdp, media); } else if (pdp->pd_optval != NULL) { - status = do_check_prop(handle, pdp, linkid, prop_val, - val_cnt, vdp, media); + status = check_prop(handle, pdp, linkid, prop_val, + val_cnt, flags, vdp, media); } else { status = DLADM_STATUS_BADARG; } @@ -765,7 +845,7 @@ i_dladm_set_single_prop(dladm_handle_t handle, datalink_id_t linkid, cnt = 1; defval = (strlen(pdp->pd_defval.vd_name) > 0); if ((pdp->pd_flags & PD_CHECK_ALLOC) != 0 || defval) { - if ((vdp = malloc(sizeof (val_desc_t))) == NULL) + if ((vdp = calloc(1, sizeof (val_desc_t))) == NULL) return (DLADM_STATUS_NOMEM); if (defval) { @@ -773,7 +853,7 @@ i_dladm_set_single_prop(dladm_handle_t handle, datalink_id_t linkid, sizeof (val_desc_t)); } else if (pdp->pd_check != NULL) { status = pdp->pd_check(handle, pdp, linkid, - prop_val, cnt, vdp, media); + prop_val, cnt, flags, vdp, media); if (status != DLADM_STATUS_OK) goto done; } @@ -842,7 +922,6 @@ i_dladm_set_linkprop(dladm_handle_t handle, datalink_id_t linkid, status = DLADM_STATUS_NOTFOUND; } } - return (status); } @@ -955,9 +1034,9 @@ dladm_get_linkprop(dladm_handle_t handle, datalink_id_t linkid, uint_t perm_flags; if (type == DLADM_PROP_VAL_DEFAULT) - dld_flags |= MAC_PROP_DEFAULT; + dld_flags |= DLD_PROP_DEFAULT; else if (type == DLADM_PROP_VAL_MODIFIABLE) - dld_flags |= MAC_PROP_POSSIBLE; + dld_flags |= DLD_PROP_POSSIBLE; if (linkid == DATALINK_INVALID_LINKID || prop_name == NULL || prop_val == NULL || val_cntp == NULL || *val_cntp == 0) @@ -1124,7 +1203,7 @@ 
dladm_get_linkprop_values(dladm_handle_t handle, datalink_id_t linkid, prop_val[valc] = (char *)(prop_val + *val_cntp) + valc * DLADM_PROP_VAL_MAX; - dld_flags = (type == DLADM_PROP_VAL_DEFAULT) ? MAC_PROP_DEFAULT : 0; + dld_flags = (type == DLADM_PROP_VAL_DEFAULT) ? DLD_PROP_DEFAULT : 0; switch (type) { case DLADM_PROP_VAL_CURRENT: @@ -1177,7 +1256,7 @@ dladm_get_linkprop_values(dladm_handle_t handle, datalink_id_t linkid, status = DLADM_STATUS_NOMEM; else status = pdp->pd_check(handle, pdp, linkid, - prop_val, *val_cntp, vdp, media); + prop_val, *val_cntp, 0, vdp, media); if (status == DLADM_STATUS_OK) { for (valc = 0; valc < *val_cntp; valc++) ret_val[valc] = vdp[valc].vd_val; @@ -1211,9 +1290,10 @@ static int i_dladm_init_one_prop(dladm_handle_t handle, datalink_id_t linkid, const char *prop_name, void *arg) { - char *buf, **propvals; - uint_t i, valcnt = DLADM_MAX_PROP_VALCNT; - dladm_status_t status, *retval = arg; + char *buf, **propvals; + uint_t i, valcnt = DLADM_MAX_PROP_VALCNT; + dladm_status_t status; + dladm_linkprop_args_t *dla = arg; if ((buf = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) * DLADM_MAX_PROP_VALCNT)) == NULL) { @@ -1233,9 +1313,10 @@ i_dladm_init_one_prop(dladm_handle_t handle, datalink_id_t linkid, } status = dladm_set_linkprop(handle, linkid, prop_name, propvals, - valcnt, DLADM_OPT_ACTIVE); + valcnt, dla->dla_flags | DLADM_OPT_ACTIVE); + if (status != DLADM_STATUS_OK) - *retval = status; + dla->dla_status = status; done: if (buf != NULL) @@ -1269,9 +1350,16 @@ dladm_init_linkprop(dladm_handle_t handle, datalink_id_t linkid, dladm_status_t status = DLADM_STATUS_OK; datalink_media_t dmedia; uint32_t media; + dladm_linkprop_args_t *dla; dmedia = any_media ? 
DATALINK_ANY_MEDIATYPE : DL_WIFI; + dla = malloc(sizeof (dladm_linkprop_args_t)); + if (dla == NULL) + return (DLADM_STATUS_NOMEM); + dla->dla_flags = DLADM_OPT_BOOT; + dla->dla_status = DLADM_STATUS_OK; + if (linkid == DATALINK_ALL_LINKID) { (void) dladm_walk_datalink_id(i_dladm_init_linkprop, handle, NULL, DATALINK_CLASS_ALL, dmedia, DLADM_OPT_PERSIST); @@ -1279,36 +1367,32 @@ dladm_init_linkprop(dladm_handle_t handle, datalink_id_t linkid, ((dladm_datalink_id2info(handle, linkid, NULL, NULL, &media, NULL, 0) == DLADM_STATUS_OK) && DATALINK_MEDIA_ACCEPTED(dmedia, media))) { - (void) dladm_walk_linkprop(handle, linkid, &status, + (void) dladm_walk_linkprop(handle, linkid, (void *)dla, i_dladm_init_one_prop); + status = dla->dla_status; } + free(dla); return (status); } /* ARGSUSED */ static dladm_status_t -do_get_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { char zone_name[ZONENAME_MAX]; zoneid_t zid; dladm_status_t status; - char *cp; - dld_ioc_macprop_t *dip; if (flags != 0) return (DLADM_STATUS_NOTSUP); - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &zid, sizeof (zid)); if (status != DLADM_STATUS_OK) return (status); - cp = dip->pr_val; - (void) memcpy(&zid, cp, sizeof (zid)); - free(dip); - *val_cnt = 1; if (zid != GLOBAL_ZONEID) { if (getzonenamebyid(zid, zone_name, sizeof (zone_name)) < 0) { @@ -1393,13 +1477,11 @@ cleanup: /* ARGSUSED */ static dladm_status_t -do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { dladm_status_t status = DLADM_STATUS_OK; zoneid_t zid_old, 
zid_new; - char *cp; - dld_ioc_macprop_t *dip; dld_ioc_zid_t *dzp; if (val_cnt != 1) @@ -1407,20 +1489,16 @@ do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, dzp = (dld_ioc_zid_t *)vdp->vd_val; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, NULL); + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + NULL, &zid_old, sizeof (zid_old)); if (status != DLADM_STATUS_OK) return (status); - cp = dip->pr_val; - (void) memcpy(&zid_old, cp, sizeof (zid_old)); - free(dip); - zid_new = dzp->diz_zid; if (zid_new == zid_old) return (DLADM_STATUS_OK); - if ((status = i_dladm_set_public_prop(handle, pdp, linkid, vdp, val_cnt, + if ((status = set_public_prop(handle, pdp, linkid, vdp, val_cnt, flags, media)) != DLADM_STATUS_OK) return (status); @@ -1440,8 +1518,9 @@ do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { char *zone_name; zoneid_t zoneid; @@ -1490,35 +1569,33 @@ done: /* ARGSUSED */ static dladm_status_t -i_dladm_maxbw_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; mac_resource_props_t mrp; dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) return (status); - bcopy(dip->pr_val, &mrp, sizeof 
(mac_resource_props_t)); - free(dip); - if ((mrp.mrp_mask & MRP_MAXBW) == 0) { - (*prop_val)[0] = '\0'; - } else { - (void) dladm_bw2str(mrp.mrp_maxbw, prop_val[0]); + *val_cnt = 0; + return (DLADM_STATUS_OK); } + + (void) dladm_bw2str(mrp.mrp_maxbw, prop_val[0]); *val_cnt = 1; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_check_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { uint64_t *maxbw; dladm_status_t status = DLADM_STATUS_OK; @@ -1547,11 +1624,15 @@ do_check_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ dladm_status_t -do_extract_maxbw(val_desc_t *vdp, uint_t cnt, void *arg) +extract_maxbw(val_desc_t *vdp, uint_t cnt, void *arg) { mac_resource_props_t *mrp = arg; - bcopy((char *)vdp->vd_val, &mrp->mrp_maxbw, sizeof (uint64_t)); + if (vdp->vd_val == RESET_VAL) { + mrp->mrp_maxbw = MRP_MAXBW_RESETVAL; + } else { + bcopy((char *)vdp->vd_val, &mrp->mrp_maxbw, sizeof (uint64_t)); + } mrp->mrp_mask |= MRP_MAXBW; return (DLADM_STATUS_OK); @@ -1559,34 +1640,33 @@ do_extract_maxbw(val_desc_t *vdp, uint_t cnt, void *arg) /* ARGSUSED */ static dladm_status_t -i_dladm_cpus_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; + dladm_status_t status; mac_resource_props_t mrp; int i; uint32_t ncpus; - uchar_t *cp; - dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) - return (status); + if (strcmp(pdp->pd_name, "cpus-effective") == 0) { + status = 
i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + status = i_dladm_get_public_prop(handle, linkid, + "resource", flags, perm_flags, &mrp, sizeof (mrp)); + } - cp = (uchar_t *)dip->pr_val; - (void) memcpy(&mrp, cp, sizeof (mac_resource_props_t)); - free(dip); + if (status != DLADM_STATUS_OK) + return (status); ncpus = mrp.mrp_ncpus; - if (ncpus > *val_cnt) return (DLADM_STATUS_TOOSMALL); if (ncpus == 0) { - (*prop_val)[0] = '\0'; - *val_cnt = 1; + *val_cnt = 0; return (DLADM_STATUS_OK); } @@ -1600,194 +1680,215 @@ i_dladm_cpus_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_res(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) +check_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { + uint32_t cpuid; + int i, j, rc; + char *endp; + long nproc = sysconf(_SC_NPROCESSORS_CONF); mac_resource_props_t mrp; - dladm_status_t status = DLADM_STATUS_OK; - dld_ioc_macprop_t *dip; + dladm_status_t status; + uint_t perm_flags; + + /* Get the current pool property */ + status = i_dladm_get_public_prop(handle, linkid, "resource", 0, + &perm_flags, &mrp, sizeof (mrp)); + + if (status == DLADM_STATUS_OK) { + /* Can't set cpus if a pool is set */ + if (strlen(mrp.mrp_pool) != 0) + return (DLADM_STATUS_POOLCPU); + } bzero(&mrp, sizeof (mac_resource_props_t)); - dip = i_dladm_buf_alloc_by_name(0, linkid, pdp->pd_name, - flags, &status); - if (dip == NULL) - return (status); + for (i = 0; i < val_cnt; i++) { + errno = 0; + cpuid = strtol(prop_val[i], &endp, 10); + if (errno != 0 || *endp != '\0') + return (DLADM_STATUS_BADVAL); - if (vdp->vd_val == RESET_VAL) { - switch (dip->pr_num) { - case MAC_PROP_MAXBW: - mrp.mrp_maxbw = MRP_MAXBW_RESETVAL; - mrp.mrp_mask = 
MRP_MAXBW; - break; - case MAC_PROP_PRIO: - mrp.mrp_priority = MPL_RESET; - mrp.mrp_mask = MRP_PRIORITY; - break; - default: - free(dip); - return (DLADM_STATUS_BADARG); - } - } else { - switch (dip->pr_num) { - case MAC_PROP_MAXBW: - bcopy((void *)vdp->vd_val, &mrp.mrp_maxbw, - sizeof (uint64_t)); - mrp.mrp_mask = MRP_MAXBW; - break; - case MAC_PROP_PRIO: - bcopy((void *)vdp->vd_val, &mrp.mrp_priority, - sizeof (mac_priority_level_t)); - mrp.mrp_mask = MRP_PRIORITY; - break; - default: - free(dip); - return (DLADM_STATUS_BADARG); + if (cpuid >= nproc) + return (DLADM_STATUS_CPUMAX); + + rc = p_online(cpuid, P_STATUS); + if (rc < 1) + return (DLADM_STATUS_CPUERR); + + if (rc != P_ONLINE) + return (DLADM_STATUS_CPUNOTONLINE); + + vdp[i].vd_val = (uintptr_t)cpuid; + } + + /* Check for duplicates */ + for (i = 0; i < val_cnt; i++) { + for (j = 0; j < val_cnt; j++) { + if (i != j && vdp[i].vd_val == vdp[j].vd_val) + return (DLADM_STATUS_BADVAL); } } + return (DLADM_STATUS_OK); +} - (void) memcpy(dip->pr_val, &mrp, dip->pr_valsize); - status = i_dladm_macprop(handle, dip, B_TRUE); - free(dip); - return (status); +/* ARGSUSED */ +dladm_status_t +extract_cpus(val_desc_t *vdp, uint_t cnt, void *arg) +{ + mac_resource_props_t *mrp = arg; + int i; + + if (vdp[0].vd_val == RESET_VAL) { + bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t)); + mrp->mrp_mask |= MRP_CPUS; + return (DLADM_STATUS_OK); + } + + for (i = 0; i < cnt; i++) + mrp->mrp_cpu[i] = (uint32_t)vdp[i].vd_val; + + mrp->mrp_ncpus = cnt; + mrp->mrp_mask |= (MRP_CPUS|MRP_CPUS_USERSPEC); + mrp->mrp_fanout_mode = MCM_CPUS; + mrp->mrp_rx_intr_cpu = -1; + + return (DLADM_STATUS_OK); } +/* + * Get the pool datalink property from the kernel. This is used + * for both the user specified pool and effective pool properties. 
+ */ /* ARGSUSED */ static dladm_status_t -do_set_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) +get_pool(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, + uint_t flags, uint_t *perm_flags) { mac_resource_props_t mrp; dladm_status_t status; - dld_ioc_macprop_t *dip; - datalink_class_t class; - /* - * CPU bindings can be set on VNIC and regular physical links. - * However VNICs fails the dladm_phys_info test(). So apply - * the phys_info test only on physical links. - */ - if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class, - NULL, NULL, 0)) != DLADM_STATUS_OK) { - return (status); - } - - /* - * We set intr_cpu to -1. The interrupt will be retargetted, - * if possible when the setup is complete in MAC. - */ - bzero(&mrp, sizeof (mac_resource_props_t)); - mrp.mrp_mask = MRP_CPUS; - if (vdp != NULL && vdp->vd_val != RESET_VAL) { - mac_resource_props_t *vmrp; - - vmrp = (mac_resource_props_t *)vdp->vd_val; - if (vmrp->mrp_ncpus > 0) { - bcopy(vmrp, &mrp, sizeof (mac_resource_props_t)); - mrp.mrp_mask = MRP_CPUS; - } - mrp.mrp_mask |= MRP_CPUS_USERSPEC; - mrp.mrp_fanout_mode = MCM_CPUS; - mrp.mrp_intr_cpu = -1; + if (strcmp(pdp->pd_name, "pool-effective") == 0) { + status = i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + status = i_dladm_get_public_prop(handle, linkid, + "resource", flags, perm_flags, &mrp, sizeof (mrp)); } - dip = i_dladm_buf_alloc_by_name(0, linkid, pdp->pd_name, - flags, &status); - if (dip == NULL) + if (status != DLADM_STATUS_OK) return (status); - (void) memcpy(dip->pr_val, &mrp, dip->pr_valsize); - status = i_dladm_macprop(handle, dip, B_TRUE); - free(dip); - return (status); + if (strlen(mrp.mrp_pool) == 0) { + (*prop_val)[0] = '\0'; + } else { + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, 
+ "%s", mrp.mrp_pool); + } + *val_cnt = 1; + + return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_check_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_pool(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { - uint32_t cpuid; - int i, j, rc; - long nproc = sysconf(_SC_NPROCESSORS_CONF); - mac_resource_props_t *mrp; + pool_conf_t *poolconf; + pool_t *pool; + mac_resource_props_t mrp; + dladm_status_t status; + uint_t perm_flags; + char *poolname; + + /* Get the current cpus property */ + status = i_dladm_get_public_prop(handle, linkid, "resource", 0, + &perm_flags, &mrp, sizeof (mrp)); - mrp = malloc(sizeof (mac_resource_props_t)); - if (mrp == NULL) + if (status == DLADM_STATUS_OK) { + /* Can't set pool if cpus are set */ + if (mrp.mrp_ncpus != 0) + return (DLADM_STATUS_POOLCPU); + } + + poolname = malloc(sizeof (mrp.mrp_pool)); + if (poolname == NULL) return (DLADM_STATUS_NOMEM); - for (i = 0; i < val_cnt; i++) { - errno = 0; - cpuid = strtol(prop_val[i], (char **)NULL, 10); - if (errno != 0 || cpuid >= nproc) { - free(mrp); - return (DLADM_STATUS_CPUMAX); - } - rc = p_online(cpuid, P_STATUS); - if (rc < 1) { - free(mrp); - return (DLADM_STATUS_CPUERR); - } - if (rc != P_ONLINE) { - free(mrp); - return (DLADM_STATUS_CPUNOTONLINE); + /* Check for pool's availability if not booting */ + if ((flags & DLADM_OPT_BOOT) == 0) { + + /* Allocate and open pool configuration */ + if ((poolconf = pool_conf_alloc()) == NULL) + return (DLADM_STATUS_BADVAL); + + if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) + != PO_SUCCESS) { + pool_conf_free(poolconf); + return (DLADM_STATUS_BADVAL); } - mrp->mrp_cpu[i] = cpuid; - } - mrp->mrp_ncpus = (uint32_t)val_cnt; - /* Check for duplicates */ - for (i = 0; i < val_cnt; i++) { - for (j = 0; j < 
val_cnt; j++) { - if (i != j && mrp->mrp_cpu[i] == mrp->mrp_cpu[j]) { - free(mrp); - return (DLADM_STATUS_BADARG); - } + /* Look for pool name */ + if ((pool = pool_get_pool(poolconf, *prop_val)) == NULL) { + pool_conf_free(poolconf); + return (DLADM_STATUS_BADVAL); } + + pool_conf_free(poolconf); + free(pool); } - vdp->vd_val = (uintptr_t)mrp; + + (void) strlcpy(poolname, *prop_val, sizeof (mrp.mrp_pool)); + vdp->vd_val = (uintptr_t)poolname; return (DLADM_STATUS_OK); } /* ARGSUSED */ dladm_status_t -do_extract_cpus(val_desc_t *vdp, uint_t cnt, void *arg) +extract_pool(val_desc_t *vdp, uint_t cnt, void *arg) { - mac_resource_props_t *mrp = arg; - mac_resource_props_t *vmrp = (mac_resource_props_t *)vdp->vd_val; - int i; + mac_resource_props_t *mrp = (mac_resource_props_t *)arg; - for (i = 0; i < vmrp->mrp_ncpus; i++) { - mrp->mrp_cpu[i] = vmrp->mrp_cpu[i]; + if (vdp->vd_val == RESET_VAL) { + bzero(&mrp->mrp_pool, sizeof (mrp->mrp_pool)); + mrp->mrp_mask |= MRP_POOL; + return (DLADM_STATUS_OK); } - mrp->mrp_ncpus = vmrp->mrp_ncpus; - mrp->mrp_mask |= (MRP_CPUS|MRP_CPUS_USERSPEC); + + (void) strlcpy(mrp->mrp_pool, (char *)vdp->vd_val, + sizeof (mrp->mrp_pool)); + mrp->mrp_mask |= MRP_POOL; + /* + * Use MCM_CPUS since the fanout count is not user specified + * and will be determined by the cpu list generated from the + * pool. 
+ */ mrp->mrp_fanout_mode = MCM_CPUS; - mrp->mrp_intr_cpu = -1; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -i_dladm_priority_get(dladm_handle_t handle, prop_desc_t *pdp, +get_priority(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; mac_resource_props_t mrp; mac_priority_level_t pri; dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) return (status); - bcopy(dip->pr_val, &mrp, sizeof (mac_resource_props_t)); - free(dip); - pri = ((mrp.mrp_mask & MRP_PRIORITY) == 0) ? MPL_HIGH : mrp.mrp_priority; @@ -1797,77 +1898,297 @@ i_dladm_priority_get(dladm_handle_t handle, prop_desc_t *pdp, } /* ARGSUSED */ -static dladm_status_t -do_check_priority(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +dladm_status_t +extract_priority(val_desc_t *vdp, uint_t cnt, void *arg) { - mac_priority_level_t *pri; - dladm_status_t status = DLADM_STATUS_OK; + mac_resource_props_t *mrp = arg; + + if (cnt != 1) + return (DLADM_STATUS_BADVAL); + + mrp->mrp_priority = (mac_priority_level_t)vdp->vd_val; + mrp->mrp_mask |= MRP_PRIORITY; + + return (DLADM_STATUS_OK); +} + +/* + * Determines the size of the structure that needs to be sent to drivers + * for retrieving the property range values. 
+ */ +static int +i_dladm_range_size(mac_propval_range_t *r, size_t *sz) +{ + uint_t count = r->mpr_count; + + *sz = sizeof (mac_propval_range_t); + --count; + + switch (r->mpr_type) { + case MAC_PROPVAL_UINT32: + *sz += (count * sizeof (mac_propval_uint32_range_t)); + return (0); + default: + break; + } + *sz = 0; + return (EINVAL); +} + +/* ARGSUSED */ +static dladm_status_t +check_rings(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *v, datalink_media_t media) +{ if (val_cnt != 1) - return (DLADM_STATUS_BADVALCNT); + return (DLADM_STATUS_BADVAL); + if (strncasecmp(prop_val[0], "hw", strlen("hw")) == 0) { + v->vd_val = UNSPEC_VAL; + } else if (strncasecmp(prop_val[0], "sw", strlen("sw")) == 0) { + v->vd_val = 0; + } else { + v->vd_val = strtoul(prop_val[0], NULL, 0); + if (v->vd_val == 0) + return (DLADM_STATUS_BADVAL); + } + return (DLADM_STATUS_OK); +} - pri = malloc(sizeof (mac_priority_level_t)); - if (pri == NULL) - return (DLADM_STATUS_NOMEM); +/* ARGSUSED */ +static dladm_status_t +get_rings_range(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t *val_cnt, + datalink_media_t media, uint_t flags, uint_t *perm_flags) +{ + dld_ioc_macprop_t *dip; + dladm_status_t status = DLADM_STATUS_OK; + mac_propval_range_t *rangep; + size_t sz; + mac_propval_uint32_range_t *ur; - status = dladm_str2pri(*prop_val, pri); - if (status != DLADM_STATUS_OK) { - free(pri); + sz = sizeof (mac_propval_range_t); + + if ((dip = i_dladm_buf_alloc_by_name(sz, linkid, pdp->pd_name, flags, + &status)) == NULL) + return (status); + + status = i_dladm_macprop(handle, dip, B_FALSE); + if (status != DLADM_STATUS_OK) return (status); + + rangep = (mac_propval_range_t *)(void *)&dip->pr_val; + *val_cnt = 1; + ur = &rangep->mpr_range_uint32[0]; + /* This is the case where the dev doesn't have any rings/groups */ + if (rangep->mpr_count == 0) { + (*prop_val)[0] = '\0'; + /* + * 
This is the case where the dev supports rings, but static + * grouping. + */ + } else if (ur->mpur_min == ur->mpur_max && + ur->mpur_max == 0) { + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, "sw,hw"); + /* + * This is the case where the dev supports rings and dynamic + * grouping, but has only one value (say 2 rings and 2 groups). + */ + } else if (ur->mpur_min == ur->mpur_max) { + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, "sw,hw,%d", + ur->mpur_min); + /* + * This is the case where the dev supports rings and dynamic + * grouping and has a range of rings. + */ + } else { + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, + "sw,hw,<%ld-%ld>", ur->mpur_min, ur->mpur_max); } + free(dip); + return (status); +} - if (*pri < MPL_LOW || *pri > MPL_HIGH) { - free(pri); - return (DLADM_STATUS_BADVAL); + +/* ARGSUSED */ +static dladm_status_t +get_rxrings(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, + uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + dladm_status_t status; + uint32_t nrings = 0; + + /* + * Get the number of (effective-)rings from the resource property. + */ + if (strcmp(pdp->pd_name, "rxrings-effective") == 0) { + status = i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + /* + * Get the permissions from the "rxrings" property. 
+ */ + status = i_dladm_get_public_prop(handle, linkid, "rxrings", + flags, perm_flags, NULL, 0); + if (status != DLADM_STATUS_OK) + return (status); + + status = i_dladm_get_public_prop(handle, linkid, + "resource", flags, NULL, &mrp, sizeof (mrp)); } - vdp->vd_val = (uintptr_t)pri; + if (status != DLADM_STATUS_OK) + return (status); + + if ((mrp.mrp_mask & MRP_RX_RINGS) == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + nrings = mrp.mrp_nrxrings; + *val_cnt = 1; + if (mrp.mrp_mask & MRP_RXRINGS_UNSPEC) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "hw"); + else if (nrings == 0) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "sw"); + else + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%ld", nrings); return (DLADM_STATUS_OK); } /* ARGSUSED */ dladm_status_t -do_extract_priority(val_desc_t *vdp, uint_t cnt, void *arg) +extract_rxrings(val_desc_t *vdp, uint_t cnt, void *arg) { - mac_resource_props_t *mrp = arg; + mac_resource_props_t *mrp = (mac_resource_props_t *)arg; - bcopy((char *)vdp->vd_val, &mrp->mrp_priority, - sizeof (mac_priority_level_t)); - mrp->mrp_mask |= MRP_PRIORITY; + mrp->mrp_nrxrings = 0; + if (vdp->vd_val == RESET_VAL) + mrp->mrp_mask = MRP_RINGS_RESET; + else if (vdp->vd_val == UNSPEC_VAL) + mrp->mrp_mask = MRP_RXRINGS_UNSPEC; + else + mrp->mrp_nrxrings = vdp->vd_val; + mrp->mrp_mask |= MRP_RX_RINGS; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_set_protection(dladm_handle_t handle, prop_desc_t *pdp, +get_txrings(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, + uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + dladm_status_t status; + uint32_t nrings = 0; + + + /* + * Get the number of (effective-)rings from the resource property. 
+ */ + if (strcmp(pdp->pd_name, "txrings-effective") == 0) { + status = i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + /* + * Get the permissions from the "txrings" property. + */ + status = i_dladm_get_public_prop(handle, linkid, "txrings", + flags, perm_flags, NULL, 0); + if (status != DLADM_STATUS_OK) + return (status); + + /* + * Get the number of rings from the "resource" property. + */ + status = i_dladm_get_public_prop(handle, linkid, "resource", + flags, NULL, &mrp, sizeof (mrp)); + } + + if (status != DLADM_STATUS_OK) + return (status); + + if ((mrp.mrp_mask & MRP_TX_RINGS) == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + nrings = mrp.mrp_ntxrings; + *val_cnt = 1; + if (mrp.mrp_mask & MRP_TXRINGS_UNSPEC) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "hw"); + else if (nrings == 0) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "sw"); + else + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%ld", nrings); + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +dladm_status_t +extract_txrings(val_desc_t *vdp, uint_t cnt, void *arg) +{ + mac_resource_props_t *mrp = (mac_resource_props_t *)arg; + + mrp->mrp_ntxrings = 0; + if (vdp->vd_val == RESET_VAL) + mrp->mrp_mask = MRP_RINGS_RESET; + else if (vdp->vd_val == UNSPEC_VAL) + mrp->mrp_mask = MRP_TXRINGS_UNSPEC; + else + mrp->mrp_ntxrings = vdp->vd_val; + mrp->mrp_mask |= MRP_TX_RINGS; + + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +static dladm_status_t +get_cntavail(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, + uint_t *perm_flags) +{ + if (flags & DLD_PROP_DEFAULT) + return (DLADM_STATUS_NOTDEFINED); + + return (get_uint32(handle, pdp, linkid, prop_val, val_cnt, media, + flags, perm_flags)); +} + +/* ARGSUSED */ +static dladm_status_t +set_resource(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t 
val_cnt, uint_t flags, datalink_media_t media) { mac_resource_props_t mrp; dladm_status_t status = DLADM_STATUS_OK; dld_ioc_macprop_t *dip; + int i; bzero(&mrp, sizeof (mac_resource_props_t)); - dip = i_dladm_buf_alloc_by_name(0, linkid, "protection", + dip = i_dladm_buf_alloc_by_name(0, linkid, "resource", flags, &status); if (dip == NULL) return (status); - if (strcmp(pdp->pd_name, "protection") == 0) { - status = do_extract_protection(vdp, val_cnt, &mrp); - if (status != DLADM_STATUS_OK) - goto done; + for (i = 0; i < DLADM_MAX_RSRC_PROP; i++) { + resource_prop_t *rp = &rsrc_prop_table[i]; + + if (strcmp(pdp->pd_name, rp->rp_name) != 0) + continue; - } else if (strcmp(pdp->pd_name, "allowed-ips") == 0) { - status = do_extract_allowedips(vdp, val_cnt, &mrp); + status = rp->rp_extract(vdp, val_cnt, &mrp); if (status != DLADM_STATUS_OK) goto done; - } else { - status = DLADM_STATUS_BADARG; - goto done; + + break; } (void) memcpy(dip->pr_val, &mrp, dip->pr_valsize); @@ -1880,59 +2201,77 @@ done: /* ARGSUSED */ static dladm_status_t -do_get_protection(dladm_handle_t handle, prop_desc_t *pdp, +get_protection(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; mac_resource_props_t mrp; mac_protect_t *p; dladm_status_t status; - int i; + uint32_t i, cnt = 0, setbits[32]; - dip = i_dladm_get_public_prop(handle, linkid, "protection", flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) return (status); - bcopy(dip->pr_val, &mrp, sizeof (mac_resource_props_t)); - free(dip); - p = &mrp.mrp_protect; - if ((mrp.mrp_mask & MRP_PROTECT) != 0 && - strcmp(pdp->pd_name, "protection") == 0) { - uint32_t cnt = 0, setbits[32]; + if ((mrp.mrp_mask & MRP_PROTECT) == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + 
dladm_find_setbits32(p->mp_types, setbits, &cnt); + if (cnt > *val_cnt) + return (DLADM_STATUS_BADVALCNT); - dladm_find_setbits32(p->mp_types, setbits, &cnt); - if (cnt > *val_cnt) - return (DLADM_STATUS_BADVALCNT); + for (i = 0; i < cnt; i++) + (void) dladm_protect2str(setbits[i], prop_val[i]); - for (i = 0; i < cnt; i++) - (void) dladm_protect2str(setbits[i], prop_val[i]); + *val_cnt = cnt; + return (DLADM_STATUS_OK); +} - *val_cnt = cnt; +/* ARGSUSED */ +static dladm_status_t +get_allowedips(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t *val_cnt, + datalink_media_t media, uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + mac_protect_t *p; + dladm_status_t status; + int i; + + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) + return (status); + + p = &mrp.mrp_protect; + if (p->mp_ipaddrcnt == 0) { + *val_cnt = 0; return (DLADM_STATUS_OK); } + if (p->mp_ipaddrcnt > *val_cnt) + return (DLADM_STATUS_BADVALCNT); - if (p->mp_ipaddrcnt > 0 && - strcmp(pdp->pd_name, "allowed-ips") == 0) { - if (p->mp_ipaddrcnt > *val_cnt) - return (DLADM_STATUS_BADVALCNT); + for (i = 0; i < p->mp_ipaddrcnt; i++) { + if (p->mp_ipaddrs[i].ip_version == IPV4_VERSION) { + ipaddr_t v4addr; - for (i = 0; i < p->mp_ipaddrcnt; i++) { - (void) dladm_ipv4addr2str(&p->mp_ipaddrs[i], + v4addr = V4_PART_OF_V6(p->mp_ipaddrs[i].ip_addr); + (void) dladm_ipv4addr2str(&v4addr, prop_val[i]); + } else { + (void) dladm_ipv6addr2str(&p->mp_ipaddrs[i].ip_addr, prop_val[i]); } - *val_cnt = p->mp_ipaddrcnt; - return (DLADM_STATUS_OK); } - - *val_cnt = 0; + *val_cnt = p->mp_ipaddrcnt; return (DLADM_STATUS_OK); } dladm_status_t -do_extract_protection(val_desc_t *vdp, uint_t cnt, void *arg) +extract_protection(val_desc_t *vdp, uint_t cnt, void *arg) { mac_resource_props_t *mrp = arg; uint32_t types = 0; @@ -1947,7 +2286,7 @@ do_extract_protection(val_desc_t *vdp, 
uint_t cnt, void *arg) } dladm_status_t -do_extract_allowedips(val_desc_t *vdp, uint_t cnt, void *arg) +extract_allowedips(val_desc_t *vdp, uint_t cnt, void *arg) { mac_resource_props_t *mrp = arg; mac_protect_t *p = &mrp->mrp_protect; @@ -1956,63 +2295,441 @@ do_extract_allowedips(val_desc_t *vdp, uint_t cnt, void *arg) if (vdp->vd_val == 0) { cnt = (uint_t)-1; } else { - for (i = 0; i < cnt; i++) - p->mp_ipaddrs[i] = (ipaddr_t)vdp[i].vd_val; + for (i = 0; i < cnt; i++) { + bcopy((void *)vdp[i].vd_val, &p->mp_ipaddrs[i], + sizeof (mac_ipaddr_t)); + } } p->mp_ipaddrcnt = cnt; mrp->mrp_mask |= MRP_PROTECT; return (DLADM_STATUS_OK); } +static dladm_status_t +check_single_ip(char *buf, mac_ipaddr_t *addr) +{ + dladm_status_t status; + ipaddr_t v4addr; + in6_addr_t v6addr; + boolean_t isv4 = B_TRUE; + + status = dladm_str2ipv4addr(buf, &v4addr); + if (status == DLADM_STATUS_INVALID_IP) { + status = dladm_str2ipv6addr(buf, &v6addr); + if (status == DLADM_STATUS_OK) + isv4 = B_FALSE; + } + if (status != DLADM_STATUS_OK) + return (status); + + if (isv4) { + if (v4addr == INADDR_ANY) + return (DLADM_STATUS_INVALID_IP); + + IN6_IPADDR_TO_V4MAPPED(v4addr, &addr->ip_addr); + addr->ip_version = IPV4_VERSION; + } else { + if (IN6_IS_ADDR_UNSPECIFIED(&v6addr)) + return (DLADM_STATUS_INVALID_IP); + + addr->ip_addr = v6addr; + addr->ip_version = IPV6_VERSION; + } + return (DLADM_STATUS_OK); +} + /* ARGSUSED */ static dladm_status_t -do_check_allowedips(dladm_handle_t handle, prop_desc_t *pdp, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, +check_allowedips(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, datalink_media_t media) { dladm_status_t status; - ipaddr_t addr; + mac_ipaddr_t *addr; int i; if (val_cnt > MPT_MAXIPADDR) return (DLADM_STATUS_BADVALCNT); for (i = 0; i < val_cnt; i++) { - status = dladm_str2ipv4addr(prop_val[i], &addr); + if ((addr = calloc(1, sizeof (mac_ipaddr_t))) == 
NULL) { + status = DLADM_STATUS_NOMEM; + goto fail; + } + vdp[i].vd_val = (uintptr_t)addr; + + status = check_single_ip(prop_val[i], addr); if (status != DLADM_STATUS_OK) - return (status); + goto fail; + } + return (DLADM_STATUS_OK); - if (addr == 0) - return (DLADM_STATUS_BADVAL); +fail: + for (i = 0; i < val_cnt; i++) { + free((void *)vdp[i].vd_val); + vdp[i].vd_val = NULL; + } + return (status); +} - vdp[i].vd_val = (uintptr_t)addr; +static void +dladm_cid2str(mac_dhcpcid_t *cid, char *buf) +{ + char tmp_buf[DLADM_STRSIZE]; + uint_t hexlen; + + switch (cid->dc_form) { + case CIDFORM_TYPED: { + uint16_t duidtype, hwtype; + uint32_t timestamp, ennum; + char *lladdr; + + if (cid->dc_len < sizeof (duidtype)) + goto fail; + + bcopy(cid->dc_id, &duidtype, sizeof (duidtype)); + duidtype = ntohs(duidtype); + switch (duidtype) { + case DHCPV6_DUID_LLT: { + duid_llt_t llt; + + if (cid->dc_len < sizeof (llt)) + goto fail; + + bcopy(cid->dc_id, &llt, sizeof (llt)); + hwtype = ntohs(llt.dllt_hwtype); + timestamp = ntohl(llt.dllt_time); + lladdr = _link_ntoa(cid->dc_id + sizeof (llt), + NULL, cid->dc_len - sizeof (llt), IFT_OTHER); + if (lladdr == NULL) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "%d.%d.%d.%s", + duidtype, hwtype, timestamp, lladdr); + free(lladdr); + break; + } + case DHCPV6_DUID_EN: { + duid_en_t en; + + if (cid->dc_len < sizeof (en)) + goto fail; + + bcopy(cid->dc_id, &en, sizeof (en)); + ennum = DHCPV6_GET_ENTNUM(&en); + hexlen = sizeof (tmp_buf); + if (octet_to_hexascii(cid->dc_id + sizeof (en), + cid->dc_len - sizeof (en), tmp_buf, &hexlen) != 0) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "%d.%d.%s", + duidtype, ennum, tmp_buf); + break; + } + case DHCPV6_DUID_LL: { + duid_ll_t ll; + + if (cid->dc_len < sizeof (ll)) + goto fail; + + bcopy(cid->dc_id, &ll, sizeof (ll)); + hwtype = ntohs(ll.dll_hwtype); + lladdr = _link_ntoa(cid->dc_id + sizeof (ll), + NULL, cid->dc_len - sizeof (ll), IFT_OTHER); + if (lladdr == NULL) + goto fail; + + 
(void) snprintf(buf, DLADM_STRSIZE, "%d.%d.%s", + duidtype, hwtype, lladdr); + free(lladdr); + break; + } + default: { + hexlen = sizeof (tmp_buf); + if (octet_to_hexascii(cid->dc_id + sizeof (duidtype), + cid->dc_len - sizeof (duidtype), + tmp_buf, &hexlen) != 0) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "%d.%s", + duidtype, tmp_buf); + } + } + break; + } + case CIDFORM_HEX: { + hexlen = sizeof (tmp_buf); + if (octet_to_hexascii(cid->dc_id, cid->dc_len, + tmp_buf, &hexlen) != 0) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "0x%s", tmp_buf); + break; } + case CIDFORM_STR: { + int i; + + for (i = 0; i < cid->dc_len; i++) { + if (!isprint(cid->dc_id[i])) + goto fail; + } + (void) snprintf(buf, DLADM_STRSIZE, "%s", cid->dc_id); + break; + } + default: + goto fail; + } + return; + +fail: + (void) snprintf(buf, DLADM_STRSIZE, "<unknown>"); +} + +static dladm_status_t +dladm_str2cid(char *buf, mac_dhcpcid_t *cid) +{ + char *ptr = buf; + char tmp_buf[DLADM_STRSIZE]; + uint_t hexlen, cidlen; + + bzero(cid, sizeof (*cid)); + if (isdigit(*ptr) && + ptr[strspn(ptr, "0123456789")] == '.') { + char *cp; + ulong_t duidtype; + ulong_t subtype; + ulong_t timestamp; + uchar_t *lladdr; + int addrlen; + + errno = 0; + duidtype = strtoul(ptr, &cp, 0); + if (ptr == cp || errno != 0 || *cp != '.' 
|| + duidtype > USHRT_MAX) + return (DLADM_STATUS_BADARG); + ptr = cp + 1; + + if (duidtype != 0 && duidtype <= DHCPV6_DUID_LL) { + errno = 0; + subtype = strtoul(ptr, &cp, 0); + if (ptr == cp || errno != 0 || *cp != '.') + return (DLADM_STATUS_BADARG); + ptr = cp + 1; + } + switch (duidtype) { + case DHCPV6_DUID_LLT: { + duid_llt_t llt; + + errno = 0; + timestamp = strtoul(ptr, &cp, 0); + if (ptr == cp || errno != 0 || *cp != '.') + return (DLADM_STATUS_BADARG); + + ptr = cp + 1; + lladdr = _link_aton(ptr, &addrlen); + if (lladdr == NULL) + return (DLADM_STATUS_BADARG); + + cidlen = sizeof (llt) + addrlen; + if (cidlen > sizeof (cid->dc_id)) { + free(lladdr); + return (DLADM_STATUS_TOOSMALL); + } + llt.dllt_dutype = htons(duidtype); + llt.dllt_hwtype = htons(subtype); + llt.dllt_time = htonl(timestamp); + bcopy(&llt, cid->dc_id, sizeof (llt)); + bcopy(lladdr, cid->dc_id + sizeof (llt), addrlen); + free(lladdr); + break; + } + case DHCPV6_DUID_LL: { + duid_ll_t ll; + + lladdr = _link_aton(ptr, &addrlen); + if (lladdr == NULL) + return (DLADM_STATUS_BADARG); + + cidlen = sizeof (ll) + addrlen; + if (cidlen > sizeof (cid->dc_id)) { + free(lladdr); + return (DLADM_STATUS_TOOSMALL); + } + ll.dll_dutype = htons(duidtype); + ll.dll_hwtype = htons(subtype); + bcopy(&ll, cid->dc_id, sizeof (ll)); + bcopy(lladdr, cid->dc_id + sizeof (ll), addrlen); + free(lladdr); + break; + } + default: { + hexlen = sizeof (tmp_buf); + if (hexascii_to_octet(ptr, strlen(ptr), + tmp_buf, &hexlen) != 0) + return (DLADM_STATUS_BADARG); + + if (duidtype == DHCPV6_DUID_EN) { + duid_en_t en; + + en.den_dutype = htons(duidtype); + DHCPV6_SET_ENTNUM(&en, subtype); + + cidlen = sizeof (en) + hexlen; + if (cidlen > sizeof (cid->dc_id)) + return (DLADM_STATUS_TOOSMALL); + + bcopy(&en, cid->dc_id, sizeof (en)); + bcopy(tmp_buf, cid->dc_id + sizeof (en), + hexlen); + } else { + uint16_t dutype = htons(duidtype); + + cidlen = sizeof (dutype) + hexlen; + if (cidlen > sizeof (cid->dc_id)) + return 
(DLADM_STATUS_TOOSMALL); + + bcopy(&dutype, cid->dc_id, sizeof (dutype)); + bcopy(tmp_buf, cid->dc_id + sizeof (dutype), + hexlen); + } + break; + } + } + cid->dc_form = CIDFORM_TYPED; + } else if (strncasecmp("0x", ptr, 2) == 0 && ptr[2] != '\0') { + ptr += 2; + hexlen = sizeof (tmp_buf); + if (hexascii_to_octet(ptr, strlen(ptr), tmp_buf, + &hexlen) != 0) { + return (DLADM_STATUS_BADARG); + } + cidlen = hexlen; + if (cidlen > sizeof (cid->dc_id)) + return (DLADM_STATUS_TOOSMALL); + + bcopy(tmp_buf, cid->dc_id, cidlen); + cid->dc_form = CIDFORM_HEX; + } else { + cidlen = strlen(ptr); + if (cidlen > sizeof (cid->dc_id)) + return (DLADM_STATUS_TOOSMALL); + + bcopy(ptr, cid->dc_id, cidlen); + cid->dc_form = CIDFORM_STR; + } + cid->dc_len = cidlen; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_get_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_allowedcids(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t *val_cnt, + datalink_media_t media, uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + mac_protect_t *p; + dladm_status_t status; + int i; + + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) + return (status); + + p = &mrp.mrp_protect; + if (p->mp_cidcnt == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + if (p->mp_cidcnt > *val_cnt) + return (DLADM_STATUS_BADVALCNT); + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *cid = &p->mp_cids[i]; + + dladm_cid2str(cid, prop_val[i]); + } + *val_cnt = p->mp_cidcnt; + return (DLADM_STATUS_OK); +} + +dladm_status_t +extract_allowedcids(val_desc_t *vdp, uint_t cnt, void *arg) +{ + mac_resource_props_t *mrp = arg; + mac_protect_t *p = &mrp->mrp_protect; + int i; + + if (vdp->vd_val == 0) { + cnt = (uint_t)-1; + } else { + for (i = 0; i < cnt; i++) { + bcopy((void *)vdp[i].vd_val, &p->mp_cids[i], + sizeof (mac_dhcpcid_t)); + } 
+ } + p->mp_cidcnt = cnt; + mrp->mrp_mask |= MRP_PROTECT; + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +static dladm_status_t +check_allowedcids(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, + uint_t flags, val_desc_t *vdp, datalink_media_t media) +{ + dladm_status_t status; + mac_dhcpcid_t *cid; + int i; + + if (val_cnt > MPT_MAXCID) + return (DLADM_STATUS_BADVALCNT); + + for (i = 0; i < val_cnt; i++) { + if ((cid = calloc(1, sizeof (mac_dhcpcid_t))) == NULL) { + status = DLADM_STATUS_NOMEM; + goto fail; + } + vdp[i].vd_val = (uintptr_t)cid; + + status = dladm_str2cid(prop_val[i], cid); + if (status != DLADM_STATUS_OK) + goto fail; + } + return (DLADM_STATUS_OK); + +fail: + for (i = 0; i < val_cnt; i++) { + free((void *)vdp[i].vd_val); + vdp[i].vd_val = NULL; + } + return (status); +} + +/* ARGSUSED */ +static dladm_status_t +get_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { struct dlautopush dlap; int i, len; dladm_status_t status; - dld_ioc_macprop_t *dip; - if (flags & MAC_PROP_DEFAULT) + if (flags & DLD_PROP_DEFAULT) return (DLADM_STATUS_NOTDEFINED); - *val_cnt = 1; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) { - (*prop_val)[0] = '\0'; + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &dlap, sizeof (dlap)); + if (status != DLADM_STATUS_OK) + return (status); + + if (dlap.dap_npush == 0) { + *val_cnt = 0; return (DLADM_STATUS_OK); } - (void) memcpy(&dlap, dip->pr_val, sizeof (dlap)); - for (i = 0, len = 0; i < dlap.dap_npush; i++) { if (i != 0) { (void) snprintf(*prop_val + len, @@ -2029,8 +2746,7 @@ do_get_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, len += (strlen(AP_ANCHOR) + 1); } } - free(dip); -done: + *val_cnt = 1; return (DLADM_STATUS_OK); } @@ -2073,8 
+2789,9 @@ i_dladm_add_ap_module(const char *module, struct dlautopush *dlap) */ /* ARGSUSED */ static dladm_status_t -do_check_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { char *module; struct dlautopush *dlap; @@ -2112,7 +2829,7 @@ do_check_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_get_rate_common(dladm_handle_t handle, prop_desc_t *pdp, +get_rate_common(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, uint_t id, uint_t *perm_flags) { @@ -2154,22 +2871,22 @@ done: } static dladm_status_t -do_get_rate_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { if (media != DL_WIFI) { - return (i_dladm_speed_get(handle, pdp, linkid, prop_val, - val_cnt, flags, perm_flags)); + return (get_speed(handle, pdp, linkid, prop_val, + val_cnt, media, flags, perm_flags)); } - return (do_get_rate_common(handle, pdp, linkid, prop_val, val_cnt, + return (get_rate_common(handle, pdp, linkid, prop_val, val_cnt, MAC_PROP_WL_DESIRED_RATES, perm_flags)); } /* ARGSUSED */ static dladm_status_t -do_get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -2182,7 +2899,7 @@ do_get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, return (DLADM_STATUS_NOTSUP); case DL_WIFI: - return (do_get_rate_common(handle, pdp, 
linkid, prop_val, + return (get_rate_common(handle, pdp, linkid, prop_val, val_cnt, MAC_PROP_WL_SUPPORTED_RATES, perm_flags)); default: return (DLADM_STATUS_BADARG); @@ -2190,7 +2907,7 @@ do_get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, } static dladm_status_t -do_set_rate(dladm_handle_t handle, datalink_id_t linkid, +set_wlan_rate(dladm_handle_t handle, datalink_id_t linkid, dladm_wlan_rates_t *rates) { int i; @@ -2218,7 +2935,7 @@ do_set_rate(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_rate_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +set_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { dladm_wlan_rates_t rates; @@ -2236,16 +2953,16 @@ do_set_rate_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, rates.wr_cnt = 1; rates.wr_rates[0] = vdp[0].vd_val; - status = do_set_rate(handle, linkid, &rates); + status = set_wlan_rate(handle, linkid, &rates); -done: return (status); } /* ARGSUSED */ static dladm_status_t -do_check_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { int i; uint_t modval_cnt = MAX_SUPPORT_RATES; @@ -2269,7 +2986,7 @@ do_check_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, i * DLADM_STRSIZE; } - status = do_get_rate_mod(handle, NULL, linkid, modval, &modval_cnt, + status = get_rate_mod(handle, NULL, linkid, modval, &modval_cnt, media, 0, &perm_flags); if (status != DLADM_STATUS_OK) goto done; @@ -2290,7 +3007,7 @@ done: } static dladm_status_t -do_get_phyconf(dladm_handle_t handle, datalink_id_t linkid, void *buf, +get_phyconf(dladm_handle_t handle, datalink_id_t 
linkid, void *buf, int buflen) { return (i_dladm_wlan_param(handle, linkid, buf, MAC_PROP_WL_PHY_CONFIG, @@ -2299,54 +3016,43 @@ do_get_phyconf(dladm_handle_t handle, datalink_id_t linkid, void *buf, /* ARGSUSED */ static dladm_status_t -do_get_channel_prop(dladm_handle_t handle, prop_desc_t *pdp, +get_channel(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { uint32_t channel; char buf[WLDP_BUFSIZE]; - dladm_status_t status = DLADM_STATUS_OK; + dladm_status_t status; wl_phy_conf_t wl_phy_conf; - if ((status = do_get_phyconf(handle, linkid, buf, sizeof (buf))) + if ((status = get_phyconf(handle, linkid, buf, sizeof (buf))) != DLADM_STATUS_OK) - goto done; + return (status); (void) memcpy(&wl_phy_conf, buf, sizeof (wl_phy_conf)); - if (!i_dladm_wlan_convert_chan(&wl_phy_conf, &channel)) { - status = DLADM_STATUS_NOTFOUND; - goto done; - } + if (!i_dladm_wlan_convert_chan(&wl_phy_conf, &channel)) + return (DLADM_STATUS_NOTFOUND); (void) snprintf(*prop_val, DLADM_STRSIZE, "%u", channel); *val_cnt = 1; *perm_flags = MAC_PROP_PERM_READ; -done: - return (status); -} - -static dladm_status_t -do_get_powermode(dladm_handle_t handle, datalink_id_t linkid, void *buf, - int buflen) -{ - return (i_dladm_wlan_param(handle, linkid, buf, MAC_PROP_WL_POWER_MODE, - buflen, B_FALSE)); + return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_get_powermode_prop(dladm_handle_t handle, prop_desc_t *pdp, +get_powermode(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { wl_ps_mode_t mode; const char *s; char buf[WLDP_BUFSIZE]; - dladm_status_t status = DLADM_STATUS_OK; + dladm_status_t status; - if ((status = do_get_powermode(handle, linkid, buf, sizeof (buf))) - != DLADM_STATUS_OK) - goto done; + if ((status = i_dladm_wlan_param(handle, linkid, buf, + 
MAC_PROP_WL_POWER_MODE, sizeof (buf), B_FALSE)) != DLADM_STATUS_OK) + return (status); (void) memcpy(&mode, buf, sizeof (mode)); switch (mode.wl_ps_mode) { @@ -2360,25 +3066,29 @@ do_get_powermode_prop(dladm_handle_t handle, prop_desc_t *pdp, s = "fast"; break; default: - status = DLADM_STATUS_NOTFOUND; - goto done; + return (DLADM_STATUS_NOTFOUND); } (void) snprintf(*prop_val, DLADM_STRSIZE, "%s", s); *val_cnt = 1; *perm_flags = MAC_PROP_PERM_RW; -done: - return (status); + return (DLADM_STATUS_OK); } +/* ARGSUSED */ static dladm_status_t -do_set_powermode(dladm_handle_t handle, datalink_id_t linkid, - dladm_wlan_powermode_t *pm) +set_powermode(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, + datalink_media_t media) { - wl_ps_mode_t ps_mode; + dladm_wlan_powermode_t powermode = vdp->vd_val; + wl_ps_mode_t ps_mode; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); (void) memset(&ps_mode, 0xff, sizeof (ps_mode)); - switch (*pm) { + switch (powermode) { case DLADM_WLAN_PM_OFF: ps_mode.wl_ps_mode = WL_PM_AM; break; @@ -2397,42 +3107,18 @@ do_set_powermode(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_powermode_prop(dladm_handle_t handle, prop_desc_t *pdp, - datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, - datalink_media_t media) -{ - dladm_wlan_powermode_t powermode = (dladm_wlan_powermode_t)vdp->vd_val; - dladm_status_t status; - - if (val_cnt != 1) - return (DLADM_STATUS_BADVALCNT); - - status = do_set_powermode(handle, linkid, &powermode); - - return (status); -} - -static dladm_status_t -do_get_radio(dladm_handle_t handle, datalink_id_t linkid, void *buf, int buflen) -{ - return (i_dladm_wlan_param(handle, linkid, buf, MAC_PROP_WL_RADIO, - buflen, B_FALSE)); -} - -/* ARGSUSED */ -static dladm_status_t -do_get_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_radio(dladm_handle_t handle, prop_desc_t 
*pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { wl_radio_t radio; const char *s; char buf[WLDP_BUFSIZE]; - dladm_status_t status = DLADM_STATUS_OK; + dladm_status_t status; - if ((status = do_get_radio(handle, linkid, buf, sizeof (buf))) - != DLADM_STATUS_OK) - goto done; + if ((status = i_dladm_wlan_param(handle, linkid, buf, + MAC_PROP_WL_RADIO, sizeof (buf), B_FALSE)) != DLADM_STATUS_OK) + return (status); (void) memcpy(&radio, buf, sizeof (radio)); switch (radio) { @@ -2443,23 +3129,26 @@ do_get_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, s = "off"; break; default: - status = DLADM_STATUS_NOTFOUND; - goto done; + return (DLADM_STATUS_NOTFOUND); } (void) snprintf(*prop_val, DLADM_STRSIZE, "%s", s); *val_cnt = 1; *perm_flags = MAC_PROP_PERM_RW; -done: - return (status); + return (DLADM_STATUS_OK); } +/* ARGSUSED */ static dladm_status_t -do_set_radio(dladm_handle_t handle, datalink_id_t linkid, - dladm_wlan_radio_t *radio) +set_radio(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { - wl_radio_t r; + dladm_wlan_radio_t radio = vdp->vd_val; + wl_radio_t r; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); - switch (*radio) { + switch (radio) { case DLADM_WLAN_RADIO_ON: r = B_TRUE; break; @@ -2475,25 +3164,9 @@ do_set_radio(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) -{ - dladm_wlan_radio_t radio = (dladm_wlan_radio_t)vdp->vd_val; - dladm_status_t status; - - if (val_cnt != 1) - return (DLADM_STATUS_BADVALCNT); - - status = do_set_radio(handle, linkid, &radio); - - return (status); -} - -/* ARGSUSED */ -static dladm_status_t -do_check_hoplimit(dladm_handle_t handle, prop_desc_t 
*pdp, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, - datalink_media_t media) +check_hoplimit(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *vdp, datalink_media_t media) { int32_t hlim; char *ep; @@ -2512,8 +3185,9 @@ do_check_hoplimit(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -do_check_encaplim(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_encaplim(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { int32_t elim; char *ep; @@ -2685,7 +3359,6 @@ i_dladm_buf_alloc_impl(size_t valsize, datalink_id_t linkid, bzero(dip, dsize); dip->pr_valsize = valsize; (void) strlcpy(dip->pr_name, prop_name, sizeof (dip->pr_name)); - dip->pr_version = MAC_PROP_VERSION; dip->pr_linkid = linkid; dip->pr_num = propid; dip->pr_flags = flags; @@ -2718,7 +3391,7 @@ i_dladm_buf_alloc_by_id(size_t valsize, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -i_dladm_set_public_prop(dladm_handle_t handle, prop_desc_t *pdp, +set_public_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { @@ -2783,32 +3456,37 @@ i_dladm_macprop(dladm_handle_t handle, void *dip, boolean_t set) return (status); } -static dld_ioc_macprop_t * +static dladm_status_t i_dladm_get_public_prop(dladm_handle_t handle, datalink_id_t linkid, - char *prop_name, uint_t flags, dladm_status_t *status, uint_t *perm_flags) + char *prop_name, uint_t flags, uint_t *perm_flags, void *arg, size_t size) { - dld_ioc_macprop_t *dip = NULL; + dld_ioc_macprop_t *dip; + dladm_status_t status; - dip = i_dladm_buf_alloc_by_name(0, linkid, prop_name, flags, status); + dip = i_dladm_buf_alloc_by_name(0, 
linkid, prop_name, flags, &status); if (dip == NULL) - return (NULL); + return (DLADM_STATUS_NOMEM); - *status = i_dladm_macprop(handle, dip, B_FALSE); - if (*status != DLADM_STATUS_OK) { + status = i_dladm_macprop(handle, dip, B_FALSE); + if (status != DLADM_STATUS_OK) { free(dip); - return (NULL); + return (status); } + if (perm_flags != NULL) *perm_flags = dip->pr_perm_flags; - return (dip); + if (arg != NULL) + (void) memcpy(arg, dip->pr_val, size); + free(dip); + return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -i_dladm_uint32_check(dladm_handle_t handle, prop_desc_t *pdp, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *v, - datalink_media_t media) +check_uint32(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *v, datalink_media_t media) { if (val_cnt != 1) return (DLADM_STATUS_BADVAL); @@ -2818,7 +3496,7 @@ i_dladm_uint32_check(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_duplex_get(dladm_handle_t handle, prop_desc_t *pdp, +get_duplex(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -2846,8 +3524,9 @@ i_dladm_duplex_get(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_speed_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t *val_cnt, uint_t flags, uint_t *perm_flags) +get_speed(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, + uint_t *perm_flags) { uint64_t ifspeed = 0; dladm_status_t status; @@ -2870,14 +3549,14 @@ i_dladm_speed_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -i_dladm_status_get(dladm_handle_t handle, prop_desc_t *pdp, +get_link_state(dladm_handle_t handle, 
prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { link_state_t link_state; dladm_status_t status; - status = i_dladm_get_state(handle, linkid, &link_state); + status = dladm_get_state(handle, linkid, &link_state); if (status != DLADM_STATUS_OK) return (status); @@ -2899,74 +3578,45 @@ i_dladm_status_get(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_binary_get(dladm_handle_t handle, prop_desc_t *pdp, +get_binary(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; - dladm_status_t status; + dladm_status_t status; + uint_t v = 0; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &v, sizeof (v)); + if (status != DLADM_STATUS_OK) return (status); - (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%x", dip->pr_val[0]); - free(dip); + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%d", (uint_t)(v > 0)); *val_cnt = 1; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -i_dladm_uint32_get(dladm_handle_t handle, prop_desc_t *pdp, +get_uint32(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; - uint32_t v = 0; - uchar_t *cp; - dladm_status_t status; + dladm_status_t status; + uint32_t v = 0; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &v, sizeof (v)); + if (status != DLADM_STATUS_OK) return (status); - cp = (uchar_t *)dip->pr_val; - (void) memcpy(&v, cp, sizeof (v)); (void) 
snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%ld", v); - free(dip); *val_cnt = 1; return (DLADM_STATUS_OK); } -/* - * Determines the size of the structure that needs to be sent to drivers - * for retrieving the property range values. - */ -static int -i_dladm_range_size(mac_propval_range_t *r, size_t *sz) -{ - uint_t count = r->mpr_count; - - *sz = sizeof (mac_propval_range_t); - --count; - - switch (r->mpr_type) { - case MAC_PROPVAL_UINT32: - *sz += (count * sizeof (mac_propval_uint32_range_t)); - return (0); - default: - break; - } - *sz = 0; - return (EINVAL); -} - /* ARGSUSED */ static dladm_status_t -i_dladm_range_get(dladm_handle_t handle, prop_desc_t *pdp, +get_range(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -3004,14 +3654,20 @@ retry: free(dip); return (status); } + rangep = (mac_propval_range_t *)(void *)&dip->pr_val; + if (rangep->mpr_count == 0) { + *val_cnt = 1; + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, "--"); + goto done; + } switch (rangep->mpr_type) { case MAC_PROPVAL_UINT32: { mac_propval_uint32_range_t *ur; uint_t count = rangep->mpr_count, i; - ur = &rangep->range_uint32[0]; + ur = &rangep->mpr_range_uint32[0]; for (i = 0; i < count; i++, ur++) { if (ur->mpur_min == ur->mpur_max) { @@ -3029,26 +3685,24 @@ retry: status = DLADM_STATUS_BADARG; break; } +done: free(dip); return (status); } /* ARGSUSED */ static dladm_status_t -i_dladm_tagmode_get(dladm_handle_t handle, prop_desc_t *pdp, +get_tagmode(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; link_tagmode_t mode; dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &mode, sizeof 
(mode)); + if (status != DLADM_STATUS_OK) return (status); - (void) memcpy(&mode, dip->pr_val, sizeof (mode)); - free(dip); switch (mode) { case LINK_TAGMODE_NORMAL: @@ -3066,22 +3720,18 @@ i_dladm_tagmode_get(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_flowctl_get(dladm_handle_t handle, prop_desc_t *pdp, +get_flowctl(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; - link_flowctrl_t v; - dladm_status_t status; - uchar_t *cp; + link_flowctrl_t v; + dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &v, sizeof (v)); + if (status != DLADM_STATUS_OK) return (status); - cp = (uchar_t *)dip->pr_val; - (void) memcpy(&v, cp, sizeof (v)); switch (v) { case LINK_FLOWCTRL_NONE: (void) sprintf(*prop_val, "no"); @@ -3096,7 +3746,6 @@ i_dladm_flowctl_get(dladm_handle_t handle, prop_desc_t *pdp, (void) sprintf(*prop_val, "bi"); break; } - free(dip); *val_cnt = 1; return (DLADM_STATUS_OK); } @@ -3141,7 +3790,7 @@ i_dladm_set_private_prop(dladm_handle_t handle, datalink_id_t linkid, } dip = i_dladm_buf_alloc_by_name(bufsize + 1, linkid, prop_name, - (prop_val != NULL ? 0 : MAC_PROP_DEFAULT), &status); + (prop_val != NULL ? 0 : DLD_PROP_DEFAULT), &status); if (dip == NULL) return (status); @@ -3255,7 +3904,7 @@ i_dladm_getset_defval(dladm_handle_t handle, prop_desc_t *pdp, * a setprop to reset the value to default. 
*/ status = pdp->pd_get(handle, pdp, linkid, prop_vals, &cnt, media, - MAC_PROP_DEFAULT, &perm_flags); + DLD_PROP_DEFAULT, &perm_flags); if (status == DLADM_STATUS_OK) { if (perm_flags == MAC_PROP_PERM_RW) { status = i_dladm_set_single_prop(handle, linkid, @@ -3270,7 +3919,7 @@ i_dladm_getset_defval(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -get_stp_prop(dladm_handle_t handle, struct prop_desc *pd, datalink_id_t linkid, +get_stp(dladm_handle_t handle, struct prop_desc *pd, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -3336,8 +3985,8 @@ set_stp_prop(dladm_handle_t handle, prop_desc_t *pd, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t check_stp_prop(dladm_handle_t handle, struct prop_desc *pd, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, - datalink_media_t media) + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *vdp, datalink_media_t media) { char *cp; boolean_t iscost; @@ -3488,8 +4137,8 @@ set_bridge_pvid(dladm_handle_t handle, prop_desc_t *pd, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t check_bridge_pvid(dladm_handle_t handle, struct prop_desc *pd, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, - datalink_media_t media) + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *vdp, datalink_media_t media) { char *cp; @@ -3613,7 +4262,7 @@ dladm_link_get_proplist(dladm_handle_t handle, datalink_id_t linkid, */ static dladm_status_t i_dladm_link_proplist_extract_one(dladm_handle_t handle, - dladm_arg_list_t *proplist, const char *name, void *arg) + dladm_arg_list_t *proplist, const char *name, uint_t flags, void *arg) { dladm_status_t status; dladm_arg_info_t *aip = NULL; @@ -3647,7 +4296,7 @@ i_dladm_link_proplist_extract_one(dladm_handle_t handle, /* Check property value */ if (pdp->pd_check != NULL) { status = 
pdp->pd_check(handle, pdp, 0, aip->ai_val, - aip->ai_count, vdp, 0); + aip->ai_count, flags, vdp, 0); } else { status = DLADM_STATUS_BADARG; } @@ -3684,14 +4333,14 @@ i_dladm_link_proplist_extract_one(dladm_handle_t handle, */ dladm_status_t dladm_link_proplist_extract(dladm_handle_t handle, dladm_arg_list_t *proplist, - mac_resource_props_t *mrp) + mac_resource_props_t *mrp, uint_t flags) { dladm_status_t status; int i; for (i = 0; i < DLADM_MAX_RSRC_PROP; i++) { status = i_dladm_link_proplist_extract_one(handle, - proplist, rsrc_prop_table[i].rp_name, mrp); + proplist, rsrc_prop_table[i].rp_name, flags, mrp); if (status != DLADM_STATUS_OK) return (status); } @@ -3708,20 +4357,13 @@ dladm_perm2str(uint_t perm, char *buf) } dladm_status_t -i_dladm_get_state(dladm_handle_t handle, datalink_id_t linkid, +dladm_get_state(dladm_handle_t handle, datalink_id_t linkid, link_state_t *state) { - dld_ioc_macprop_t *dip; - dladm_status_t status; uint_t perms; - dip = i_dladm_get_public_prop(handle, linkid, "state", 0, &status, - &perms); - if (status != DLADM_STATUS_OK) - return (status); - (void) memcpy(state, dip->pr_val, sizeof (*state)); - free(dip); - return (status); + return (i_dladm_get_public_prop(handle, linkid, "state", 0, + &perms, state, sizeof (*state))); } boolean_t @@ -3752,3 +4394,45 @@ dladm_attr_is_linkprop(const char *name) return (!is_nonprop); } + +dladm_status_t +dladm_linkprop_is_set(dladm_handle_t handle, datalink_id_t linkid, + dladm_prop_type_t type, const char *prop_name, boolean_t *is_set) +{ + char *buf, **propvals; + uint_t valcnt = DLADM_MAX_PROP_VALCNT; + int i; + dladm_status_t status = DLADM_STATUS_OK; + + *is_set = B_FALSE; + + if ((buf = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) * + DLADM_MAX_PROP_VALCNT)) == NULL) + return (DLADM_STATUS_NOMEM); + + propvals = (char **)(void *)buf; + for (i = 0; i < valcnt; i++) { + propvals[i] = buf + + sizeof (char *) * DLADM_MAX_PROP_VALCNT + + i * DLADM_PROP_VAL_MAX; + } + + if 
(dladm_get_linkprop(handle, linkid, type, prop_name, propvals, + &valcnt) != DLADM_STATUS_OK) { + goto done; + } + + if ((strcmp(prop_name, "pool") == 0) && (strlen(*propvals) != 0)) { + *is_set = B_TRUE; + } else if ((strcmp(prop_name, "cpus") == 0) && (valcnt != 0)) { + *is_set = B_TRUE; + } else if ((strcmp(prop_name, "_softmac") == 0) && (valcnt != 0) && + (strcmp(propvals[0], "true") == 0)) { + *is_set = B_TRUE; + } + +done: + if (buf != NULL) + free(buf); + return (status); +} diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers index f64b2d3cd1..429355e6a0 100644 --- a/usr/src/lib/libdladm/common/mapfile-vers +++ b/usr/src/lib/libdladm/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -57,6 +57,7 @@ SUNWprivate_1.1 { dladm_set_linkprop; dladm_walk_linkprop; dladm_attr_is_linkprop; + dladm_linkprop_is_set; dladm_valid_secobj_name; dladm_init_secobj; dladm_get_secobj; @@ -161,6 +162,8 @@ SUNWprivate_1.1 { dladm_str2protect; dladm_ipv4addr2str; dladm_str2ipv4addr; + dladm_ipv6addr2str; + dladm_str2ipv6addr; dladm_start_usagelog; dladm_stop_usagelog; dladm_walk_usage_res; @@ -241,6 +244,18 @@ SUNWprivate_1.1 { dladm_bridge_get_nick; dladm_bridge_set_nick; dladm_bridge_get_privprop; + + dladm_link_stat_query; + dladm_link_stat_diffchain; + dladm_link_stat_free; + dladm_link_stat_query_all; + dladm_link_stat_query_all_free; + + dladm_flow_stat_query; + dladm_flow_stat_diff; + dladm_flow_stat_free; + dladm_flow_stat_query_all; + dladm_flow_stat_query_all_free; local: *; }; diff --git a/usr/src/lib/libdladm/common/usage.c b/usr/src/lib/libdladm/common/usage.c index 82a13e4f5f..a74e81ee59 100644 --- a/usr/src/lib/libdladm/common/usage.c +++ b/usr/src/lib/libdladm/common/usage.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1333,8 +1333,10 @@ dladm_usage_summary(int (*fn)(dladm_usage_t *, void *), int logtype, ns = ne->net_entry_tstats; nd = ne->net_entry_desc; - if (ns->net_stat_ibytes + ns->net_stat_obytes == 0) + if (ns->net_stat_ibytes + ns->net_stat_obytes == 0) { + ne = ne->net_entry_next; continue; + } bcopy(&nd->net_desc_name, &usage.du_name, sizeof (usage.du_name)); usage.du_duration = ne->net_entry_ttime; diff --git a/usr/src/lib/libinetutil/common/ofmt.c b/usr/src/lib/libinetutil/common/ofmt.c index c0fce1839e..9d5ad12e53 100644 --- a/usr/src/lib/libinetutil/common/ofmt.c +++ b/usr/src/lib/libinetutil/common/ofmt.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include <errno.h> @@ -62,11 +62,9 @@ typedef struct ofmt_state_s { uint_t os_overflow; struct winsize os_winsize; int os_nrow; - boolean_t os_parsable; - boolean_t os_wrap; + uint_t os_flags; int os_nbad; char **os_badfields; - boolean_t os_multiline; int os_maxnamelen; /* longest name (f. 
multiline) */ } ofmt_state_t; /* @@ -187,8 +185,8 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, ofmt_state_t *os; int nfields = 0; ofmt_status_t err = OFMT_SUCCESS; - boolean_t parsable = ((flags & OFMT_PARSABLE) != 0); - boolean_t wrap = ((flags & OFMT_WRAP) != 0); + boolean_t parsable = (flags & OFMT_PARSABLE); + boolean_t wrap = (flags & OFMT_WRAP); boolean_t multiline = (flags & OFMT_MULTILINE); *ofmt = NULL; @@ -231,10 +229,8 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, goto nomem; *ofmt = os; os->os_fields = (ofmt_field_t *)&os[1]; - os->os_parsable = parsable; - os->os_wrap = wrap; + os->os_flags = flags; - os->os_multiline = multiline; of = os->os_fields; of_index = 0; /* @@ -321,13 +317,16 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, uint_t width = ofp->of_width; uint_t valwidth; uint_t compress; + boolean_t parsable = (os->os_flags & OFMT_PARSABLE); + boolean_t multiline = (os->os_flags & OFMT_MULTILINE); + boolean_t rightjust = (os->os_flags & OFMT_RIGHTJUST); char c; /* * Parsable fields are separated by ':'. If such a field contains * a ':' or '\', this character is prefixed by a '\'. 
*/ - if (os->os_parsable) { + if (parsable) { if (os->os_nfields == 1) { (void) printf("%s", value); return; @@ -339,7 +338,7 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, } if (!os->os_lastfield) (void) putchar(':'); - } else if (os->os_multiline) { + } else if (multiline) { if (value[0] == '\0') value = OFMT_VAL_UNDEF; (void) printf("%*.*s: %s", os->os_maxnamelen, @@ -348,7 +347,10 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, (void) putchar('\n'); } else { if (os->os_lastfield) { - (void) printf("%s", value); + if (rightjust) + (void) printf("%*s", width, value); + else + (void) printf("%s", value); os->os_overflow = 0; return; } @@ -356,7 +358,10 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, valwidth = strlen(value); if (valwidth + os->os_overflow >= width) { os->os_overflow += valwidth - width + 1; - (void) printf("%s ", value); + if (rightjust) + (void) printf("%*s ", width, value); + else + (void) printf("%s ", value); return; } @@ -365,7 +370,10 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, os->os_overflow -= compress; width -= compress; } - (void) printf("%-*s", width, value); + if (rightjust) + (void) printf("%*s ", width, value); + else + (void) printf("%-*s", width, value); } } @@ -417,20 +425,23 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) boolean_t escsep, more_rows; ofmt_arg_t ofarg; split_t **sp = NULL; + boolean_t parsable = (os->os_flags & OFMT_PARSABLE); + boolean_t multiline = (os->os_flags & OFMT_MULTILINE); + boolean_t wrap = (os->os_flags & OFMT_WRAP); - if (os->os_wrap) { + if (wrap) { sp = calloc(sizeof (split_t *), os->os_nfields); if (sp == NULL) return; } - if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && !os->os_parsable && - !os->os_multiline) { + if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && + !parsable && !multiline) { ofmt_print_header(os); os->os_nrow++; } - if (os->os_multiline && os->os_nrow > 1) + if 
(multiline && os->os_nrow > 1) (void) putchar('\n'); of = os->os_fields; @@ -443,7 +454,7 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) ofarg.ofmt_cbarg = arg; if ((*of[i].of_cb)(&ofarg, value, sizeof (value))) { - if (os->os_wrap) { + if (wrap) { /* * 'value' will be split at comma boundaries * and stored into sp[i]. @@ -454,8 +465,9 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) sp[i]->s_currfield < sp[i]->s_nfields) more_rows = B_TRUE; } + ofmt_print_field(os, &of[i], - (*value == '\0' && !os->os_parsable) ? + (*value == '\0' && !parsable) ? OFMT_VAL_UNDEF : value, escsep); } else { ofmt_print_field(os, &of[i], OFMT_VAL_UNKNOWN, escsep); @@ -531,6 +543,7 @@ ofmt_strerror(ofmt_handle_t ofmt, ofmt_status_t err, char *buf, uint_t bufsize) int i; const char *s; char ebuf[OFMT_BUFSIZE]; + boolean_t parsable; /* * ebuf is intended for optional error-specific data to be appended @@ -547,7 +560,8 @@ ofmt_strerror(ofmt_handle_t ofmt, ofmt_status_t err, char *buf, uint_t bufsize) * Enumerate the singular/plural version of the warning * and error to simplify and improve localization. */ - if (!os->os_parsable) { + parsable = (os->os_flags & OFMT_PARSABLE); + if (!parsable) { if (os->os_nbad > 1) s = "ignoring unknown output fields:"; else diff --git a/usr/src/lib/libinetutil/common/ofmt.h b/usr/src/lib/libinetutil/common/ofmt.h index 81693ae325..e69d43e20a 100644 --- a/usr/src/lib/libinetutil/common/ofmt.h +++ b/usr/src/lib/libinetutil/common/ofmt.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -152,6 +152,8 @@ typedef enum { */ typedef struct ofmt_arg_s { uint_t ofmt_id; + uint_t ofmt_width; + uint_t ofmt_index; void *ofmt_cbarg; } ofmt_arg_t; @@ -178,6 +180,7 @@ extern ofmt_status_t ofmt_open(const char *, const ofmt_field_t *, uint_t, #define OFMT_PARSABLE 0x00000001 /* machine parsable mode */ #define OFMT_WRAP 0x00000002 /* wrap output if field width is exceeded */ #define OFMT_MULTILINE 0x00000004 /* "long" output: "name: value" lines */ +#define OFMT_RIGHTJUST 0x00000008 /* right justified output */ /* * ofmt_close() must be called to free resources associated diff --git a/usr/src/lib/libsecdb/exec_attr.txt b/usr/src/lib/libsecdb/exec_attr.txt index d6258a1e2c..db47b15bd0 100644 --- a/usr/src/lib/libsecdb/exec_attr.txt +++ b/usr/src/lib/libsecdb/exec_attr.txt @@ -171,8 +171,10 @@ Network Management:solaris:cmd:::/sbin/routeadm:euid=0;\ privs=proc_chroot,proc_owner,sys_ip_config Network Management:solaris:cmd:::/sbin/dladm:euid=dladm;egid=netadm;\ privs=sys_dl_config,net_rawaccess,proc_audit -Network Management:solaris:cmd:::/sbin/flowadm:euid=dladm;egid=netadm;\ +Network Management:solaris:cmd:::/sbin/dlstat:euid=dladm;egid=sys; +Network Management:solaris:cmd:::/sbin/flowadm:euid=dladm;egid=sys;\ privs=sys_dl_config,net_rawaccess,proc_audit +Network Management:solaris:cmd:::/sbin/flowstat:euid=dladm;egid=sys; Network Management:suser:cmd:::/usr/bin/netstat:uid=0 Network Management:suser:cmd:::/usr/bin/rup:euid=0 Network Management:suser:cmd:::/usr/bin/ruptime:euid=0 @@ -189,6 +191,7 @@ Network Management:suser:cmd:::/usr/sbin/spray:euid=0 Network Observability:solaris:cmd:::/usr/sbin/snoop:privs=net_observability Network Link Security:solaris:cmd:::/sbin/dladm:euid=dladm;egid=sys;\ privs=sys_dl_config,net_rawaccess,proc_audit +Network Link Security:solaris:cmd:::/sbin/dlstat:euid=dladm;egid=sys; Network IPsec Management:solaris:cmd:::/usr/lib/inet/certdb:euid=0;privs=none Network IPsec 
Management:solaris:cmd:::/usr/lib/inet/certlocal:euid=0;privs=none Network IPsec Management:solaris:cmd:::/usr/lib/inet/certrldb:euid=0;privs=none diff --git a/usr/src/lib/libzonecfg/Makefile.com b/usr/src/lib/libzonecfg/Makefile.com index f2b2bd03b9..b0ab5e7e61 100644 --- a/usr/src/lib/libzonecfg/Makefile.com +++ b/usr/src/lib/libzonecfg/Makefile.com @@ -19,11 +19,9 @@ # CDDL HEADER END # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# LIBRARY= libzonecfg.a VERS= .1 diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c index ab0f4c498d..bdd63a31e8 100644 --- a/usr/src/lib/libzonecfg/common/libzonecfg.c +++ b/usr/src/lib/libzonecfg/common/libzonecfg.c @@ -4321,6 +4321,26 @@ zonecfg_bind_pool(zone_dochandle_t handle, zoneid_t zoneid, char *pool_err, return (Z_OK); } +int +zonecfg_get_poolname(zone_dochandle_t handle, char *zone, char *pool, + size_t poolsize) +{ + int err; + struct zone_psettab pset_tab; + + err = zonecfg_lookup_pset(handle, &pset_tab); + if ((err != Z_NO_ENTRY) && (err != Z_OK)) + return (err); + + /* pset was found so a temporary pool was created */ + if (err == Z_OK) { + (void) snprintf(pool, poolsize, TMP_POOL_NAME, zone); + return (Z_OK); + } + + /* lookup the poolname in zonecfg */ + return (zonecfg_get_pool(handle, pool, poolsize)); +} static boolean_t svc_enabled(char *svc_name) diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers index dbc66657b3..e8e3021648 100644 --- a/usr/src/lib/libzonecfg/common/mapfile-vers +++ b/usr/src/lib/libzonecfg/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# @@ -129,6 +129,7 @@ SUNWprivate_1.1 { zonecfg_getnwifent; zonecfg_getpkgdata; zonecfg_get_pool; + zonecfg_get_poolname; zonecfg_get_privset; zonecfg_getpsetent; zonecfg_getrctlent; diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 004d8894b8..19a5ec6bfb 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -2649,10 +2649,12 @@ link path=usr/sbin/cryptoadm target=../../sbin/cryptoadm link path=usr/sbin/dcopy target=./clri link path=usr/sbin/devnm target=./df link path=usr/sbin/dladm target=../../sbin/dladm +link path=usr/sbin/dlstat target=../../sbin/dlstat link path=usr/sbin/edquota target=../lib/fs/ufs/edquota link path=usr/sbin/fdisk target=../../sbin/fdisk link path=usr/sbin/fiocompress target=../../sbin/fiocompress link path=usr/sbin/flowadm target=../../sbin/flowadm +link path=usr/sbin/flowstat target=../../sbin/flowstat link path=usr/sbin/fsdb target=./clri link path=usr/sbin/fsirand target=../lib/fs/ufs/fsirand link path=usr/sbin/fssnap target=./clri diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 633e04cb85..828c6eaa42 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1162,6 +1162,7 @@ file path=usr/include/sys/lwp_timer_impl.h file path=usr/include/sys/lwp_upimutex_impl.h file path=usr/include/sys/mac.h file path=usr/include/sys/mac_flow.h +file path=usr/include/sys/mac_provider.h file path=usr/include/sys/machelf.h file path=usr/include/sys/machlock.h file path=usr/include/sys/machsig.h diff --git a/usr/src/pkg/manifests/system-network.mf b/usr/src/pkg/manifests/system-network.mf index 1384abb2ec..a0f2b9af96 100644 --- a/usr/src/pkg/manifests/system-network.mf +++ b/usr/src/pkg/manifests/system-network.mf @@ -77,7 +77,9 @@ file path=etc/nwam/loc/NoNet/ipf.conf.dfl group=netadm owner=netadm \ file path=etc/nwam/loc/NoNet/ipf6.conf.dfl group=netadm owner=netadm \ preserve=true file 
path=sbin/dladm mode=0555 +file path=sbin/dlstat mode=0555 file path=sbin/flowadm mode=0555 +file path=sbin/flowstat mode=0555 legacy pkg=SUNWcnetr arch=$(ARCH) category=system \ desc="core software for network infrastructure configuration" \ hotline="Please contact your local service provider" \ diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c index d596c313c5..8f305114d1 100644 --- a/usr/src/uts/common/inet/ip/ip6_input.c +++ b/usr/src/uts/common/inet/ip/ip6_input.c @@ -1910,6 +1910,13 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, hck_flags = DB_CKSUMFLAGS(mp); + if (hck_flags & HCK_FULLCKSUM_OK) { + /* + * Hardware has already verified the checksum. + */ + return (B_TRUE); + } + if (hck_flags & HCK_FULLCKSUM) { /* * Full checksum has been computed by the hardware @@ -1918,9 +1925,6 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, * order to protect against faulty hardware, compare * it against -0 (0xFFFF) to see if it's valid. */ - if (hck_flags & HCK_FULLCKSUM_OK) - return (B_TRUE); - cksum = DB_CKSUM16(mp); if (cksum == 0xFFFF) return (B_TRUE); diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c index a54b2e8737..0781560daf 100644 --- a/usr/src/uts/common/inet/ip/ip_input.c +++ b/usr/src/uts/common/inet/ip/ip_input.c @@ -2260,6 +2260,13 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, hck_flags = DB_CKSUMFLAGS(mp); + if (hck_flags & HCK_FULLCKSUM_OK) { + /* + * Hardware has already verified the checksum. + */ + return (B_TRUE); + } + if (hck_flags & HCK_FULLCKSUM) { /* * Full checksum has been computed by the hardware @@ -2268,9 +2275,6 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, * order to protect against faulty hardware, compare * it against -0 (0xFFFF) to see if it's valid. 
*/ - if (hck_flags & HCK_FULLCKSUM_OK) - return (B_TRUE); - cksum = DB_CKSUM16(mp); if (cksum == 0xFFFF) return (B_TRUE); diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c index 0d0d943676..3849d1fe06 100644 --- a/usr/src/uts/common/inet/ip/ip_netinfo.c +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c @@ -1175,10 +1175,10 @@ ip_isvalidchecksum(net_handle_t neti, mblk_t *mp) ASSERT(mp != NULL); if (dohwcksum && - DB_CKSUM16(mp) != 0xFFFF && - (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) && - (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) && - (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) + ((DB_CKSUM16(mp) != 0xFFFF && + (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM)) || + (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK)) && + (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM_OK)) return (1); hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2; diff --git a/usr/src/uts/common/inet/iptun/iptun.c b/usr/src/uts/common/inet/iptun/iptun.c index 099a14fc2e..215221241d 100644 --- a/usr/src/uts/common/inet/iptun/iptun.c +++ b/usr/src/uts/common/inet/iptun/iptun.c @@ -379,92 +379,58 @@ iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, /* ARGSUSED */ static int iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { iptun_t *iptun = barg; - mac_propval_range_t range; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); int err; if ((err = iptun_enter(iptun)) != 0) return (err); - if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { + switch (pr_num) { + case MAC_PROP_IPTUN_HOPLIMIT: + ASSERT(pr_valsize >= sizeof (uint32_t)); + *(uint32_t *)pr_val = iptun->iptun_hoplimit; + break; + + case MAC_PROP_IPTUN_ENCAPLIMIT: + *(uint32_t *)pr_val = iptun->iptun_encaplimit; + break; + default: err = ENOTSUP; - goto done; - } - if (is_default && is_possible) { - err = EINVAL; - goto done; 
} +done: + iptun_exit(iptun); + return (err); +} - *perm = MAC_PROP_PERM_RW; - - if (is_possible) { - if (pr_valsize < sizeof (mac_propval_range_t)) { - err = EINVAL; - goto done; - } - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - } else if (pr_valsize < sizeof (uint32_t)) { - err = EINVAL; - goto done; - } +/* ARGSUSED */ +static void +iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + iptun_t *iptun = barg; switch (pr_num) { case MAC_PROP_IPTUN_HOPLIMIT: - if (is_possible) { - range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; - range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; - } else if (is_default) { - *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; - } else { - *(uint32_t *)pr_val = iptun->iptun_hoplimit; - } + mac_prop_info_set_range_uint32(prh, + IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); + mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); break; + case MAC_PROP_IPTUN_ENCAPLIMIT: - if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { - err = ENOTSUP; - goto done; - } - if (is_possible) { - range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; - range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; - } else if (is_default) { - *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; - } else { - *(uint32_t *)pr_val = iptun->iptun_encaplimit; - } + if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) + break; + mac_prop_info_set_range_uint32(prh, + IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); + mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); break; - case MAC_PROP_MTU: { - uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); - - if (is_possible) { - range.range_uint32[0].mpur_min = - iptun->iptun_typeinfo->iti_minmtu; - range.range_uint32[0].mpur_max = maxmtu; - } else { - /* - * The MAC module knows the current value and should - * never call us for it. There is also no default - * MTU, as by default, it is a dynamic property. 
- */ - err = ENOTSUP; - goto done; - } + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + iptun->iptun_typeinfo->iti_minmtu, + iptun_get_maxmtu(iptun, NULL, 0)); break; } - default: - err = EINVAL; - goto done; - } - if (is_possible) - bcopy(&range, pr_val, sizeof (range)); -done: - iptun_exit(iptun); - return (err); } uint_t @@ -3514,7 +3480,7 @@ iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) } static mac_callbacks_t iptun_m_callbacks = { - .mc_callbacks = (MC_SETPROP | MC_GETPROP), + .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), .mc_getstat = iptun_m_getstat, .mc_start = iptun_m_start, .mc_stop = iptun_m_stop, @@ -3522,6 +3488,8 @@ static mac_callbacks_t iptun_m_callbacks = { .mc_multicst = iptun_m_multicst, .mc_unicst = iptun_m_unicst, .mc_tx = iptun_m_tx, + .mc_reserved = NULL, .mc_setprop = iptun_m_setprop, - .mc_getprop = iptun_m_getprop + .mc_getprop = iptun_m_getprop, + .mc_propinfo = iptun_m_propinfo }; diff --git a/usr/src/uts/common/io/afe/afe.c b/usr/src/uts/common/io/afe/afe.c index ca67e753b9..a80775c502 100644 --- a/usr/src/uts/common/io/afe/afe.c +++ b/usr/src/uts/common/io/afe/afe.c @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -119,9 +119,11 @@ static int afe_m_stat(void *, uint_t, uint64_t *); static int afe_m_start(void *); static void afe_m_stop(void *); static int afe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int afe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void afe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static unsigned afe_intr(caddr_t); static void afe_startmac(afe_t *); static void afe_stopmac(afe_t *); @@ -173,7 +175,7 @@ static mii_ops_t afe_mii_ops = { }; static mac_callbacks_t afe_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, afe_m_stat, afe_m_start, afe_m_stop, @@ -181,12 +183,14 @@ static mac_callbacks_t afe_m_callbacks = { afe_m_multicst, afe_m_unicst, afe_m_tx, + NULL, afe_m_ioctl, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ afe_m_setprop, afe_m_getprop, + afe_m_propinfo }; @@ -2372,12 +2376,12 @@ afe_m_stat(void *arg, uint_t stat, uint64_t *val) } int -afe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +afe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { afe_t *afep = arg; - return (mii_m_getprop(afep->afe_mii, name, num, flags, sz, val, perm)); + return (mii_m_getprop(afep->afe_mii, name, num, sz, val)); } int @@ -2389,6 +2393,15 @@ afe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (mii_m_setprop(afep->afe_mii, name, num, sz, val)); } +static void +afe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + afe_t *afep = arg; + + mii_m_propinfo(afep->afe_mii, name, num, prh); +} + /* * Debugging and error reporting. 
*/ diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 32ce4dfd08..eac04f2087 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +33,38 @@ * aggregation group. * * A set of MAC ports are associated with each association group. + * + * Aggr pseudo TX rings + * -------------------- + * The underlying ports (NICs) in an aggregation can have TX rings. To + * enhance aggr's performance, these TX rings are made available to the + * aggr layer as pseudo TX rings. The concept of pseudo rings is not new. + * They are already present and implemented on the RX side. They are called + * pseudo RX rings. The same concept is extended to the TX side where + * each TX ring of an underlying port is reflected in aggr as a pseudo + * TX ring. Thus each pseudo TX ring will map to a specific hardware TX + * ring. Even in the case of a NIC that does not have a TX ring, a pseudo + * TX ring is given to the aggregation layer. + * + * With this change, the outgoing stack depth looks much better: + * + * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() -> + * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx() + * + * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings: + * SRS_TX_AGGR and SRS_TX_BW_AGGR. + * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX + * ring belonging to a port on which the packet has to be sent. + * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4 + * policy and then uses the fanout_hint passed to it to pick a TX ring from + * the selected port.
+ * + * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where + * bandwidth limit is applied first on the outgoing packet and the packets + * allowed to go out would call mac_tx_aggr_mode() to send the packet on a + * particular TX ring. */ #include <sys/types.h> @@ -71,9 +103,8 @@ static void aggr_m_ioctl(void *, queue_t *, mblk_t *); static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); - +static void aggr_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, @@ -113,7 +144,7 @@ static id_space_t *key_ids; static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; #define AGGR_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) static mac_callbacks_t aggr_m_callbacks = { AGGR_M_CALLBACK_FLAGS, @@ -123,13 +154,15 @@ static mac_callbacks_t aggr_m_callbacks = { aggr_m_promisc, aggr_m_multicst, NULL, - aggr_m_tx, + NULL, + NULL, aggr_m_ioctl, aggr_m_capab_get, NULL, NULL, aggr_m_setprop, - aggr_m_getprop + NULL, + aggr_m_propinfo }; /*ARGSUSED*/ @@ -144,6 +177,8 @@ aggr_grp_constructor(void *buf, void *arg, int kmflag) rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL); grp->lg_link_state = LINK_STATE_UNKNOWN; return (0); } @@ -164,6 +199,8 @@ aggr_grp_destructor(void *buf, void *arg) mutex_destroy(&grp->lg_port_lock); cv_destroy(&grp->lg_port_cv); rw_destroy(&grp->lg_tx_lock); + 
mutex_destroy(&grp->lg_tx_flowctl_lock); + cv_destroy(&grp->lg_tx_flowctl_cv); } void @@ -536,7 +573,7 @@ aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, } /* - * Add a pseudo Rx ring for the given HW ring handle. + * Add a pseudo RX ring for the given HW ring handle. */ static int aggr_add_pseudo_rx_ring(aggr_port_t *port, @@ -553,7 +590,7 @@ aggr_add_pseudo_rx_ring(aggr_port_t *port, } /* - * No slot for this new Rx ring. + * No slot for this new RX ring. */ if (j == MAX_RINGS_PER_GROUP) return (EIO); @@ -567,19 +604,20 @@ aggr_add_pseudo_rx_ring(aggr_port_t *port, * The group is already registered, dynamically add a new ring to the * mac group. */ - mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring); if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; ring->arr_hw_rh = NULL; ring->arr_port = NULL; rx_grp->arg_ring_cnt--; - mac_hwring_teardown(hw_rh); + } else { + mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, + mac_find_ring(rx_grp->arg_gh, j)); } return (err); } /* - * Remove the pseudo Rx ring of the given HW ring handle. + * Remove the pseudo RX ring of the given HW ring handle. */ static void aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) @@ -632,8 +670,8 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) /* * Get the list the the underlying HW rings. 
*/ - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, - MAC_RING_TYPE_RX); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); if (port->lp_hwgh != NULL) { /* @@ -671,7 +709,7 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) port->lp_hwgh = NULL; } } else { - port->lp_grp_added = B_TRUE; + port->lp_rx_grp_added = B_TRUE; } done: mac_perim_exit(pmph); @@ -695,12 +733,12 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) ASSERT(MAC_PERIM_HELD(grp->lg_mh)); mac_perim_enter_by_mh(port->lp_mh, &pmph); - if (!port->lp_grp_added) + if (!port->lp_rx_grp_added) goto done; ASSERT(rx_grp->arg_gh != NULL); - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, - MAC_RING_TYPE_RX); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + &hwgh, hw_rh, MAC_RING_TYPE_RX); /* * If hw_rh_cnt is 0, it means that the underlying port does not @@ -725,7 +763,196 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_rx_client_restart(port->lp_mch); } - port->lp_grp_added = B_FALSE; + port->lp_rx_grp_added = B_FALSE; +done: + mac_perim_exit(pmph); +} + +/* + * Add a pseudo TX ring for the given HW ring handle. + */ +static int +aggr_add_pseudo_tx_ring(aggr_port_t *port, + aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh, + mac_ring_handle_t *pseudo_rh) +{ + aggr_pseudo_tx_ring_t *ring; + int err; + int i; + + ASSERT(MAC_PERIM_HELD(port->lp_mh)); + for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { + ring = tx_grp->atg_rings + i; + if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE)) + break; + } + /* + * No slot for this new TX ring. + */ + if (i == MAX_RINGS_PER_GROUP) + return (EIO); + /* + * The following 4 statements needs to be done before + * calling mac_group_add_ring(). Otherwise it will + * result in an assertion failure in mac_init_ring(). 
+ */ + ring->atr_flags |= MAC_PSEUDO_RING_INUSE; + ring->atr_hw_rh = hw_rh; + ring->atr_port = port; + tx_grp->atg_ring_cnt++; + + /* + * The TX side has no concept of ring groups unlike RX groups. + * There is just a single group which stores all the TX rings. + * This group will be used to store aggr's pseudo TX rings. + */ + if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) { + ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; + ring->atr_hw_rh = NULL; + ring->atr_port = NULL; + tx_grp->atg_ring_cnt--; + } else { + *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i); + if (hw_rh != NULL) { + mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, + mac_find_ring(tx_grp->atg_gh, i)); + } + } + return (err); +} + +/* + * Remove the pseudo TX ring of the given HW ring handle. + */ +static void +aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp, + mac_ring_handle_t pseudo_hw_rh) +{ + aggr_pseudo_tx_ring_t *ring; + int i; + + for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { + ring = tx_grp->atg_rings + i; + if (ring->atr_rh != pseudo_hw_rh) + continue; + + ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE); + mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh); + ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; + mac_hwring_teardown(ring->atr_hw_rh); + ring->atr_hw_rh = NULL; + ring->atr_port = NULL; + tx_grp->atg_ring_cnt--; + break; + } +} + +/* + * This function is called to create pseudo rings over hardware rings of + * the underlying device. There is a 1:1 mapping between the pseudo TX + * rings of the aggr and the hardware rings of the underlying port. + */ +static int +aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) +{ + aggr_grp_t *grp = port->lp_grp; + mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh; + mac_perim_handle_t pmph; + int hw_rh_cnt, i = 0, j; + int err = 0; + + ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + /* + * Get the list of the underlying HW rings.
+ */ + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + NULL, hw_rh, MAC_RING_TYPE_TX); + + /* + * Even if the underlying NIC does not have TX rings, we + * still make a pseudo TX ring for that NIC with NULL as + * the ring handle. + */ + if (hw_rh_cnt == 0) + port->lp_tx_ring_cnt = 1; + else + port->lp_tx_ring_cnt = hw_rh_cnt; + + port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + port->lp_tx_ring_cnt), KM_SLEEP); + port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + port->lp_tx_ring_cnt), KM_SLEEP); + + if (hw_rh_cnt == 0) { + if ((err = aggr_add_pseudo_tx_ring(port, tx_grp, + NULL, &pseudo_rh)) == 0) { + port->lp_tx_rings[0] = NULL; + port->lp_pseudo_tx_rings[0] = pseudo_rh; + } + } else { + for (i = 0; err == 0 && i < hw_rh_cnt; i++) { + err = aggr_add_pseudo_tx_ring(port, + tx_grp, hw_rh[i], &pseudo_rh); + if (err != 0) + break; + port->lp_tx_rings[i] = hw_rh[i]; + port->lp_pseudo_tx_rings[i] = pseudo_rh; + } + } + + if (err != 0) { + if (hw_rh_cnt != 0) { + for (j = 0; j < i; j++) { + aggr_rem_pseudo_tx_ring(tx_grp, + port->lp_pseudo_tx_rings[j]); + } + } + kmem_free(port->lp_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + kmem_free(port->lp_pseudo_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + port->lp_tx_ring_cnt = 0; + } else { + port->lp_tx_grp_added = B_TRUE; + port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch, + aggr_tx_ring_update, port); + } + mac_perim_exit(pmph); + return (err); +} + +/* + * This function is called by aggr to remove pseudo TX rings over the + * HW rings of the underlying port.
+ */ +static void +aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) +{ + aggr_grp_t *grp = port->lp_grp; + mac_perim_handle_t pmph; + int i; + + ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + if (!port->lp_tx_grp_added) + goto done; + + ASSERT(tx_grp->atg_gh != NULL); + + for (i = 0; i < port->lp_tx_ring_cnt; i++) + aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]); + + kmem_free(port->lp_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + kmem_free(port->lp_pseudo_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + + port->lp_tx_ring_cnt = 0; + (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh); + port->lp_tx_grp_added = B_FALSE; done: mac_perim_exit(pmph); } @@ -813,6 +1040,9 @@ aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, * Create the pseudo ring for each HW ring of the underlying * port. */ + rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group); + if (rc != 0) + goto bail; rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); if (rc != 0) goto bail; @@ -877,6 +1107,7 @@ bail: aggr_port_stop(port); mac_perim_exit(pmph); } + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); (void) aggr_grp_rem_port(grp, port, NULL, NULL); } @@ -1001,6 +1232,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, mac_perim_handle_t mph; int err; int i; + kt_did_t tid = 0; /* need at least one port */ if (nports == 0) @@ -1029,10 +1261,17 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_started = B_FALSE; grp->lg_promisc = B_FALSE; grp->lg_lacp_done = B_FALSE; + grp->lg_tx_notify_done = B_FALSE; grp->lg_lacp_head = grp->lg_lacp_tail = NULL; grp->lg_lacp_rx_thread = thread_create(NULL, 0, aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); + grp->lg_tx_notify_thread = thread_create(NULL, 0, + aggr_tx_notify_thread, grp, 0, 
&p0, TS_RUN, minclsyspri); + grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + MAX_RINGS_PER_GROUP), KM_SLEEP); + grp->lg_tx_blocked_cnt = 0; bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); + bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); aggr_lacp_init_grp(grp); /* add MAC ports to group */ @@ -1127,6 +1366,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, * port. Note that this is done after the aggr registers the * mac. */ + VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0); VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); if (aggr_port_notify_link(grp, port)) link_state_changed = B_TRUE; @@ -1172,7 +1412,21 @@ bail: while (grp->lg_lacp_rx_thread != NULL) cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); mutex_exit(&grp->lg_lacp_lock); - + /* + * Inform the tx_notify thread to exit. + */ + mutex_enter(&grp->lg_tx_flowctl_lock); + if (grp->lg_tx_notify_thread != NULL) { + tid = grp->lg_tx_notify_thread->t_did; + grp->lg_tx_notify_done = B_TRUE; + cv_signal(&grp->lg_tx_flowctl_cv); + } + mutex_exit(&grp->lg_tx_flowctl_lock); + if (tid != 0) + thread_join(tid); + + kmem_free(grp->lg_tx_blocked_rings, + (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); rw_exit(&aggr_grp_lock); AGGR_GRP_REFRELE(grp); return (err); @@ -1272,6 +1526,7 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, grp->lg_nports--; mac_perim_exit(mph); + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_port_delete(port); /* @@ -1378,7 +1633,20 @@ aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) mac_perim_exit(pmph); } + /* + * aggr_rem_pseudo_tx_group() is not called here. Instead + * it is called from inside aggr_grp_rem_port() after the + * port has been detached. 
The reason is that + * aggr_rem_pseudo_tx_group() removes one ring at a time + * and if there is still traffic going on, then there + * is the possibility of aggr_find_tx_ring() returning a + * removed ring for transmission. Once the port has been + * detached, that port will not be used and + * aggr_find_tx_ring() will not return any rings + * belonging to it. + */ aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); + /* remove port from group */ rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, &link_state_changed); @@ -1408,6 +1676,7 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) mod_hash_val_t val; mac_perim_handle_t mph, pmph; int err; + kt_did_t tid = 0; rw_enter(&aggr_grp_lock, RW_WRITER); @@ -1455,6 +1724,18 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) while (grp->lg_lacp_rx_thread != NULL) cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); mutex_exit(&grp->lg_lacp_lock); + /* + * Inform the tx_notify_thread to exit. + */ + mutex_enter(&grp->lg_tx_flowctl_lock); + if (grp->lg_tx_notify_thread != NULL) { + tid = grp->lg_tx_notify_thread->t_did; + grp->lg_tx_notify_done = B_TRUE; + cv_signal(&grp->lg_tx_flowctl_cv); + } + mutex_exit(&grp->lg_tx_flowctl_lock); + if (tid != 0) + thread_join(tid); mac_perim_enter_by_mh(grp->lg_mh, &mph); @@ -1468,6 +1749,7 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) aggr_port_stop(port); (void) aggr_grp_detach_port(grp, port); mac_perim_exit(pmph); + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); aggr_port_delete(port); port = cport; @@ -1475,6 +1757,8 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) mac_perim_exit(mph); + kmem_free(grp->lg_tx_blocked_rings, + (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); /* * Wait for the port's lacp timer thread and its notification callback * to exit before calling mac_unregister() since both needs to access @@ -1600,6 +1884,37 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) return 
(0); } +int +aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver; + + if (rx_ring->arr_hw_rh != NULL) { + *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat); + } else { + aggr_port_t *port = rx_ring->arr_port; + + *val = mac_stat_get(port->lp_mh, stat); + + } + return (0); +} + +int +aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver; + + if (tx_ring->atr_hw_rh != NULL) { + *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat); + } else { + aggr_port_t *port = tx_ring->atr_port; + + *val = mac_stat_get(port->lp_mh, stat); + } + return (0); +} + static int aggr_m_stat(void *arg, uint_t stat, uint64_t *val) { @@ -1821,7 +2136,6 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) if (cap_rings->mr_type == MAC_RING_TYPE_RX) { cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; - cap_rings->mr_rget = aggr_fill_ring; /* * An aggregation advertises only one (pseudo) RX @@ -1829,12 +2143,15 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) * the underlying devices. 
*/ cap_rings->mr_gnum = 1; - cap_rings->mr_gget = aggr_fill_group; cap_rings->mr_gaddring = NULL; cap_rings->mr_gremring = NULL; } else { - return (B_FALSE); + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt; + cap_rings->mr_gnum = 0; } + cap_rings->mr_rget = aggr_fill_ring; + cap_rings->mr_gget = aggr_fill_group; break; } case MAC_CAPAB_AGGR: @@ -1845,6 +2162,8 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) aggr_cap = cap_data; aggr_cap->mca_rename_fn = aggr_grp_port_rename; aggr_cap->mca_unicst = aggr_m_unicst; + aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring; + aggr_cap->mca_arg = arg; } return (B_TRUE); } @@ -1863,18 +2182,24 @@ aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, { aggr_grp_t *grp = arg; aggr_pseudo_rx_group_t *rx_group; - - ASSERT(rtype == MAC_RING_TYPE_RX && index == 0); - rx_group = &grp->lg_rx_group; - rx_group->arg_gh = gh; - rx_group->arg_grp = grp; - - infop->mgi_driver = (mac_group_driver_t)rx_group; - infop->mgi_start = NULL; - infop->mgi_stop = NULL; - infop->mgi_addmac = aggr_addmac; - infop->mgi_remmac = aggr_remmac; - infop->mgi_count = rx_group->arg_ring_cnt; + aggr_pseudo_tx_group_t *tx_group; + + ASSERT(index == 0); + if (rtype == MAC_RING_TYPE_RX) { + rx_group = &grp->lg_rx_group; + rx_group->arg_gh = gh; + rx_group->arg_grp = grp; + + infop->mgi_driver = (mac_group_driver_t)rx_group; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = aggr_addmac; + infop->mgi_remmac = aggr_remmac; + infop->mgi_count = rx_group->arg_ring_cnt; + } else { + tx_group = &grp->lg_tx_group; + tx_group->atg_gh = gh; + } } /* @@ -1905,6 +2230,7 @@ aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; + aggr_mac_intr.mi_ddi_handle = NULL; infop->mri_driver = 
(mac_ring_driver_t)rx_ring; infop->mri_start = aggr_pseudo_start_ring; @@ -1912,6 +2238,34 @@ aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_intr = aggr_mac_intr; infop->mri_poll = aggr_rx_poll; + + infop->mri_stat = aggr_rx_ring_stat; + break; + } + case MAC_RING_TYPE_TX: { + aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group; + aggr_pseudo_tx_ring_t *tx_ring; + + ASSERT(rg_index == -1); + ASSERT(index < tx_group->atg_ring_cnt); + + tx_ring = &tx_group->atg_rings[index]; + tx_ring->atr_rh = rh; + + infop->mri_driver = (mac_ring_driver_t)tx_ring; + infop->mri_start = NULL; + infop->mri_stop = NULL; + infop->mri_tx = aggr_ring_tx; + infop->mri_stat = aggr_tx_ring_stat; + /* + * Use the hw TX ring handle to find if the ring needs + * serialization or not. For NICs that do not expose + * Tx rings, atr_hw_rh will be NULL. + */ + if (tx_ring->atr_hw_rh != NULL) { + infop->mri_flags = + mac_hwring_getinfo(tx_ring->atr_hw_rh); + } break; } default: @@ -2399,34 +2753,33 @@ aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, } int -aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range) +aggr_grp_possible_mtu_range(aggr_grp_t *grp, uint32_t *min, uint32_t *max) { mac_propval_range_t *vals; mac_propval_uint32_range_t *ur; aggr_port_t *port; mac_perim_handle_t mph; - mac_prop_t macprop; - uint_t perm, i; - uint32_t min = 0, max = (uint32_t)-1; + uint_t i; int err = 0; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + *min = 0; + *max = (uint32_t)-1; + vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports, KM_SLEEP); - macprop.mp_id = MAC_PROP_MTU; - macprop.mp_name = "mtu"; - macprop.mp_flags = MAC_PROP_POSSIBLE; for (port = grp->lg_ports, i = 0; port != NULL; port = port->lp_next, i++) { mac_perim_enter_by_mh(port->lp_mh, &mph); - err = mac_get_prop(port->lp_mh, &macprop, vals + i, - sizeof (mac_propval_range_t), &perm); + err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, + NULL, 0, vals + i, NULL); 
mac_perim_exit(mph); if (err != 0) break; } + /* * if any of the underlying ports does not support changing MTU then * just return ENOTSUP @@ -2435,47 +2788,42 @@ aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range) ASSERT(err != 0); goto done; } - range->mpr_count = 1; - range->mpr_type = MAC_PROPVAL_UINT32; + for (i = 0; i < grp->lg_nports; i++) { - ur = &((vals + i)->range_uint32[0]); + ur = &((vals + i)->mpr_range_uint32[0]); /* * Take max of the min, for range_min; that is the minimum * MTU value for an aggregation is the maximum of the * minimum values of all the underlying ports */ - if (ur->mpur_min > min) - min = ur->mpur_min; + if (ur->mpur_min > *min) + *min = ur->mpur_min; /* Take min of the max, for range_max */ - if (ur->mpur_max < max) - max = ur->mpur_max; + if (ur->mpur_max < *max) + *max = ur->mpur_max; } - range->range_uint32[0].mpur_min = min; - range->range_uint32[0].mpur_max = max; done: kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports); + return (err); } -/*ARGSUSED*/ -static int -aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +static void +aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) { - mac_propval_range_t range; - int err = ENOTSUP; aggr_grp_t *grp = m_driver; + _NOTE(ARGUNUSED(pr_name)); + switch (pr_num) { - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0) - return (err); - bcopy(&range, pr_val, sizeof (range)); - return (0); + case MAC_PROP_MTU: { + uint32_t min, max; + + if (aggr_grp_possible_mtu_range(grp, &min, &max) != 0) + return; + mac_prop_info_set_range_uint32(prh, min, max); + break; + } } - return (err); } diff --git a/usr/src/uts/common/io/aggr/aggr_lacp.c 
b/usr/src/uts/common/io/aggr/aggr_lacp.c index 936e783e9e..2892f1438a 100644 --- a/usr/src/uts/common/io/aggr/aggr_lacp.c +++ b/usr/src/uts/common/io/aggr/aggr_lacp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -653,7 +653,10 @@ lacp_xmit_sm(aggr_port_t *portp) fill_lacp_pdu(portp, (lacp_t *)(mp->b_rptr + sizeof (struct ether_header))); - (void) mac_tx(portp->lp_mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL); + /* Send the packet over the first TX ring */ + mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp); + if (mp != NULL) + freemsg(mp); pl->NTT = B_FALSE; portp->lp_lacp_stats.LACPDUsTx++; @@ -1322,8 +1325,14 @@ lacp_selection_logic(aggr_port_t *portp) if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, &aggrp->aggr.PartnerSystem) == 0 && (tpp->lp_lacp.PartnerOperKey == - aggrp->aggr.PartnerOperAggrKey)) + aggrp->aggr.PartnerOperAggrKey)) { + /* Set aggregation Partner MAC and key */ + aggrp->aggr.PartnerSystem = + pl->PartnerOperSystem; + aggrp->aggr.PartnerOperAggrKey = + pl->PartnerOperKey; break; + } } if (tpp == NULL) { @@ -2293,7 +2302,11 @@ aggr_lacp_rx(mblk_t *dmp) if (receive_marker_pdu(portp, dmp) != 0) break; - (void) mac_tx(portp->lp_mch, dmp, 0, MAC_DROP_ON_NO_DESC, NULL); + /* Send the packet over the first TX ring */ + dmp = mac_hwring_send_priv(portp->lp_mch, + portp->lp_tx_rings[0], dmp); + if (dmp != NULL) + freemsg(dmp); mac_perim_exit(mph); AGGR_PORT_REFRELE(portp); return; diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 2c7e74131a..00545d2c03 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -71,7 +71,7 @@ aggr_port_destructor(void *buf, void *arg) ASSERT(port->lp_mnh == NULL); ASSERT(port->lp_mphp == NULL); - ASSERT(!port->lp_grp_added); + ASSERT(!port->lp_rx_grp_added && !port->lp_tx_grp_added); ASSERT(port->lp_hwgh == NULL); } @@ -111,7 +111,7 @@ aggr_port_init_callbacks(aggr_port_t *port) port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port); /* * Hold a reference of the grp and the port and this reference will - * be release when the thread exits. + * be released when the thread exits. * * The reference on the port is used for aggr_port_delete() to * continue without waiting for the thread to exit; the reference diff --git a/usr/src/uts/common/io/aggr/aggr_send.c b/usr/src/uts/common/io/aggr/aggr_send.c index bc0a19368d..7d423f267e 100644 --- a/usr/src/uts/common/io/aggr/aggr_send.c +++ b/usr/src/uts/common/io/aggr/aggr_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/conf.h> #include <sys/modctl.h> #include <sys/sunddi.h> +#include <sys/callb.h> #include <sys/vlan.h> #include <sys/strsun.h> #include <sys/strsubr.h> @@ -68,79 +69,163 @@ aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) grp->lg_mac_tx_policy = mac_policy; } +#define HASH_HINT(hint) \ + ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) + /* - * Send function invoked by the MAC service module. + * Function invoked by mac layer to find a specific TX ring on a port + * to send data. 
*/ mblk_t * -aggr_m_tx(void *arg, mblk_t *mp) +aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh) { aggr_grp_t *grp = arg; aggr_port_t *port; - mblk_t *nextp; - mac_tx_cookie_t cookie; uint64_t hash; - void *mytx_handle; - - for (;;) { - rw_enter(&grp->lg_tx_lock, RW_READER); - if (grp->lg_ntx_ports == 0) { - /* - * We could have returned from aggr_m_start() before - * the ports were actually attached. Drop the chain. - */ - rw_exit(&grp->lg_tx_lock); - freemsgchain(mp); - return (NULL); - } - - nextp = mp->b_next; - mp->b_next = NULL; - - hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, - B_TRUE); - port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; + rw_enter(&grp->lg_tx_lock, RW_READER); + if (grp->lg_ntx_ports == 0) { /* - * Bump the active Tx ref count so that the port won't - * be deleted. The reference count will be dropped in mac_tx(). + * We could have returned from aggr_m_start() before + * the ports were actually attached. Drop the chain. */ - mytx_handle = mac_tx_hold(port->lp_mch); rw_exit(&grp->lg_tx_lock); + freemsgchain(mp); + return (NULL); + } + hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE); + port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; - if (mytx_handle == NULL) { - /* - * The port is quiesced. - */ - freemsg(mp); - } else { - mblk_t *ret_mp = NULL; - - /* - * It is fine that the port state changes now. - * Set MAC_TX_NO_HOLD to inform mac_tx() not to bump - * the active Tx ref again. Use hash as the hint so - * to direct traffic to different TX rings. Note below - * bit operation is needed to get the most benefit - * from the mac_tx() hash algorithm. - */ + /* + * Use hash as the hint so to direct traffic to + * different TX rings. Note below bit operation + * is needed in case hint is 0 to get the most + * benefit from HASH_HINT() algorithm. 
+ */ + if (port->lp_tx_ring_cnt > 1) { + if (hint == 0) { hash = (hash << 24 | hash << 16 | hash); hash = (hash << 32 | hash); - cookie = mac_tx(port->lp_mch, mp, (uintptr_t)hash, - MAC_TX_NO_ENQUEUE | MAC_TX_NO_HOLD, &ret_mp); + } else { + hash = hint; + } + hash = HASH_HINT(hash); + *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt]; + } else { + *rh = port->lp_pseudo_tx_rings[0]; + } + rw_exit(&grp->lg_tx_lock); - mac_tx_rele(port->lp_mch, mytx_handle); + return (mp); +} - if (cookie != NULL) { - ret_mp->b_next = nextp; - mp = ret_mp; - break; - } +/* + * aggr_tx_notify_thread: + * + * aggr_tx_ring_update() callback function wakes up this thread when + * it gets called. This thread will call mac_tx_ring_update() to + * notify upper mac of flow control getting relieved. Note that + * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly + * because aggr_tx_ring_update() is called from lower mac with + * mi_rw_lock held. + */ +void +aggr_tx_notify_thread(void *arg) +{ + callb_cpr_t cprinfo; + aggr_grp_t *grp = (aggr_grp_t *)arg; + mac_ring_handle_t pseudo_mrh; + + CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr, + "aggr_tx_notify_thread"); + + mutex_enter(&grp->lg_tx_flowctl_lock); + while (!grp->lg_tx_notify_done) { + if ((grp->lg_tx_blocked_cnt) == 0) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&grp->lg_tx_flowctl_cv, + &grp->lg_tx_flowctl_lock); + CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock); + continue; + } + while (grp->lg_tx_blocked_cnt != 0) { + grp->lg_tx_blocked_cnt--; + pseudo_mrh = + grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt]; + mutex_exit(&grp->lg_tx_flowctl_lock); + mac_tx_ring_update(grp->lg_mh, pseudo_mrh); + mutex_enter(&grp->lg_tx_flowctl_lock); } + } + /* + * The grp is being destroyed, exit the thread. 
+ */ + grp->lg_tx_notify_thread = NULL; + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +/* + * Callback function registered with lower mac to receive wakeups from + * drivers when flow control is relieved (i.e. Tx descriptors are + * available). + */ +void +aggr_tx_ring_update(void *arg1, uintptr_t arg2) +{ + aggr_port_t *port = (aggr_port_t *)arg1; + mac_ring_handle_t mrh = (mac_ring_handle_t)arg2; + mac_ring_handle_t pseudo_mrh; + aggr_grp_t *grp = port->lp_grp; + int i = 0; - if ((mp = nextp) == NULL) - break; + if (mrh == NULL) { + /* + * If the underlying NIC does not expose TX rings, + * still as pseudo TX ring is presented to the + * aggr mac. + */ + pseudo_mrh = port->lp_pseudo_tx_rings[0]; + } else { + for (i = 0; i < port->lp_tx_ring_cnt; i++) { + if (port->lp_tx_rings[i] == mrh) + break; + } + ASSERT(i < port->lp_tx_ring_cnt); + pseudo_mrh = port->lp_pseudo_tx_rings[i]; } - return (mp); + mutex_enter(&grp->lg_tx_flowctl_lock); + /* + * It could be possible that some (broken?) device driver + * could send more than one wakeup on the same ring. In + * such a case, multiple instances of the same pseudo TX + * ring should not be saved in lg_tx_blocked_rings[] + * array. So first check if woken up ring (pseudo_mrh) is + * already in the lg_tx_blocked_rings[] array. + */ + for (i = 0; i < grp->lg_tx_blocked_cnt; i++) { + if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) { + mutex_exit(&grp->lg_tx_flowctl_lock); + return; + } + } + /* A distinct mac_ring_handle. Save and increment count */ + grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh; + grp->lg_tx_blocked_cnt++; + cv_signal(&grp->lg_tx_flowctl_cv); + mutex_exit(&grp->lg_tx_flowctl_lock); +} + +/* + * Send function invoked by the MAC service module. 
+ */ +mblk_t * +aggr_ring_tx(void *arg, mblk_t *mp) +{ + aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg; + aggr_port_t *port = pseudo_ring->atr_port; + + return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp)); } /* diff --git a/usr/src/uts/common/io/arn/arn_main.c b/usr/src/uts/common/io/arn/arn_main.c index 32f22b007a..68e61a6773 100644 --- a/usr/src/uts/common/io/arn/arn_main.c +++ b/usr/src/uts/common/io/arn/arn_main.c @@ -139,11 +139,13 @@ static void arn_m_ioctl(void *, queue_t *, mblk_t *); static int arn_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int arn_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void arn_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* MAC Callcack Functions */ static mac_callbacks_t arn_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, arn_m_stat, arn_m_start, arn_m_stop, @@ -151,12 +153,14 @@ static mac_callbacks_t arn_m_callbacks = { arn_m_multicst, arn_m_unicst, arn_m_tx, + NULL, arn_m_ioctl, NULL, NULL, NULL, arn_m_setprop, - arn_m_getprop + arn_m_getprop, + arn_m_propinfo }; /* @@ -2518,17 +2522,26 @@ arn_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int arn_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct arn_softc *sc = arg; int err = 0; err = ieee80211_getprop(&sc->sc_isc, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +arn_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct arn_softc *sc = arg; + + ieee80211_propinfo(&sc->sc_isc, pr_name, wldp_pr_num, prh); +} + /* return bus cachesize in 4B word units */ 
static void arn_pci_config_cachesize(struct arn_softc *sc) diff --git a/usr/src/uts/common/io/atge/atge.h b/usr/src/uts/common/io/atge/atge.h index 01c295047a..68a11705c9 100644 --- a/usr/src/uts/common/io/atge/atge.h +++ b/usr/src/uts/common/io/atge/atge.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +30,7 @@ extern "C" { #endif +#include <sys/ethernet.h> #include <sys/mac_provider.h> #include "atge_l1e_reg.h" diff --git a/usr/src/uts/common/io/atge/atge_main.c b/usr/src/uts/common/io/atge/atge_main.c index c368df44aa..938d0ddefb 100644 --- a/usr/src/uts/common/io/atge/atge_main.c +++ b/usr/src/uts/common/io/atge/atge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -204,16 +204,18 @@ static int atge_m_stat(void *, uint_t, uint64_t *); static int atge_m_start(void *); static void atge_m_stop(void *); static int atge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int atge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void atge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int atge_m_unicst(void *, const uint8_t *); static int atge_m_multicst(void *, boolean_t, const uint8_t *); static int atge_m_promisc(void *, boolean_t); static mblk_t *atge_m_tx(void *, mblk_t *); static mac_callbacks_t atge_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, atge_m_stat, atge_m_start, atge_m_stop, @@ -221,12 +223,14 @@ static mac_callbacks_t atge_m_callbacks = { atge_m_multicst, atge_m_unicst, atge_m_tx, + NULL, /* mc_reserved */ NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ 
atge_m_setprop, atge_m_getprop, + atge_m_propinfo }; /* @@ -1724,13 +1728,12 @@ atge_m_stat(void *arg, uint_t stat, uint64_t *val) } int -atge_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +atge_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { atge_t *atgep = arg; - return (mii_m_getprop(atgep->atge_mii, name, num, flags, sz, val, - perm)); + return (mii_m_getprop(atgep->atge_mii, name, num, sz, val)); } int @@ -1757,6 +1760,14 @@ atge_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (r); } +static void +atge_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + atge_t *atgep = arg; + + mii_m_propinfo(atgep->atge_mii, name, num, prh); +} void atge_program_ether(atge_t *atgep) diff --git a/usr/src/uts/common/io/ath/ath_main.c b/usr/src/uts/common/io/ath/ath_main.c index 451f827415..fa2a3dba24 100644 --- a/usr/src/uts/common/io/ath/ath_main.c +++ b/usr/src/uts/common/io/ath/ath_main.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -221,10 +221,12 @@ static void ath_m_ioctl(void *, queue_t *, mblk_t *); static int ath_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int ath_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void ath_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t ath_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ath_m_stat, ath_m_start, ath_m_stop, @@ -232,12 +234,14 @@ static mac_callbacks_t ath_m_callbacks = { ath_m_multicst, ath_m_unicst, ath_m_tx, + NULL, ath_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, ath_m_setprop, - ath_m_getprop + ath_m_getprop, + ath_m_propinfo }; /* @@ -1779,21 +1783,30 @@ ath_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } -/* ARGSUSED */ + static int ath_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { ath_t *asc = arg; int err = 0; err = ieee80211_getprop(&asc->asc_isc, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +ath_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + ath_t *asc = arg; + + ieee80211_propinfo(&asc->asc_isc, pr_name, wldp_pr_num, mph); +} + +static void ath_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) { ath_t *asc = arg; diff --git a/usr/src/uts/common/io/atu/atu.c b/usr/src/uts/common/io/atu/atu.c index 881d72f869..fdbb932fca 100644 --- a/usr/src/uts/common/io/atu/atu.c +++ b/usr/src/uts/common/io/atu/atu.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1511,6 +1511,26 @@ atu_m_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t len, return (0); } +static int +atu_m_getprop(void *arg, const char *name, mac_prop_id_t id, + uint_t length, void *buf) +{ + struct atu_softc *sc = (struct atu_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + return (ieee80211_getprop(ic, name, id, length, buf)); +} + +static void +atu_m_propinfo(void *arg, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t mph) +{ + struct atu_softc *sc = (struct atu_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, name, id, mph); +} + static void atu_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { @@ -1635,7 +1655,7 @@ atu_m_stat(void *arg, uint_t stat, uint64_t *val) } static mac_callbacks_t atu_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, atu_m_stat, atu_m_start, atu_m_stop, @@ -1643,10 +1663,12 @@ static mac_callbacks_t atu_m_callbacks = { atu_m_multicst, atu_m_unicst, atu_m_tx, + NULL, atu_m_ioctl, NULL, NULL, NULL, atu_m_setprop, - ieee80211_getprop + atu_m_getprop, + atu_m_propinfo }; diff --git a/usr/src/uts/common/io/bfe/bfe.c b/usr/src/uts/common/io/bfe/bfe.c index b71bcc229b..42e87bb745 100644 --- a/usr/src/uts/common/io/bfe/bfe.c +++ b/usr/src/uts/common/io/bfe/bfe.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/stream.h> @@ -161,7 +161,7 @@ static void bfe_clear_stats(bfe_t *); static void bfe_gather_stats(bfe_t *); static void bfe_error(dev_info_t *, char *, ...); static int bfe_mac_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int bfe_mac_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int bfe_tx_reclaim(bfe_ring_t *); @@ -1651,92 +1651,66 @@ bfe_mac_getstat(void *arg, uint_t stat, uint64_t *val) return (err); } -/*ARGSUSED*/ int -bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { bfe_t *bfe = (bfe_t *)arg; int err = 0; - boolean_t dfl = flags & MAC_PROP_DEFAULT; - - if (sz == 0) - return (EINVAL); - *perm = MAC_PROP_PERM_RW; switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&bfe->bfe_chip.duplex, val, - sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&bfe->bfe_chip.duplex, val, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - bcopy(&bfe->bfe_chip.speed, val, sizeof (uint64_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&bfe->bfe_chip.speed, val, sizeof (uint64_t)); break; case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_aneg : bfe->bfe_adv_aneg; + *(uint8_t *)val = bfe->bfe_adv_aneg; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100fdx : bfe->bfe_adv_100fdx; + *(uint8_t *)val = bfe->bfe_adv_100fdx; break; + case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)val = - dfl ? 
bfe->bfe_cap_100fdx : bfe->bfe_adv_100fdx; + *(uint8_t *)val = bfe->bfe_adv_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100hdx : bfe->bfe_adv_100hdx; + *(uint8_t *)val = bfe->bfe_adv_100hdx; break; + case MAC_PROP_EN_100HDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100hdx : bfe->bfe_adv_100hdx; + *(uint8_t *)val = bfe->bfe_adv_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10fdx : bfe->bfe_adv_10fdx; + *(uint8_t *)val = bfe->bfe_adv_10fdx; break; + case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10fdx : bfe->bfe_adv_10fdx; + *(uint8_t *)val = bfe->bfe_adv_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10hdx : bfe->bfe_adv_10hdx; + *(uint8_t *)val = bfe->bfe_adv_10hdx; break; + case MAC_PROP_EN_10HDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10hdx : bfe->bfe_adv_10hdx; + *(uint8_t *)val = bfe->bfe_adv_10hdx; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100T4 : bfe->bfe_adv_100T4; + *(uint8_t *)val = bfe->bfe_adv_100T4; break; + case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)val = - dfl ? 
bfe->bfe_cap_100T4 : bfe->bfe_adv_100T4; + *(uint8_t *)val = bfe->bfe_adv_100T4; break; default: @@ -1746,6 +1720,51 @@ bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (err); } + +static void +bfe_mac_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + bfe_t *bfe = (bfe_t *)arg; + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_aneg); + break; + + case MAC_PROP_EN_100FDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100fdx); + break; + + case MAC_PROP_EN_100HDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100hdx); + break; + + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_10fdx); + break; + + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_10hdx); + break; + + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100T4); + break; + } +} + + /*ARGSUSED*/ int bfe_mac_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, @@ -2067,7 +2086,7 @@ bfe_mac_set_multicast(void *arg, boolean_t add, const uint8_t *macaddr) } static mac_callbacks_t bfe_mac_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, bfe_mac_getstat, /* gets stats */ bfe_mac_start, /* starts mac */ bfe_mac_stop, /* stops mac */ @@ -2075,12 +2094,14 @@ static mac_callbacks_t bfe_mac_callbacks = { bfe_mac_set_multicast, /* multicast implementation */ bfe_mac_set_ether_addr, /* sets ethernet address (unicast) */ bfe_mac_transmit_packet, /* transmits packet */ + NULL, NULL, /* ioctl */ NULL, /* getcap */ NULL, /* open */ NULL, /* close */ bfe_mac_setprop, bfe_mac_getprop, 
+ bfe_mac_propinfo }; static void diff --git a/usr/src/uts/common/io/bge/bge_impl.h b/usr/src/uts/common/io/bge/bge_impl.h index 86b8d3093a..350cc32ac2 100644 --- a/usr/src/uts/common/io/bge/bge_impl.h +++ b/usr/src/uts/common/io/bge/bge_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -441,7 +441,11 @@ typedef struct recv_ring { bge_rule_info_t *mac_addr_rule; uint8_t mac_addr_val[ETHERADDRL]; int poll_flag; /* Polling flag */ -} recv_ring_t; /* 0x90 (144) bytes */ + + /* Per-ring statistics */ + uint64_t rx_pkts; /* Received Packets Count */ + uint64_t rx_bytes; /* Received Bytes Count */ +} recv_ring_t; /* @@ -1196,6 +1200,7 @@ void bge_chip_msi_trig(bge_t *bgep); void bge_init_kstats(bge_t *bgep, int instance); void bge_fini_kstats(bge_t *bgep); int bge_m_stat(void *arg, uint_t stat, uint64_t *val); +int bge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* bge_log.c */ #if BGE_DEBUGGING diff --git a/usr/src/uts/common/io/bge/bge_kstats.c b/usr/src/uts/common/io/bge/bge_kstats.c index 73994cb8d3..c10a1b4601 100644 --- a/usr/src/uts/common/io/bge/bge_kstats.c +++ b/usr/src/uts/common/io/bge/bge_kstats.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1126,3 +1126,28 @@ bge_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +bge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + recv_ring_t *rx_ring = (recv_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c index 73045e9e3e..fdd38676e7 100644 --- a/usr/src/uts/common/io/bge/bge_main2.c +++ b/usr/src/uts/common/io/bge/bge_main2.c @@ -34,10 +34,6 @@ * This is the string displayed by modinfo, etc. */ static char bge_ident[] = "Broadcom Gb Ethernet"; -/* - * Make sure you keep the version ID up to date! - */ -static char bge_version[] = "Broadcom Gb Ethernet v1.14"; /* * Property names @@ -116,13 +112,18 @@ static int bge_unicst_set(void *, const uint8_t *, static int bge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int bge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void bge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int bge_set_priv_prop(bge_t *, const char *, uint_t, const void *); static int bge_get_priv_prop(bge_t *, const char *, uint_t, - uint_t, void *); + void *); +static void bge_priv_propinfo(const char *, + mac_prop_info_handle_t); -#define BGE_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +#define BGE_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t bge_m_callbacks = { BGE_M_CALLBACK_FLAGS, @@ -133,22 +134,28 @@ static mac_callbacks_t bge_m_callbacks = { bge_m_multicst, NULL, bge_m_tx, + NULL, bge_m_ioctl, bge_m_getcapab, NULL, NULL, bge_m_setprop, - 
bge_m_getprop + bge_m_getprop, + bge_m_propinfo }; -mac_priv_prop_t bge_priv_prop[] = { - {"_adv_asym_pause_cap", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_RW} +char *bge_priv_prop[] = { + "_adv_asym_pause_cap", + "_adv_pause_cap", + "_drain_max", + "_msi_cnt", + "_rx_intr_coalesce_blank_time", + "_tx_intr_coalesce_blank_time", + "_rx_intr_coalesce_pkt_cnt", + "_tx_intr_coalesce_pkt_cnt", + NULL }; -#define BGE_MAX_PRIV_PROPS \ - (sizeof (bge_priv_prop) / sizeof (mac_priv_prop_t)) - uint8_t zero_addr[6] = {0, 0, 0, 0, 0, 0}; /* * ========== Transmit and receive ring reinitialisation ========== @@ -716,7 +723,6 @@ bge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, bge_t *bgep = barg; int err = 0; uint32_t cur_mtu, new_mtu; - uint_t maxsdu; link_flowctrl_t fl; mutex_enter(bgep->genlock); @@ -819,16 +825,11 @@ reprogram: err = EINVAL; break; } - maxsdu = bgep->chipid.ethmax_size - - sizeof (struct ether_header); - err = mac_maxsdu_update(bgep->mh, maxsdu); - if (err == 0) { - bgep->bge_dma_error = B_TRUE; - bgep->manual_reset = B_TRUE; - bge_chip_stop(bgep, B_TRUE); - bge_wake_factotum(bgep); - err = 0; - } + bgep->bge_dma_error = B_TRUE; + bgep->manual_reset = B_TRUE; + bge_chip_stop(bgep, B_TRUE); + bge_wake_factotum(bgep); + err = 0; break; case MAC_PROP_FLOWCTRL: bcopy(pr_val, &fl, sizeof (fl)); @@ -887,71 +888,36 @@ reprogram: /* ARGSUSED */ static int bge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { bge_t *bgep = barg; int err = 0; - link_flowctrl_t fl; - uint64_t speed; - int flags = bgep->chipid.flags; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - - if (pr_valsize == 0) - return (EINVAL); - bzero(pr_val, pr_valsize); - - *perm = MAC_PROP_PERM_RW; - - mutex_enter(bgep->genlock); - if ((bgep->param_loop_mode != BGE_LOOP_NONE && - bge_param_locked(pr_num)) || - ((bgep->chipid.flags & CHIP_FLAG_SERDES) 
&& - ((pr_num == MAC_PROP_EN_100FDX_CAP) || - (pr_num == MAC_PROP_EN_100HDX_CAP) || - (pr_num == MAC_PROP_EN_10FDX_CAP) || - (pr_num == MAC_PROP_EN_10HDX_CAP))) || - (DEVICE_5906_SERIES_CHIPSETS(bgep) && - ((pr_num == MAC_PROP_EN_1000FDX_CAP) || - (pr_num == MAC_PROP_EN_1000HDX_CAP)))) - *perm = MAC_PROP_PERM_READ; - mutex_exit(bgep->genlock); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_duplex_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_duplex_t)); bcopy(&bgep->param_link_duplex, pr_val, sizeof (link_duplex_t)); break; - case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (speed)) - return (EINVAL); - speed = bgep->param_link_speed * 1000000ull; + case MAC_PROP_SPEED: { + uint64_t speed = bgep->param_link_speed * 1000000ull; + + ASSERT(pr_valsize >= sizeof (speed)); bcopy(&speed, pr_val, sizeof (speed)); break; + } case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_state_t)); bcopy(&bgep->link_state, pr_val, sizeof (link_state_t)); break; case MAC_PROP_AUTONEG: - if (is_default) - *(uint8_t *)pr_val = 1; - else - *(uint8_t *)pr_val = bgep->param_adv_autoneg; + *(uint8_t *)pr_val = bgep->param_adv_autoneg; break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (fl)) - return (EINVAL); - if (is_default) { - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - } + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl; + + ASSERT(pr_valsize >= sizeof (fl)); if (bgep->param_link_rx_pause && !bgep->param_link_tx_pause) @@ -970,148 +936,135 @@ bge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, fl = LINK_FLOWCTRL_BI; bcopy(&fl, pr_val, sizeof (fl)); break; + } case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - 
*(uint8_t *)pr_val = bgep->param_adv_1000fdx; + *(uint8_t *)pr_val = bgep->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_en_1000fdx; + *(uint8_t *)pr_val = bgep->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_adv_1000hdx; + *(uint8_t *)pr_val = bgep->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_en_1000hdx; + *(uint8_t *)pr_val = bgep->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_100fdx; - } + *(uint8_t *)pr_val = bgep->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_100fdx; - } + *(uint8_t *)pr_val = bgep->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_100hdx; - } + *(uint8_t *)pr_val = bgep->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 
0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_100hdx; - } + *(uint8_t *)pr_val = bgep->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_10fdx; - } + *(uint8_t *)pr_val = bgep->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_10fdx; - } + *(uint8_t *)pr_val = bgep->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_10hdx; - } + *(uint8_t *)pr_val = bgep->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_10hdx; - } + *(uint8_t *)pr_val = bgep->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_PRIVATE: - err = bge_get_priv_prop(bgep, pr_name, pr_flags, + err = bge_get_priv_prop(bgep, pr_name, pr_valsize, pr_val); return (err); - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = BGE_DEFAULT_MTU; - if (!(flags & CHIP_FLAG_NO_JUMBO)) - range.range_uint32[0].mpur_max = - BGE_MAXIMUM_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } default: return (ENOTSUP); } return (0); } +static void +bge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + bge_t *bgep = 
barg; + int flags = bgep->chipid.flags; + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. + */ + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + if (DEVICE_5906_SERIES_CHIPSETS(bgep)) + mac_prop_info_set_default_uint8(prh, 0); + else + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, + (flags & CHIP_FLAG_SERDES) ? 0 : 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, BGE_DEFAULT_MTU, + (flags & CHIP_FLAG_NO_JUMBO) ? 
+ BGE_DEFAULT_MTU : BGE_MAXIMUM_MTU); + break; + + case MAC_PROP_PRIVATE: + bge_priv_propinfo(pr_name, prh); + break; + } + + mutex_enter(bgep->genlock); + if ((bgep->param_loop_mode != BGE_LOOP_NONE && + bge_param_locked(pr_num)) || + ((bgep->chipid.flags & CHIP_FLAG_SERDES) && + ((pr_num == MAC_PROP_EN_100FDX_CAP) || + (pr_num == MAC_PROP_EN_100HDX_CAP) || + (pr_num == MAC_PROP_EN_10FDX_CAP) || + (pr_num == MAC_PROP_EN_10HDX_CAP))) || + (DEVICE_5906_SERIES_CHIPSETS(bgep) && + ((pr_num == MAC_PROP_EN_1000FDX_CAP) || + (pr_num == MAC_PROP_EN_1000HDX_CAP)))) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + mutex_exit(bgep->genlock); +} + /* ARGSUSED */ static int bge_set_priv_prop(bge_t *bgep, const char *pr_name, uint_t pr_valsize, @@ -1235,53 +1188,61 @@ bge_set_priv_prop(bge_t *bgep, const char *pr_name, uint_t pr_valsize, } static int -bge_get_priv_prop(bge_t *bge, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +bge_get_priv_prop(bge_t *bge, const char *pr_name, uint_t pr_valsize, + void *pr_val) { - int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - value = (is_default? 1 : bge->param_adv_pause); - err = 0; - goto done; - } - if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - value = (is_default? 1 : bge->param_adv_asym_pause); - err = 0; - goto done; - } - if (strcmp(pr_name, "_drain_max") == 0) { - value = (is_default? 64 : bge->param_drain_max); - err = 0; - goto done; - } - if (strcmp(pr_name, "_msi_cnt") == 0) { - value = (is_default? 
0 : bge->param_msi_cnt); - err = 0; - goto done; - } + if (strcmp(pr_name, "_adv_pause_cap") == 0) + value = bge->param_adv_pause; + else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) + value = bge->param_adv_asym_pause; + else if (strcmp(pr_name, "_drain_max") == 0) + value = bge->param_drain_max; + else if (strcmp(pr_name, "_msi_cnt") == 0) + value = bge->param_msi_cnt; + else if (strcmp(pr_name, "_rx_intr_coalesce_blank_time") == 0) + value = bge->chipid.rx_ticks_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_blank_time") == 0) + value = bge->chipid.tx_ticks_norm; + else if (strcmp(pr_name, "_rx_intr_coalesce_pkt_cnt") == 0) + value = bge->chipid.rx_count_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_pkt_cnt") == 0) + value = bge->chipid.tx_count_norm; + else + return (ENOTSUP); - if (strcmp(pr_name, "_intr_coalesce_blank_time") == 0) { - value = (is_default? bge_rx_ticks_norm : - bge->chipid.rx_ticks_norm); - err = 0; - goto done; - } + (void) snprintf(pr_val, pr_valsize, "%d", value); + return (0); +} - if (strcmp(pr_name, "_intr_coalesce_pkt_cnt") == 0) { - value = (is_default? 
bge_rx_count_norm : - bge->chipid.rx_count_norm); - err = 0; - goto done; - } +static void +bge_priv_propinfo(const char *pr_name, mac_prop_info_handle_t mph) +{ + char valstr[64]; + int value; -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); - } - return (err); + if (strcmp(pr_name, "_adv_pause_cap") == 0) + value = 1; + else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) + value = 1; + else if (strcmp(pr_name, "_drain_max") == 0) + value = 64; + else if (strcmp(pr_name, "_msi_cnt") == 0) + value = 0; + else if (strcmp(pr_name, "_rx_intr_coalesce_blank_time") == 0) + value = bge_rx_ticks_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_blank_time") == 0) + value = bge_tx_ticks_norm; + else if (strcmp(pr_name, "_rx_intr_coalesce_pkt_cnt") == 0) + value = bge_rx_count_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_pkt_cnt") == 0) + value = bge_tx_count_norm; + else + return; + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(mph, valstr); } /* @@ -1682,6 +1643,7 @@ bge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = bge_ring_start; infop->mri_stop = NULL; infop->mri_poll = bge_poll_ring; + infop->mri_stat = bge_rx_ring_stat; mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; @@ -3517,7 +3479,6 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_max_sdu = cidp->ethmax_size - sizeof (struct ether_header); macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = bge_priv_prop; - macp->m_priv_prop_count = BGE_MAX_PRIV_PROPS; macp->m_v12n = MAC_VIRT_LEVEL1; /* @@ -3551,7 +3512,6 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) #endif ddi_report_dev(devinfo); - BGE_REPORT((bgep, "bge version: %s", bge_version)); return (DDI_SUCCESS); diff --git a/usr/src/uts/common/io/bge/bge_recv2.c b/usr/src/uts/common/io/bge/bge_recv2.c index fb8e1fa881..ecda51cb0a 100644 --- a/usr/src/uts/common/io/bge/bge_recv2.c +++ 
b/usr/src/uts/common/io/bge/bge_recv2.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,11 +65,12 @@ bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp) bge_mbx_put(bgep, brp->chip_mbx_reg, slot); } -static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p); +static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, + recv_ring_t *rrp); #pragma inline(bge_receive_packet) static mblk_t * -bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p) +bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, recv_ring_t *rrp) { bge_rbd_t hw_rbd; buff_ring_t *brp; @@ -237,10 +238,13 @@ bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p) if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM) pflags |= HCK_FULLCKSUM; if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM) - pflags |= HCK_IPV4_HDRCKSUM; + pflags |= HCK_IPV4_HDRCKSUM_OK; if (pflags != 0) - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, - hw_rbd.tcp_udp_cksum, pflags, 0); + mac_hcksum_set(mp, 0, 0, 0, hw_rbd.tcp_udp_cksum, pflags); + + /* Update per-ring rx statistics */ + rrp->rx_pkts++; + rrp->rx_bytes += len; refill: /* @@ -313,7 +317,8 @@ bge_receive_ring(bge_t *bgep, recv_ring_t *rrp) while ((slot != *rrp->prod_index_p) && /* Note: volatile */ (recv_cnt < BGE_MAXPKT_RCVED)) { - if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) { + if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp)) + != NULL) { *tail = mp; tail = &mp->b_next; recv_cnt++; @@ -383,7 +388,8 @@ bge_poll_ring(void *arg, int bytes_to_pickup) /* Note: volatile */ while ((slot != *rrp->prod_index_p) && (sz <= bytes_to_pickup)) { - if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) { + if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp)) + != NULL) { *tail = mp; sz += msgdsize(mp); tail = &mp->b_next; diff --git a/usr/src/uts/common/io/bge/bge_send.c 
b/usr/src/uts/common/io/bge/bge_send.c index 11f23e9f64..be3f179f31 100644 --- a/usr/src/uts/common/io/bge/bge_send.c +++ b/usr/src/uts/common/io/bge/bge_send.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -533,7 +533,7 @@ bge_ring_tx(void *arg, mblk_t *mp) /* * Retrieve checksum offloading info. */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); /* * Calculate pseudo checksum if needed. diff --git a/usr/src/uts/common/io/bridge.c b/usr/src/uts/common/io/bridge.c index ffd215c132..7b45039b62 100644 --- a/usr/src/uts/common/io/bridge.c +++ b/usr/src/uts/common/io/bridge.c @@ -492,36 +492,16 @@ bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, static int bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { bridge_mac_t *bmp = arg; int err = 0; _NOTE(ARGUNUSED(pr_name)); switch (pr_num) { - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = bmp->bm_maxsdu; - bcopy(&range, pr_val, sizeof (range)); - *perm = MAC_PROP_PERM_RW; - break; - } case MAC_PROP_STATUS: - if (pr_valsize < sizeof (bmp->bm_linkstate)) { - err = EINVAL; - } else { - bcopy(&bmp->bm_linkstate, pr_val, - sizeof (&bmp->bm_linkstate)); - *perm = MAC_PROP_PERM_READ; - } + ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate)); + bcopy(&bmp->bm_linkstate, pr_val, sizeof (&bmp->bm_linkstate)); break; default: @@ -531,8 +511,27 @@ bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 
return (err); } +static void +bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + bridge_mac_t *bmp = arg; + + _NOTE(ARGUNUSED(pr_name)); + + switch (pr_num) { + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu, + bmp->bm_maxsdu); + break; + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + } +} + static mac_callbacks_t bridge_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, bridge_m_getstat, bridge_m_start, bridge_m_stop, @@ -540,12 +539,14 @@ static mac_callbacks_t bridge_m_callbacks = { bridge_m_multicst, bridge_m_unicst, bridge_m_tx, + NULL, /* reserved */ NULL, /* ioctl */ NULL, /* getcapab */ NULL, /* open */ NULL, /* close */ bridge_m_setprop, - bridge_m_getprop + bridge_m_getprop, + bridge_m_propinfo }; /* diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index c91793723e..36d1c3d6ff 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -429,8 +429,9 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) dld_ioc_hwgrpget_t *hwgrpp = karg; dld_hwgrpinfo_t hwgrp, *hip; mac_handle_t mh = NULL; - int i, err, grpnum; + int i, err, rgrpnum, tgrpnum; uint_t bytes_left; + int totgrps = 0; zoneid_t zoneid = crgetzoneid(cred); if (zoneid != GLOBAL_ZONEID && @@ -445,8 +446,35 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) hip = (dld_hwgrpinfo_t *) ((uchar_t *)arg + sizeof (dld_ioc_hwgrpget_t)); bytes_left = hwgrpp->dih_size; - grpnum = mac_hwgrp_num(mh); - for (i = 0; i < grpnum; i++) { + + rgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_RX); + /* display the default group information first */ + if (rgrpnum > 0) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwrxgrp_info(mh, 0, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings != 0) { + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { + err = EFAULT; + goto done; + } + } + hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + tgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_TX); + /* display the default group information first */ + if (tgrpnum > 0) { if (sizeof (dld_hwgrpinfo_t) > bytes_left) { err = ENOSPC; goto done; @@ -455,15 +483,68 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) bzero(&hwgrp, sizeof (hwgrp)); bcopy(mac_name(mh), hwgrp.dhi_link_name, sizeof (hwgrp.dhi_link_name)); - mac_get_hwgrp_info(mh, i, &hwgrp.dhi_grp_num, - &hwgrp.dhi_n_rings, &hwgrp.dhi_grp_type, + mac_get_hwtxgrp_info(mh, tgrpnum - 1, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings != 0) { + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) 
{ + err = EFAULT; + goto done; + } + } + hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + /* Rest of the rx groups */ + for (i = 1; i < rgrpnum; i++) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwrxgrp_info(mh, i, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings == 0) + continue; if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { err = EFAULT; goto done; } hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + /* Rest of the tx group */ + tgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_TX); + for (i = 0; i < tgrpnum - 1; i++) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwtxgrp_info(mh, i, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings == 0) + continue; + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { + err = EFAULT; + goto done; + } + + hip++; + totgrps++; bytes_left -= sizeof (dld_hwgrpinfo_t); } @@ -471,7 +552,7 @@ done: if (mh != NULL) dld_mac_close(mh); if (err == 0) - hwgrpp->dih_n_groups = grpnum; + hwgrpp->dih_n_groups = totgrps; return (err); } @@ -542,7 +623,7 @@ done: } /* - * DLDIOC_SET/GETPROP + * DLDIOC_SET/GETMACPROP */ static int drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, @@ -552,7 +633,6 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, dls_dl_handle_t dlh = NULL; dls_link_t *dlp = NULL; mac_perim_handle_t mph = NULL; - mac_prop_t macprop; dld_ioc_macprop_t *kprop; datalink_id_t linkid; datalink_class_t class; @@ -606,6 +686,12 @@ 
drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, goto done; } + if (!mac_prop_check_size(kprop->pr_num, kprop->pr_valsize, + kprop->pr_flags & DLD_PROP_POSSIBLE)) { + err = ENOBUFS; + goto done; + } + switch (kprop->pr_num) { case MAC_PROP_ZONE: if (set) { @@ -630,6 +716,9 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, else err = drv_ioc_clrap(linkid); } else { + if (kprop->pr_valsize == 0) + return (ENOBUFS); + kprop->pr_perm_flags = MAC_PROP_PERM_RW; err = drv_ioc_getap(linkid, dlap); } @@ -652,19 +741,51 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, err = 0; } break; - default: - macprop.mp_name = kprop->pr_name; - macprop.mp_id = kprop->pr_num; - macprop.mp_flags = kprop->pr_flags; - + default: { + mac_propval_range_t range, *rangep = NULL; + void *default_val = NULL; + uint_t default_size = 0; + void *val = kprop->pr_val; + uint_t val_size = kprop->pr_valsize; + + /* set a property value */ if (set) { - err = mac_set_prop(dlp->dl_mh, &macprop, kprop->pr_val, - kprop->pr_valsize); - } else { - kprop->pr_perm_flags = MAC_PROP_PERM_RW; - err = mac_get_prop(dlp->dl_mh, &macprop, kprop->pr_val, - kprop->pr_valsize, &kprop->pr_perm_flags); + err = mac_set_prop(dlp->dl_mh, kprop->pr_num, + kprop->pr_name, kprop->pr_val, kprop->pr_valsize); + break; + } + + /* + * Get the property value, default, or possible value + * depending on flags passed from the user. + */ + + /* a property has RW permissions by default */ + kprop->pr_perm_flags = MAC_PROP_PERM_RW; + + if (kprop->pr_flags & DLD_PROP_POSSIBLE) { + rangep = &range; + } else if (kprop->pr_flags & DLD_PROP_DEFAULT) { + default_val = val; + default_size = val_size; } + + /* + * Always return the permissions, and optionally return + * the default value or possible values range. 
+ */ + mac_prop_info(dlp->dl_mh, kprop->pr_num, kprop->pr_name, + default_val, default_size, rangep, &kprop->pr_perm_flags); + err = 0; + + if (default_val == NULL && rangep == NULL) { + err = mac_get_prop(dlp->dl_mh, kprop->pr_num, + kprop->pr_name, kprop->pr_val, kprop->pr_valsize); + } + + if (rangep != NULL) + bcopy(rangep, val, sizeof (range)); + } } done: @@ -673,6 +794,7 @@ done: if (dlp != NULL) dls_link_rele(dlp); + if (mph != NULL) { int32_t cpuid; void *mdip = NULL; @@ -684,9 +806,10 @@ done: mac_perim_exit(mph); - if (mdip != NULL) + if (mdip != NULL && cpuid != -1) mac_client_set_intr_cpu(mdip, dlp->dl_mch, cpuid); } + if (dlh != NULL) dls_devnet_rele_tmp(dlh); @@ -828,7 +951,8 @@ drv_ioc_getap(datalink_id_t linkid, struct dlautopush *dlap) (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&dap) != 0) { rw_exit(&dld_ap_hash_lock); - return (ENOENT); + dlap->dap_npush = 0; + return (0); } /* @@ -1221,7 +1345,7 @@ static dld_ioc_info_t drv_ioc_list[] = { {DLDIOC_GETMACPROP, DLDCOPYIN, sizeof (dld_ioc_macprop_t), drv_ioc_getprop, NULL}, {DLDIOC_GETHWGRP, DLDCOPYINOUT, sizeof (dld_ioc_hwgrpget_t), - drv_ioc_hwgrpget, secpolicy_dl_config}, + drv_ioc_hwgrpget, NULL}, }; typedef struct dld_ioc_modentry { diff --git a/usr/src/uts/common/io/dld/dld_flow.c b/usr/src/uts/common/io/dld/dld_flow.c index 281217d02d..7171953a2d 100644 --- a/usr/src/uts/common/io/dld/dld_flow.c +++ b/usr/src/uts/common/io/dld/dld_flow.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -69,20 +69,23 @@ static int dld_walk_flow_cb(mac_flowinfo_t *finfo, void *arg) { flowinfo_state_t *statep = arg; - dld_flowinfo_t fi; + dld_flowinfo_t *fi; if (statep->fi_bufsize < sizeof (dld_flowinfo_t)) return (ENOSPC); - (void) strlcpy(fi.fi_flowname, finfo->fi_flow_name, - sizeof (fi.fi_flowname)); - fi.fi_linkid = finfo->fi_link_id; - fi.fi_flow_desc = finfo->fi_flow_desc; - fi.fi_resource_props = finfo->fi_resource_props; + fi = kmem_zalloc(sizeof (*fi), KM_SLEEP); + (void) strlcpy(fi->fi_flowname, finfo->fi_flow_name, + sizeof (fi->fi_flowname)); + fi->fi_linkid = finfo->fi_link_id; + fi->fi_flow_desc = finfo->fi_flow_desc; + fi->fi_resource_props = finfo->fi_resource_props; - if (copyout(&fi, statep->fi_fl, sizeof (fi)) != 0) { + if (copyout(fi, statep->fi_fl, sizeof (*fi)) != 0) { + kmem_free(fi, sizeof (*fi)); return (EFAULT); } + kmem_free(fi, sizeof (*fi)); statep->fi_nflows++; statep->fi_bufsize -= sizeof (dld_flowinfo_t); statep->fi_fl += sizeof (dld_flowinfo_t); @@ -98,13 +101,14 @@ int dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) { flowinfo_state_t state; - mac_flowinfo_t finfo; + mac_flowinfo_t *finfo; int err = 0; /* For now, one can only view flows from the global zone. 
*/ if (crgetzoneid(credp) != GLOBAL_ZONEID) return (EPERM); + finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); state.fi_bufsize = wf->wf_len; state.fi_fl = (uchar_t *)uaddr + sizeof (*wf); state.fi_nflows = 0; @@ -113,12 +117,14 @@ dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) err = mac_link_flow_walk(wf->wf_linkid, dld_walk_flow_cb, &state); } else { - err = mac_link_flow_info(wf->wf_name, &finfo); - if (err != 0) + err = mac_link_flow_info(wf->wf_name, finfo); + if (err != 0) { + kmem_free(finfo, sizeof (*finfo)); return (err); - - err = dld_walk_flow_cb(&finfo, &state); + } + err = dld_walk_flow_cb(finfo, &state); } + kmem_free(finfo, sizeof (*finfo)); wf->wf_nflows = state.fi_nflows; return (err); } diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index ca1fc10306..67774c329f 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -476,7 +476,8 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp) * etc. since part of mac_client_retarget_intr is to walk the * device tree in order to find and retarget the interrupts. */ - mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu); + if (intr_cpu != -1) + mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu); /* * Copy in MAC address. diff --git a/usr/src/uts/common/io/dmfe/dmfe_main.c b/usr/src/uts/common/io/dmfe/dmfe_main.c index 7c32a176aa..1bd204683d 100644 --- a/usr/src/uts/common/io/dmfe/dmfe_main.c +++ b/usr/src/uts/common/io/dmfe/dmfe_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -197,12 +197,14 @@ static void dmfe_m_ioctl(void *, queue_t *, mblk_t *); static mblk_t *dmfe_m_tx(void *, mblk_t *); static int dmfe_m_stat(void *, uint_t, uint64_t *); static int dmfe_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); static int dmfe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void dmfe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t dmfe_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP), + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, dmfe_m_stat, dmfe_m_start, dmfe_m_stop, @@ -210,12 +212,14 @@ static mac_callbacks_t dmfe_m_callbacks = { dmfe_m_multicst, dmfe_m_unicst, dmfe_m_tx, + NULL, dmfe_m_ioctl, NULL, /* getcapab */ NULL, /* open */ NULL, /* close */ dmfe_m_setprop, - dmfe_m_getprop + dmfe_m_getprop, + dmfe_m_propinfo }; @@ -2178,12 +2182,12 @@ dmfe_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) } int -dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { dmfe_t *dmfep = arg; - return (mii_m_getprop(dmfep->mii, name, num, flags, sz, val, perm)); + return (mii_m_getprop(dmfep->mii, name, num, sz, val)); } int @@ -2195,6 +2199,14 @@ dmfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (mii_m_setprop(dmfep->mii, name, num, sz, val)); } +static void +dmfe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + dmfe_t *dmfep = arg; + + mii_m_propinfo(dmfep->mii, name, num, mph); +} /* * ========== Per-instance setup/teardown code ========== diff --git a/usr/src/uts/common/io/e1000g/e1000g_main.c b/usr/src/uts/common/io/e1000g/e1000g_main.c index 57d2401894..569b3f6f87 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_main.c +++ b/usr/src/uts/common/io/e1000g/e1000g_main.c @@ -45,8 +45,6 
@@ #include "e1000g_debug.h" static char ident[] = "Intel PRO/1000 Ethernet"; -static char e1000g_string[] = "Intel(R) PRO/1000 Network Connection"; -static char e1000g_version[] = "Driver Ver. 5.3.22"; /* * Proto types for DDI entry points @@ -76,11 +74,12 @@ static void e1000g_m_ioctl(void *, queue_t *, mblk_t *); static int e1000g_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int e1000g_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void e1000g_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int e1000g_set_priv_prop(struct e1000g *, const char *, uint_t, const void *); -static int e1000g_get_priv_prop(struct e1000g *, const char *, uint_t, - uint_t, void *, uint_t *); +static int e1000g_get_priv_prop(struct e1000g *, const char *, uint_t, void *); static void e1000g_init_locks(struct e1000g *); static void e1000g_destroy_locks(struct e1000g *); static int e1000g_identify_hardware(struct e1000g *); @@ -154,29 +153,26 @@ static int e1000g_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data); static void e1000g_fm_init(struct e1000g *Adapter); static void e1000g_fm_fini(struct e1000g *Adapter); -static int e1000g_get_def_val(struct e1000g *, mac_prop_id_t, uint_t, void *); static void e1000g_param_sync(struct e1000g *); static void e1000g_get_driver_control(struct e1000_hw *); static void e1000g_release_driver_control(struct e1000_hw *); static void e1000g_restore_promisc(struct e1000g *Adapter); -mac_priv_prop_t e1000g_priv_props[] = { - {"_tx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_tx_interrupt_enable", MAC_PROP_PERM_RW}, - {"_tx_intr_delay", MAC_PROP_PERM_RW}, - {"_tx_intr_abs_delay", MAC_PROP_PERM_RW}, - {"_rx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_max_num_rcv_packets", MAC_PROP_PERM_RW}, - {"_rx_intr_delay", MAC_PROP_PERM_RW}, - {"_rx_intr_abs_delay", MAC_PROP_PERM_RW}, - {"_intr_throttling_rate", 
MAC_PROP_PERM_RW}, - {"_intr_adaptive", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ}, +char *e1000g_priv_props[] = { + "_tx_bcopy_threshold", + "_tx_interrupt_enable", + "_tx_intr_delay", + "_tx_intr_abs_delay", + "_rx_bcopy_threshold", + "_max_num_rcv_packets", + "_rx_intr_delay", + "_rx_intr_abs_delay", + "_intr_throttling_rate", + "_intr_adaptive", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; -#define E1000G_MAX_PRIV_PROPS \ - (sizeof (e1000g_priv_props)/sizeof (mac_priv_prop_t)) - static struct cb_ops cb_ws_ops = { nulldev, /* cb_open */ @@ -233,7 +229,7 @@ static ddi_device_acc_attr_t e1000g_regs_acc_attr = { }; #define E1000G_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t e1000g_m_callbacks = { E1000G_M_CALLBACK_FLAGS, @@ -244,12 +240,14 @@ static mac_callbacks_t e1000g_m_callbacks = { e1000g_m_multicst, NULL, e1000g_m_tx, + NULL, e1000g_m_ioctl, e1000g_m_getcapab, NULL, NULL, e1000g_m_setprop, - e1000g_m_getprop + e1000g_m_getprop, + e1000g_m_propinfo }; /* @@ -581,7 +579,6 @@ e1000g_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) mutex_exit(&e1000g_rx_detach_lock); } - cmn_err(CE_CONT, "!%s, %s\n", e1000g_string, e1000g_version); Adapter->e1000g_state = E1000G_INITIALIZED; return (DDI_SUCCESS); @@ -610,7 +607,6 @@ e1000g_register_mac(struct e1000g *Adapter) mac->m_max_sdu = Adapter->default_mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = e1000g_priv_props; - mac->m_priv_prop_count = E1000G_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; err = mac_register(mac, &Adapter->mh); @@ -1964,6 +1960,10 @@ e1000g_stop(struct e1000g *Adapter, boolean_t global) ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_LOST); } + mutex_enter(&Adapter->link_lock); + Adapter->link_complete = B_FALSE; + mutex_exit(&Adapter->link_lock); + /* Release resources still held by the TX descriptors 
*/ e1000g_tx_clean(Adapter); @@ -2961,12 +2961,15 @@ e1000g_fill_ring(void *arg, mac_ring_type_t rtype, const int grp_index, infop->mri_start = e1000g_ring_start; infop->mri_stop = NULL; infop->mri_poll = e1000g_poll_ring; + infop->mri_stat = e1000g_rx_ring_stat; /* Ring level interrupts */ mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = e1000g_rx_ring_intr_enable; mintr->mi_disable = e1000g_rx_ring_intr_disable; + if (Adapter->msi_enable) + mintr->mi_ddi_handle = Adapter->htable[0]; } /* ARGSUSED */ @@ -3282,159 +3285,246 @@ reset: static int e1000g_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { struct e1000g *Adapter = arg; struct e1000_fc_info *fc = &Adapter->shared.fc; - struct e1000_hw *hw = &Adapter->shared; int err = 0; link_flowctrl_t flowctrl; uint64_t tmp = 0; - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) { - return (e1000g_get_def_val(Adapter, pr_num, - pr_valsize, pr_val)); - } - switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&Adapter->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&Adapter->link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - tmp = Adapter->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp = Adapter->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 
Adapter->param_adv_autoneg; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (link_flowctrl_t)) { - switch (fc->current_mode) { - case e1000_fc_none: - flowctrl = LINK_FLOWCTRL_NONE; - break; - case e1000_fc_rx_pause: - flowctrl = LINK_FLOWCTRL_RX; - break; - case e1000_fc_tx_pause: - flowctrl = LINK_FLOWCTRL_TX; - break; - case e1000_fc_full: - flowctrl = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flowctrl, pr_val, sizeof (flowctrl)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); + switch (fc->current_mode) { + case e1000_fc_none: + flowctrl = LINK_FLOWCTRL_NONE; + break; + case e1000_fc_rx_pause: + flowctrl = LINK_FLOWCTRL_RX; + break; + case e1000_fc_tx_pause: + flowctrl = LINK_FLOWCTRL_TX; + break; + case e1000_fc_full: + flowctrl = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flowctrl, pr_val, sizeof (flowctrl)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = 
MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100t4; break; case MAC_PROP_PRIVATE: err = e1000g_get_priv_prop(Adapter, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: { - struct e1000_mac_info *mac = &Adapter->shared.mac; - struct e1000_phy_info *phy = &Adapter->shared.phy; - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = DEFAULT_MTU; - range.range_uint32[0].mpur_max = Adapter->max_mtu; - /* following MAC type do not support jumbo frames */ - if ((mac->type == e1000_ich8lan) || - ((mac->type == e1000_ich9lan) && (phy->type == - e1000_phy_ife))) { - range.range_uint32[0].mpur_max = DEFAULT_MTU; - } - bcopy(&range, pr_val, sizeof (range)); + pr_valsize, pr_val); break; - } default: err = ENOTSUP; break; } + return (err); } +static void +e1000g_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + struct e1000g *Adapter = arg; + struct e1000_hw *hw = &Adapter->shared; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case 
MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_ext_status & + IEEE_ESR_1000T_FD_CAPS) || + (Adapter->phy_ext_status & + IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0); + } + break; + + case MAC_PROP_EN_100FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_status & MII_SR_100X_FD_CAPS) || + (Adapter->phy_status & MII_SR_100T2_FD_CAPS)) + ? 1 : 0); + } + break; + + case MAC_PROP_EN_100HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_status & MII_SR_100X_HD_CAPS) || + (Adapter->phy_status & MII_SR_100T2_HD_CAPS)) + ? 1 : 0); + } + break; + + case MAC_PROP_EN_10FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_EN_10HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_EN_1000HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_AUTONEG_CAPS) + ? 
1 : 0); + } + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: { + struct e1000_mac_info *mac = &Adapter->shared.mac; + struct e1000_phy_info *phy = &Adapter->shared.phy; + uint32_t max; + + /* some MAC types do not support jumbo frames */ + if ((mac->type == e1000_ich8lan) || + ((mac->type == e1000_ich9lan) && (phy->type == + e1000_phy_ife))) { + max = DEFAULT_MTU; + } else { + max = Adapter->max_mtu; + } + + mac_prop_info_set_range_uint32(prh, DEFAULT_MTU, max); + break; + } + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } else if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { + value = DEFAULT_TX_BCOPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_interrupt_enable") == 0) { + value = DEFAULT_TX_INTR_ENABLE; + } else if (strcmp(pr_name, "_tx_intr_delay") == 0) { + value = DEFAULT_TX_INTR_DELAY; + } else if (strcmp(pr_name, "_tx_intr_abs_delay") == 0) { + value = DEFAULT_TX_INTR_ABS_DELAY; + } else if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { + value = DEFAULT_RX_BCOPY_THRESHOLD; + } else if (strcmp(pr_name, "_max_num_rcv_packets") == 0) { + value = DEFAULT_RX_LIMIT_ON_INTR; + } else if (strcmp(pr_name, "_rx_intr_delay") == 0) { + value = DEFAULT_RX_INTR_DELAY; + } else if (strcmp(pr_name, "_rx_intr_abs_delay") == 0) { + value = DEFAULT_RX_INTR_ABS_DELAY; + } else if (strcmp(pr_name, "_intr_throttling_rate") == 0) { + value = DEFAULT_INTR_THROTTLING; + } else if (strcmp(pr_name, "_intr_adaptive") == 0) { + value = 1; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} + /* ARGSUSED2 */ static int e1000g_set_priv_prop(struct e1000g *Adapter, const char *pr_name, @@ -3643,84 +3733,68 @@ 
e1000g_set_priv_prop(struct e1000g *Adapter, const char *pr_name, static int e1000g_get_priv_prop(struct e1000g *Adapter, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - if (is_default) - goto done; value = Adapter->param_adv_pause; err = 0; goto done; } if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - if (is_default) - goto done; value = Adapter->param_adv_asym_pause; err = 0; goto done; } if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { - value = (is_default ? DEFAULT_TX_BCOPY_THRESHOLD : - Adapter->tx_bcopy_thresh); + value = Adapter->tx_bcopy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_interrupt_enable") == 0) { - value = (is_default ? DEFAULT_TX_INTR_ENABLE : - Adapter->tx_intr_enable); + value = Adapter->tx_intr_enable; err = 0; goto done; } if (strcmp(pr_name, "_tx_intr_delay") == 0) { - value = (is_default ? DEFAULT_TX_INTR_DELAY : - Adapter->tx_intr_delay); + value = Adapter->tx_intr_delay; err = 0; goto done; } if (strcmp(pr_name, "_tx_intr_abs_delay") == 0) { - value = (is_default ? DEFAULT_TX_INTR_ABS_DELAY : - Adapter->tx_intr_abs_delay); + value = Adapter->tx_intr_abs_delay; err = 0; goto done; } if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { - value = (is_default ? DEFAULT_RX_BCOPY_THRESHOLD : - Adapter->rx_bcopy_thresh); + value = Adapter->rx_bcopy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_max_num_rcv_packets") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_ON_INTR : - Adapter->rx_limit_onintr); + value = Adapter->rx_limit_onintr; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_delay") == 0) { - value = (is_default ? 
DEFAULT_RX_INTR_DELAY : - Adapter->rx_intr_delay); + value = Adapter->rx_intr_delay; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_abs_delay") == 0) { - value = (is_default ? DEFAULT_RX_INTR_ABS_DELAY : - Adapter->rx_intr_abs_delay); + value = Adapter->rx_intr_abs_delay; err = 0; goto done; } if (strcmp(pr_name, "_intr_throttling_rate") == 0) { - value = (is_default ? DEFAULT_INTR_THROTTLING : - Adapter->intr_throttling_rate); + value = Adapter->intr_throttling_rate; err = 0; goto done; } if (strcmp(pr_name, "_intr_adaptive") == 0) { - value = (is_default ? 1 : Adapter->intr_adaptive); + value = Adapter->intr_adaptive; err = 0; goto done; } @@ -6284,88 +6358,6 @@ e1000g_quiesce(dev_info_t *devinfo) return (DDI_SUCCESS); } -static int -e1000g_get_def_val(struct e1000g *Adapter, mac_prop_id_t pr_num, - uint_t pr_valsize, void *pr_val) -{ - link_flowctrl_t fl; - struct e1000_hw *hw = &Adapter->shared; - int err = 0; - - ASSERT(pr_valsize > 0); - switch (pr_num) { - case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_AUTONEG_CAPS) - ? 1 : 0); - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 1; - else - *(uint8_t *)pr_val = - ((Adapter->phy_ext_status & - IEEE_ESR_1000T_FD_CAPS) || - (Adapter->phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) - ? 
1 : 0; - break; - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - *(uint8_t *)pr_val = 0; - break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_100X_FD_CAPS) || - (Adapter->phy_status & MII_SR_100T2_FD_CAPS)) - ? 1 : 0; - break; - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_100X_HD_CAPS) || - (Adapter->phy_status & MII_SR_100T2_HD_CAPS)) - ? 1 : 0; - break; - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - (Adapter->phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0; - break; - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - (Adapter->phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0; - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - /* * synchronize the adv* and en* parameters. * diff --git a/usr/src/uts/common/io/e1000g/e1000g_rx.c b/usr/src/uts/common/io/e1000g/e1000g_rx.c index a4ff68894a..fb4d621bfb 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_rx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_rx.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -709,8 +709,7 @@ rx_copy: E1000_RXD_STAT_TCPCS) && !(current_desc->errors & E1000_RXD_ERR_TCPE)) - cksumflags |= HCK_FULLCKSUM | - HCK_FULLCKSUM_OK; + cksumflags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ @@ -718,7 +717,7 @@ rx_copy: E1000_RXD_STAT_IPCS) && !(current_desc->errors & E1000_RXD_ERR_IPE)) - cksumflags |= HCK_IPV4_HDRCKSUM; + cksumflags |= HCK_IPV4_HDRCKSUM_OK; } } @@ -771,8 +770,8 @@ rx_end_of_packet: * Process the last fragment. */ if (cksumflags != 0) { - (void) hcksum_assoc(rx_data->rx_mblk, - NULL, NULL, 0, 0, 0, 0, cksumflags, 0); + mac_hcksum_set(rx_data->rx_mblk, + 0, 0, 0, 0, cksumflags); cksumflags = 0; } diff --git a/usr/src/uts/common/io/e1000g/e1000g_stat.c b/usr/src/uts/common/io/e1000g/e1000g_stat.c index e2a7544004..7ec964f628 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_stat.c +++ b/usr/src/uts/common/io/e1000g/e1000g_stat.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -971,3 +971,60 @@ e1000g_read_phy_stat(struct e1000_hw *hw, int reg) return (val); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +e1000g_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + e1000g_rx_ring_t *rx_ring = (e1000g_rx_ring_t *)rh; + struct e1000g *Adapter = rx_ring->adapter; + struct e1000_hw *hw = &Adapter->shared; + p_e1000g_stat_t e1000g_ksp = + (p_e1000g_stat_t)Adapter->e1000g_ksp->ks_data; + uint32_t low_val, high_val; + + rw_enter(&Adapter->chip_lock, RW_READER); + + if (Adapter->e1000g_state & E1000G_SUSPENDED) { + rw_exit(&Adapter->chip_lock); + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_RBYTES: + /* + * The 64-bit register will reset whenever the upper + * 32 bits are read. So we need to read the lower + * 32 bits first, then read the upper 32 bits. 
+ */ + low_val = E1000_READ_REG(hw, E1000_TORL); + high_val = E1000_READ_REG(hw, E1000_TORH); + *val = (uint64_t)e1000g_ksp->Torh.value.ul << 32 | + (uint64_t)e1000g_ksp->Torl.value.ul; + *val += (uint64_t)high_val << 32 | (uint64_t)low_val; + + e1000g_ksp->Torl.value.ul = (uint32_t)*val; + e1000g_ksp->Torh.value.ul = (uint32_t)(*val >> 32); + break; + + case MAC_STAT_IPACKETS: + e1000g_ksp->Tpr.value.ul += + E1000_READ_REG(hw, E1000_TPR); + *val = e1000g_ksp->Tpr.value.ul; + break; + + default: + *val = 0; + rw_exit(&Adapter->chip_lock); + return (ENOTSUP); + } + + rw_exit(&Adapter->chip_lock); + + if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) + ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_UNAFFECTED); + + return (0); +} diff --git a/usr/src/uts/common/io/e1000g/e1000g_sw.h b/usr/src/uts/common/io/e1000g/e1000g_sw.h index ee9ff56fbf..40611707bb 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_sw.h +++ b/usr/src/uts/common/io/e1000g/e1000g_sw.h @@ -1052,6 +1052,7 @@ void e1000g_rxfree_func(p_rx_sw_packet_t packet); int e1000g_m_stat(void *arg, uint_t stat, uint64_t *val); int e1000g_init_stats(struct e1000g *Adapter); +int e1000g_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); void e1000_tbi_adjust_stats(struct e1000g *Adapter, uint32_t frame_len, uint8_t *mac_addr); diff --git a/usr/src/uts/common/io/e1000g/e1000g_tx.c b/usr/src/uts/common/io/e1000g/e1000g_tx.c index 9d58d9b127..512f1bd21e 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_tx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_tx.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -470,10 +470,10 @@ e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context, bzero(cur_context, sizeof (context_data_t)); /* first check lso information */ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); /* retrieve checksum info */ - hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start, + mac_hcksum_get(mp, &cur_context->cksum_start, &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags); /* retrieve ethernet header size */ if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid == diff --git a/usr/src/uts/common/io/elxl/elxl.c b/usr/src/uts/common/io/elxl/elxl.c index b23702cebc..2ffe96aff3 100644 --- a/usr/src/uts/common/io/elxl/elxl.c +++ b/usr/src/uts/common/io/elxl/elxl.c @@ -84,9 +84,11 @@ static int elxl_m_promisc(void *, boolean_t); static int elxl_m_multicst(void *, boolean_t, const uint8_t *); static int elxl_m_unicst(void *, const uint8_t *); static int elxl_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int elxl_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void elxl_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static boolean_t elxl_m_getcapab(void *, mac_capab_t cap, void *); static uint_t elxl_intr(caddr_t, caddr_t); static void elxl_error(elxl_t *, char *, ...); @@ -198,9 +200,10 @@ static const struct ex_product { { 0, NULL, 0 }, }; -mac_priv_prop_t ex_priv_prop[] = { - { "_media", MAC_PROP_PERM_RW }, - { "_available_media", MAC_PROP_PERM_READ }, +static char *ex_priv_prop[] = { + "_media", + "_available_media", + NULL }; static mii_ops_t ex_mii_ops = { @@ -211,7 +214,7 @@ static mii_ops_t ex_mii_ops = { }; static mac_callbacks_t elxl_m_callbacks = { - MC_GETCAPAB | MC_SETPROP | MC_GETPROP, + MC_GETCAPAB | MC_PROPERTIES, elxl_m_stat, elxl_m_start, elxl_m_stop, @@ -220,11 +223,13 @@ static mac_callbacks_t elxl_m_callbacks = { elxl_m_unicst, elxl_m_tx, NULL, + NULL, 
elxl_m_getcapab, NULL, NULL, elxl_m_setprop, - elxl_m_getprop + elxl_m_getprop, + elxl_m_propinfo }; /* @@ -575,7 +580,6 @@ elxl_attach(dev_info_t *dip) macp->m_max_sdu = ETHERMTU; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = ex_priv_prop; - macp->m_priv_prop_count = 2; (void) ddi_intr_enable(sc->ex_intrh); @@ -1387,38 +1391,32 @@ elxl_m_getcapab(void *arg, mac_capab_t cap, void *data) } static int -elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { elxl_t *sc = arg; int rv; - boolean_t isdef = (flags & MAC_PROP_DEFAULT); if (sc->ex_mii_active) { - rv = mii_m_getprop(sc->ex_miih, name, num, flags, sz, - val, perm); + rv = mii_m_getprop(sc->ex_miih, name, num, sz, val); if (rv != ENOTSUP) return (rv); } switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = isdef ? LINK_DUPLEX_HALF : sc->ex_duplex; + *(uint8_t *)val = sc->ex_duplex; break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)val = sc->ex_speed; break; case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; bcopy(&sc->ex_link, val, sizeof (link_state_t)); break; case MAC_PROP_PRIVATE: if (strcmp(name, "_media") == 0) { char *str; - *perm = MAC_PROP_PERM_RW; switch (sc->ex_xcvr) { case XCVR_SEL_AUTO: @@ -1456,7 +1454,6 @@ elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, * MAC_PROP_POSSIBLE with private properties.) 
*/ if (strcmp(name, "_available_media") == 0) { - *perm = MAC_PROP_PERM_READ; (void) snprintf(val, sz, "%s", sc->ex_medias); return (0); } @@ -1577,6 +1574,29 @@ reset: return (0); } +static void +elxl_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + elxl_t *sc = arg; + + if (sc->ex_mii_active) + mii_m_propinfo(sc->ex_miih, name, num, prh); + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_PRIVATE: + if (strcmp(name, "_available_media") == 0) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + } +} + static int elxl_m_stat(void *arg, uint_t stat, uint64_t *val) { diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c index 9814fdb5e7..c13b5237b5 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c @@ -33,14 +33,12 @@ #include <oce_ioctl.h> /* array of properties supported by this driver */ -mac_priv_prop_t oce_priv_props[] = { - {"_tx_ring_size", MAC_PROP_PERM_READ}, - {"_tx_bcopy_limit", MAC_PROP_PERM_RW}, - {"_rx_bcopy_limit", MAC_PROP_PERM_RW}, - {"_rx_ring_size", MAC_PROP_PERM_READ}, +char *oce_priv_props[] = { + "_tx_ring_size", + "_tx_bcopy_limit", + "_rx_ring_size", + NULL }; -uint32_t oce_num_props = sizeof (oce_priv_props) / sizeof (mac_priv_prop_t); - /* ---[ static function declarations ]----------------------------------- */ static int oce_power10(int power); @@ -48,7 +46,7 @@ static int oce_set_priv_prop(struct oce_dev *dev, const char *name, uint_t size, const void *val); static int oce_get_priv_prop(struct oce_dev *dev, const char *name, - uint_t flags, uint_t size, void *val); + uint_t size, void *val); /* ---[ GLD entry points ]----------------------------------------------- */ int @@ -446,119 +444,62 @@ oce_m_setprop(void *arg, const char 
*name, mac_prop_id_t id, int oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, - uint_t flags, uint_t size, void *val, uint_t *perm) + uint_t size, void *val) { struct oce_dev *dev = arg; uint32_t ret = 0; - *perm = MAC_PROP_PERM_READ; - switch (id) { - case MAC_PROP_AUTONEG: - case MAC_PROP_EN_AUTONEG: - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: { - *(uint8_t *)val = 0x0; - break; - } - - case MAC_PROP_ADV_10GFDX_CAP: { - *(uint8_t *)val = 0x01; - break; - } - - case MAC_PROP_EN_10GFDX_CAP: { + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: *(uint8_t *)val = 0x01; break; - } case MAC_PROP_DUPLEX: { - if (size >= sizeof (link_duplex_t)) { - uint32_t *mode = (uint32_t *)val; - - *perm = MAC_PROP_PERM_READ; - if (dev->state & STATE_MAC_STARTED) - *mode = LINK_DUPLEX_FULL; - else - *mode = LINK_DUPLEX_UNKNOWN; + uint32_t *mode = (uint32_t *)val; - } else - ret = EINVAL; + ASSERT(size >= sizeof (link_duplex_t)); + if (dev->state & STATE_MAC_STARTED) + *mode = LINK_DUPLEX_FULL; + else + *mode = LINK_DUPLEX_UNKNOWN; break; } case MAC_PROP_SPEED: { - if (size >= sizeof (uint64_t)) { - uint64_t *speed = (uint64_t *)val; - - *perm = MAC_PROP_PERM_READ; - *speed = 0; - if ((dev->state & STATE_MAC_STARTED) && - (dev->link.mac_speed != 0)) { - *speed = 1000000ull * - oce_power10(dev->link.mac_speed); - } - } else - ret = EINVAL; - break; - } - - case MAC_PROP_MTU: { - mac_propval_range_t range; + uint64_t *speed = (uint64_t *)val; - *perm = MAC_PROP_PERM_RW; - if (!(flags & MAC_PROP_POSSIBLE)) { - ret = ENOTSUP; - break; + ASSERT(size >= sizeof (uint64_t)); + 
*speed = 0; + if ((dev->state & STATE_MAC_STARTED) && + (dev->link.mac_speed != 0)) { + *speed = 1000000ull * oce_power10(dev->link.mac_speed); } - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = OCE_MIN_MTU; - range.range_uint32[0].mpur_max = OCE_MAX_MTU; - bcopy(&range, val, sizeof (mac_propval_range_t)); break; } case MAC_PROP_FLOWCTRL: { link_flowctrl_t *fc = (link_flowctrl_t *)val; - if (size < sizeof (link_flowctrl_t)) { + ASSERT(size >= sizeof (link_flowctrl_t)); + if (dev->flow_control & OCE_FC_TX && + dev->flow_control & OCE_FC_RX) + *fc = LINK_FLOWCTRL_BI; + else if (dev->flow_control == OCE_FC_TX) + *fc = LINK_FLOWCTRL_TX; + else if (dev->flow_control == OCE_FC_RX) + *fc = LINK_FLOWCTRL_RX; + else if (dev->flow_control == 0) + *fc = LINK_FLOWCTRL_NONE; + else ret = EINVAL; - break; - } - - if (size >= sizeof (link_flowctrl_t)) { - if (dev->flow_control & OCE_FC_TX && - dev->flow_control & OCE_FC_RX) - *fc = LINK_FLOWCTRL_BI; - else if (dev->flow_control == OCE_FC_TX) - *fc = LINK_FLOWCTRL_TX; - else if (dev->flow_control == OCE_FC_RX) - *fc = LINK_FLOWCTRL_RX; - else if (dev->flow_control == 0) - *fc = LINK_FLOWCTRL_NONE; - else - ret = EINVAL; - } break; } - case MAC_PROP_PRIVATE: { - ret = oce_get_priv_prop(dev, name, flags, size, val); + case MAC_PROP_PRIVATE: + ret = oce_get_priv_prop(dev, name, size, val); break; - } + default: ret = ENOTSUP; break; @@ -566,6 +507,59 @@ oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, return (ret); } /* oce_m_getprop */ +void +oce_m_propinfo(void *arg, const char *name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + + switch (pr_num) { + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case 
MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_SPEED: + case MAC_PROP_DUPLEX: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, OCE_MIN_MTU, OCE_MAX_MTU); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + if (strcmp(name, "_tx_ring_size") == 0) { + value = OCE_DEFAULT_TX_RING_SIZE; + } else if (strcmp(name, "_rx_ring_size") == 0) { + value = OCE_DEFAULT_RX_RING_SIZE; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} /* oce_m_propinfo */ + /* * function to handle dlpi streams message from GLDv3 mac layer */ @@ -701,7 +695,6 @@ oce_set_priv_prop(struct oce_dev *dev, const char *name, * * dev - software handle to the device * name - string containing the property name - * flags - flags sent by the OS to get_prop * size - length of the string contained name * val - [OUT] pointer to the location where the result is returned * @@ -709,46 +702,22 @@ oce_set_priv_prop(struct oce_dev *dev, const char *name, */ static int oce_get_priv_prop(struct oce_dev *dev, const char *name, - uint_t flags, uint_t size, void *val) + uint_t size, void *val) { - int ret = ENOTSUP; int value; - boolean_t is_default = (flags & MAC_PROP_DEFAULT); - - if (NULL == val) { - ret = EINVAL; - return (ret); - } if (strcmp(name, "_tx_ring_size") == 0) { - value = is_default ? 
OCE_DEFAULT_TX_RING_SIZE : - dev->tx_ring_size; - ret = 0; - goto done; - } - - if (strcmp(name, "_tx_bcopy_limit") == 0) { + value = dev->tx_ring_size; + } else if (strcmp(name, "_tx_bcopy_limit") == 0) { value = dev->tx_bcopy_limit; - ret = 0; - goto done; - } - - if (strcmp(name, "_rx_bcopy_limit") == 0) { + } else if (strcmp(name, "_rx_ring_size") == 0) { + value = dev->rx_ring_size; + } else if (strcmp(name, "_rx_bcopy_limit") == 0) { value = dev->rx_bcopy_limit; - ret = 0; - goto done; - } - - if (strcmp(name, "_rx_ring_size") == 0) { - value = is_default ? OCE_DEFAULT_RX_RING_SIZE : - dev->rx_ring_size; - ret = 0; - goto done; + } else { + return (ENOTSUP); } -done: - if (ret == 0) { - (void) snprintf(val, size, "%d", value); - } - return (ret); + (void) snprintf(val, size, "%d", value); + return (0); } /* oce_get_priv_prop */ diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c index f3346bb444..a4c0fdc6a5 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c @@ -116,7 +116,8 @@ static struct modlinkage oce_mod_linkage = { MODREV_1, &oce_drv, NULL }; -#define OCE_M_CB_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +#define OCE_M_CB_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | \ + MC_PROPINFO) static mac_callbacks_t oce_mac_cb = { OCE_M_CB_FLAGS, /* mc_callbacks */ oce_m_stat, /* mc_getstat */ @@ -126,16 +127,17 @@ static mac_callbacks_t oce_mac_cb = { oce_m_multicast, /* mc_multicast */ oce_m_unicast, /* mc_unicast */ oce_m_send, /* mc_tx */ + NULL, oce_m_ioctl, /* mc_ioctl */ oce_m_getcap, /* mc_getcapab */ NULL, /* open */ NULL, /* close */ oce_m_setprop, /* set properties */ - oce_m_getprop /* get properties */ + oce_m_getprop, /* get properties */ + oce_m_propinfo /* properties info */ }; -extern mac_priv_prop_t oce_priv_props[]; -extern uint32_t oce_num_props; +extern char 
*oce_priv_props[]; /* Module Init */ int @@ -293,7 +295,6 @@ oce_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) mac->m_max_sdu = dev->mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = oce_priv_props; - mac->m_priv_prop_count = oce_num_props; oce_log(dev, CE_NOTE, MOD_CONFIG, "Driver Private structure = 0x%p", (void *)dev); diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c index 99f210925f..cc1ddb33f9 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c @@ -25,7 +25,7 @@ */ /* - * Source file containing the Recieve Path handling + * Source file containing the Receive Path handling * functions */ #include <oce_impl.h> @@ -420,7 +420,7 @@ oce_set_rx_oflags(mblk_t *mp, struct oce_nic_rx_cqe *cqe) /* set flags */ if (cqe->u0.s.ip_cksum_pass) { - csum_flags |= HCK_IPV4_HDRCKSUM; + csum_flags |= HCK_IPV4_HDRCKSUM_OK; } if (cqe->u0.s.l4_cksum_pass) { @@ -428,8 +428,7 @@ oce_set_rx_oflags(mblk_t *mp, struct oce_nic_rx_cqe *cqe) } if (csum_flags) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - csum_flags, 0); + (void) mac_hcksum_set(mp, 0, 0, 0, 0, csum_flags); } } diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c index 5198cfc710..c1925b8074 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c @@ -712,11 +712,10 @@ oce_send_packet(struct oce_wq *wq, mblk_t *mp) } /* Retrieve LSO info */ - lso_info_get(mp, &mss, &flags); + mac_lso_get(mp, &mss, &flags); /* get the offload flags */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, - NULL, &csum_flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &csum_flags); /* Limit should be always less than Tx Buffer Size */ if (pkt_len < dev->tx_bcopy_limit) { diff --git a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c 
b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c index 7d2873e9fe..9290ecdde7 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c +++ b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c @@ -2203,13 +2203,13 @@ ql_set_rx_cksum(mblk_t *mp, struct ib_mac_iocb_rsp *net_rsp) /* TCP or UDP packet and checksum valid */ if (((net_rsp->flags2 & IB_MAC_IOCB_RSP_T) != 0) && ((net_rsp->flags1 & IB_MAC_IOCB_RSP_NU) == 0)) { - flags = HCK_FULLCKSUM | HCK_FULLCKSUM_OK; - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + flags = HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } if (((net_rsp->flags2 & IB_MAC_IOCB_RSP_U) != 0) && ((net_rsp->flags1 & IB_MAC_IOCB_RSP_NU) == 0)) { - flags = HCK_FULLCKSUM | HCK_FULLCKSUM_OK; - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + flags = HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } } @@ -4750,13 +4750,12 @@ ql_send_common(struct tx_ring *tx_ring, mblk_t *mp) tx_mode = USE_COPY; if (qlge->chksum_cap) { - hcksum_retrieve(mp, NULL, NULL, NULL, - NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); QL_PRINT(DBG_TX, ("checksum flag is :0x%x, card capability " "is 0x%x \n", pflags, qlge->chksum_cap)); if (qlge->lso_enable) { uint32_t lso_flags = 0; - lso_info_get(mp, &mss, &lso_flags); + mac_lso_get(mp, &mss, &lso_flags); use_lso = (lso_flags == HW_LSO); } QL_PRINT(DBG_TX, ("mss :%d, use_lso %x \n", diff --git a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c index 6ad591435c..83ef993a0c 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c +++ b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c @@ -23,6 +23,7 @@ * Copyright 2009 QLogic Corporation. All rights reserved. 
*/ +#include <sys/note.h> #include <qlge.h> #include <sys/strsubr.h> #include <netinet/in.h> @@ -46,9 +47,12 @@ static int ql_unicst_set(qlge_t *qlge, const uint8_t *macaddr, int slot); static int ql_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int ql_m_getprop(void *, const char *, mac_prop_id_t, uint_t, uint_t, - void *, uint_t *); -#define QL_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +static int ql_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void ql_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); + +#define QL_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t ql_m_callbacks = { QL_M_CALLBACK_FLAGS, ql_m_getstat, @@ -58,19 +62,20 @@ static mac_callbacks_t ql_m_callbacks = { ql_m_multicst, NULL, NULL, + NULL, ql_m_ioctl, ql_m_getcapab, NULL, NULL, ql_m_setprop, - ql_m_getprop -}; -mac_priv_prop_t qlge_priv_prop[] = { - {"_adv_pause_mode", MAC_PROP_PERM_RW} + ql_m_getprop, + ql_m_propinfo }; -#define QLGE_MAX_PRIV_PROPS \ - (sizeof (qlge_priv_prop) / sizeof (mac_priv_prop_t)) +char *qlge_priv_prop[] = { + "_adv_pause_mode", + NULL +}; /* * This function starts the driver @@ -689,27 +694,6 @@ qlge_set_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_valsize, return (ENOTSUP); } -static int -qlge_get_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) -{ - int err = ENOTSUP; - boolean_t is_default = (boolean_t)(pr_flags & MAC_PROP_DEFAULT); - uint32_t value; - - if (strcmp(pr_name, "_adv_pause_mode") == 0) { - value = (is_default? 
2 : qlge->pause); - err = 0; - goto done; - } - -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); - } - return (err); -} - /* * callback functions for set/get of properties */ @@ -778,10 +762,30 @@ ql_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +static int +qlge_get_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_valsize, + void *pr_val) +{ + int err = ENOTSUP; + uint32_t value; + + if (strcmp(pr_name, "_adv_pause_mode") == 0) { + value = qlge->pause; + err = 0; + goto done; + } + +done: + if (err == 0) { + (void) snprintf(pr_val, pr_valsize, "%d", value); + } + return (err); +} + /* ARGSUSED */ static int ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { qlge_t *qlge = barg; uint64_t speed; @@ -795,20 +799,9 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, goto out; } - if (pr_valsize == 0) { - err = EINVAL; - goto out; - } - bzero(pr_val, pr_valsize); - /* mostly read only */ - *perm = MAC_PROP_PERM_READ; - switch (pr_num) { case MAC_PROP_DUPLEX: - if (pr_valsize < sizeof (link_duplex_t)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (link_duplex_t)); if (qlge->duplex) link_duplex = LINK_DUPLEX_FULL; else @@ -818,18 +811,12 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - if (pr_valsize < sizeof (speed)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (speed)); speed = qlge->speed * 1000000ull; bcopy(&speed, pr_val, sizeof (speed)); break; case MAC_PROP_STATUS: - if (pr_valsize < sizeof (link_state_t)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (link_state_t)); if (qlge->port_link_state == LS_DOWN) link_state = LINK_STATE_DOWN; else @@ -839,8 +826,7 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 
break; case MAC_PROP_PRIVATE: - err = qlge_get_priv_prop(qlge, pr_name, pr_flags, - pr_valsize, pr_val); + err = qlge_get_priv_prop(qlge, pr_name, pr_valsize, pr_val); break; default: @@ -851,6 +837,35 @@ out: return (err); } +static void +ql_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(barg)); + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_PRIVATE: { + char val_str[64]; + int default_val; + + if (strcmp(pr_name, "_adv_pause_mode") == 0) + default_val = 2; + else + return; + + (void) snprintf(val_str, sizeof (val_str), "%d", default_val); + mac_prop_info_set_default_str(prh, val_str); + break; + } + } +} + /* ARGSUSED */ static boolean_t ql_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) @@ -911,7 +926,6 @@ ql_gld3_init(qlge_t *qlge, mac_register_t *macp) macp->m_max_sdu = qlge->mtu; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = qlge_priv_prop; - macp->m_priv_prop_count = QLGE_MAX_PRIV_PROPS; macp->m_v12n = 0; ql_m_callbacks.mc_unicst = ql_m_unicst; ql_m_callbacks.mc_tx = ql_m_tx; diff --git a/usr/src/uts/common/io/hme/hme.c b/usr/src/uts/common/io/hme/hme.c index 2d1d3995df..71017b5464 100644 --- a/usr/src/uts/common/io/hme/hme.c +++ b/usr/src/uts/common/io/hme/hme.c @@ -50,6 +50,7 @@ #include <sys/policy.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <sys/byteorder.h> #include "hme_phy.h" #include "hme_mac.h" #include "hme.h" @@ -113,11 +114,12 @@ static int hme_64bit_enable = 1; /* Use 64-bit sbus transfers */ static int hme_reject_own = 1; /* Reject packets with own SA */ static int hme_ngu_enable = 0; /* Never Give Up mode */ -mac_priv_prop_t hme_priv_prop[] = { - { "_ipg0", MAC_PROP_PERM_RW }, - { "_ipg1", MAC_PROP_PERM_RW }, - { "_ipg2", MAC_PROP_PERM_RW }, - { "_lance_mode", MAC_PROP_PERM_RW }, +char *hme_priv_prop[] = { + "_ipg0", + "_ipg1", 
+ "_ipg2", + "_lance_mode", + NULL }; static int hme_lance_mode = 1; /* to enable lance mode */ @@ -232,8 +234,9 @@ static int hme_m_multicst(void *, boolean_t, const uint8_t *); static int hme_m_unicst(void *, const uint8_t *); static mblk_t *hme_m_tx(void *, mblk_t *); static boolean_t hme_m_getcapab(void *, mac_capab_t, void *); -static int hme_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static int hme_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void hme_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int hme_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); @@ -246,7 +249,7 @@ static mii_ops_t hme_mii_ops = { }; static mac_callbacks_t hme_m_callbacks = { - MC_GETCAPAB | MC_SETPROP | MC_GETPROP, + MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO, hme_m_stat, hme_m_start, hme_m_stop, @@ -255,11 +258,13 @@ static mac_callbacks_t hme_m_callbacks = { hme_m_unicst, hme_m_tx, NULL, + NULL, hme_m_getcapab, NULL, NULL, hme_m_setprop, hme_m_getprop, + hme_m_propinfo }; DDI_DEFINE_STREAM_OPS(hme_dev_ops, nulldev, nulldev, hmeattach, hmedetach, @@ -1506,8 +1511,6 @@ hmeattach(dev_info_t *dip, ddi_attach_cmd_t cmd) macp->m_max_sdu = ETHERMTU; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = hme_priv_prop; - macp->m_priv_prop_count = - sizeof (hme_priv_prop) / sizeof (hme_priv_prop[0]); if (mac_register(macp, &hmep->hme_mh) != 0) { mac_free(macp); goto error_intr; @@ -1901,15 +1904,14 @@ hmestatinit(struct hme *hmep) } int -hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { struct hme *hmep = arg; int value; - boolean_t is_default; int rv; - rv = mii_m_getprop(hmep->hme_mii, name, num, flags, sz, val, perm); + rv = mii_m_getprop(hmep->hme_mii, name, num, sz, val); if (rv != ENOTSUP) return (rv); @@ 
-1920,18 +1922,14 @@ hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (ENOTSUP); } - *perm = MAC_PROP_PERM_RW; - - is_default = (flags & MAC_PROP_DEFAULT) ? B_TRUE : B_FALSE; if (strcmp(name, "_ipg0") == 0) { - value = is_default ? hme_ipg0 : hmep->hme_ipg0; - + value = hmep->hme_ipg0; } else if (strcmp(name, "_ipg1") == 0) { - value = is_default ? hme_ipg1 : hmep->hme_ipg1; + value = hmep->hme_ipg1; } else if (strcmp(name, "_ipg2") == 0) { - value = is_default ? hme_ipg2 : hmep->hme_ipg2; + value = hmep->hme_ipg2; } else if (strcmp(name, "_lance_mode") == 0) { - value = is_default ? hme_lance_mode : hmep->hme_lance_mode; + value = hmep->hme_lance_mode; } else { return (ENOTSUP); } @@ -1939,6 +1937,38 @@ hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (0); } +static void +hme_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + struct hme *hmep = arg; + + mii_m_propinfo(hmep->hme_mii, name, num, mph); + + switch (num) { + case MAC_PROP_PRIVATE: { + char valstr[64]; + int default_val; + + if (strcmp(name, "_ipg0") == 0) { + default_val = hme_ipg0; + } else if (strcmp(name, "_ipg1") == 0) { + default_val = hme_ipg1; + } else if (strcmp(name, "_ipg2") == 0) { + default_val = hme_ipg2; + } if (strcmp(name, "_lance_mode") == 0) { + default_val = hme_lance_mode; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", default_val); + mac_prop_info_set_default_str(mph, valstr); + break; + } + } +} + int hme_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, const void *val) @@ -2267,8 +2297,7 @@ hmestart(struct hme *hmep, mblk_t *mp) uint32_t start_offset; uint32_t stuff_offset; - hcksum_retrieve(mp, NULL, NULL, &start_offset, &stuff_offset, - NULL, NULL, &flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, NULL, NULL, &flags); if (flags & HCK_PARTIALCKSUM) { if (get_ether_type(mp->b_rptr) == ETHERTYPE_VLAN) { @@ -3434,8 
+3463,7 @@ hmeread(struct hme *hmep, hmebuf_t *rbuf, uint32_t rflags) if (type == ETHERTYPE_IP || type == ETHERTYPE_IPV6) { uint16_t cksum = ~rflags & HMERMD_CKSUM; uint_t end = len - sizeof (struct ether_header); - (void) hcksum_assoc(bp, NULL, NULL, 0, - 0, end, htons(cksum), HCK_PARTIALCKSUM, 0); + mac_hcksum_set(bp, 0, 0, end, htons(cksum), HCK_PARTIALCKSUM); } return (bp); diff --git a/usr/src/uts/common/io/hxge/hxge_impl.h b/usr/src/uts/common/io/hxge/hxge_impl.h index 36b94382bf..0e1567e148 100644 --- a/usr/src/uts/common/io/hxge/hxge_impl.h +++ b/usr/src/uts/common/io/hxge/hxge_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ extern "C" { #include <sys/mac_provider.h> #include <sys/mac_ether.h> +#include <sys/note.h> /* * Handy macros (taken from bge driver) @@ -258,6 +259,7 @@ struct _hxge_ldg_t { p_hxge_ldv_t ldvp; hxge_sys_intr_t sys_intr_handler; p_hxge_t hxgep; + uint32_t htable_idx; }; struct _hxge_ldv_t { @@ -378,6 +380,8 @@ void hxge_destroy_kstats(p_hxge_t); int hxge_port_kstat_update(kstat_t *, int); int hxge_m_stat(void *arg, uint_t stat, uint64_t *val); +int hxge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int hxge_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* hxge_hw.c */ void diff --git a/usr/src/uts/common/io/hxge/hxge_kstats.c b/usr/src/uts/common/io/hxge/hxge_kstats.c index bd42641d5d..d9bfffeece 100644 --- a/usr/src/uts/common/io/hxge/hxge_kstats.c +++ b/usr/src/uts/common/io/hxge/hxge_kstats.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -881,6 +881,70 @@ hxge_port_kstat_update(kstat_t *ksp, int rw) return (0); } +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +hxge_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_hxge_ring_handle_t rhp = (p_hxge_ring_handle_t)rdriver; + p_hxge_t hxgep = rhp->hxgep; + + ASSERT(rhp != NULL); + ASSERT(hxgep != NULL); + ASSERT(hxgep->statsp != NULL); + ASSERT(0 <= rhp->index < HXGE_MAX_RDCS); + + switch (stat) { + case MAC_STAT_IERRORS: + *val = hxgep->statsp->rdc_stats[rhp->index].ierrors; + break; + case MAC_STAT_RBYTES: + *val = hxgep->statsp->rdc_stats[rhp->index].ibytes; + break; + case MAC_STAT_IPACKETS: + *val = hxgep->statsp->rdc_stats[rhp->index].ipackets; + break; + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +hxge_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_hxge_ring_handle_t rhp = (p_hxge_ring_handle_t)rdriver; + p_hxge_t hxgep = rhp->hxgep; + + ASSERT(rhp != NULL); + ASSERT(hxgep != NULL); + ASSERT(hxgep->statsp != NULL); + ASSERT(0 <= rhp->index < HXGE_MAX_TDCS); + + switch (stat) { + case MAC_STAT_OERRORS: + *val = hxgep->statsp->tdc_stats[rhp->index].oerrors; + break; + case MAC_STAT_OBYTES: + *val = hxgep->statsp->tdc_stats[rhp->index].obytes; + break; + case MAC_STAT_OPACKETS: + *val = hxgep->statsp->tdc_stats[rhp->index].opackets; + break; + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + int hxge_m_stat(void *arg, uint_t stat, uint64_t *value) { diff --git a/usr/src/uts/common/io/hxge/hxge_main.c b/usr/src/uts/common/io/hxge/hxge_main.c index 24d4bec784..ee2dfc365a 100644 --- a/usr/src/uts/common/io/hxge/hxge_main.c +++ b/usr/src/uts/common/io/hxge/hxge_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -146,28 +146,29 @@ static boolean_t hxge_param_locked(mac_prop_id_t pr_num); static int hxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val); static int hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *); -static int hxge_get_def_val(hxge_t *hxgep, mac_prop_id_t pr_num, uint_t pr_valsize, void *pr_val); +static void hxge_m_propinfo(void *barg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t mph); static int hxge_set_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, const void *pr_val); static int hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val); + uint_t pr_valsize, void *pr_val); static void hxge_link_poll(void *arg); static void hxge_link_update(p_hxge_t hxge, link_state_t state); static void hxge_msix_init(p_hxge_t hxgep); -mac_priv_prop_t hxge_priv_props[] = { - {"_rxdma_intr_time", MAC_PROP_PERM_RW}, - {"_rxdma_intr_pkts", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_sctp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_sctp", MAC_PROP_PERM_RW} +char *hxge_priv_props[] = { + "_rxdma_intr_time", + "_rxdma_intr_pkts", + "_class_opt_ipv4_tcp", + "_class_opt_ipv4_udp", + "_class_opt_ipv4_ah", + "_class_opt_ipv4_sctp", + "_class_opt_ipv6_tcp", + "_class_opt_ipv6_udp", + "_class_opt_ipv6_ah", + "_class_opt_ipv6_sctp", + NULL }; #define HXGE_MAX_PRIV_PROPS \ @@ -177,7 +178,7 @@ mac_priv_prop_t hxge_priv_props[] = { #define MAX_DUMP_SZ 256 #define HXGE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | 
MC_SETPROP | MC_GETPROP | MC_PROPINFO) extern hxge_status_t hxge_pfc_set_default_mac_addr(p_hxge_t hxgep); @@ -190,12 +191,14 @@ static mac_callbacks_t hxge_m_callbacks = { hxge_m_multicst, NULL, NULL, + NULL, hxge_m_ioctl, hxge_m_getcapab, NULL, NULL, hxge_m_setprop, - hxge_m_getprop + hxge_m_getprop, + hxge_m_propinfo }; /* PSARC/2007/453 MSI-X interrupt limit override. */ @@ -2935,6 +2938,41 @@ hxge_group_get(void *arg, mac_ring_type_t type, int groupid, } } +static int +hxge_ring_get_htable_idx(p_hxge_t hxgep, mac_ring_type_t type, uint32_t channel) +{ + int i; + + ASSERT(hxgep->ldgvp != NULL); + + switch (type) { + case MAC_RING_TYPE_RX: + for (i = 0; i < hxgep->ldgvp->maxldvs; i++) { + if ((hxgep->ldgvp->ldvp[i].is_rxdma) && + (hxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + hxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + case MAC_RING_TYPE_TX: + for (i = 0; i < hxgep->ldgvp->maxldvs; i++) { + if ((hxgep->ldgvp->ldvp[i].is_txdma) && + (hxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + hxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + default: + break; + } + + return (-1); +} + /* * Callback function for the GLDv3 layer to register all rings. 
*/ @@ -2945,9 +2983,15 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, { p_hxge_t hxgep = arg; + ASSERT(hxgep != NULL); + ASSERT(infop != NULL); + switch (type) { case MAC_RING_TYPE_TX: { p_hxge_ring_handle_t rhp; + mac_intr_t *mintr = &infop->mri_intr; + p_hxge_intr_t intrp; + int htable_idx; ASSERT((index >= 0) && (index < HXGE_MAX_TDCS)); rhp = &hxgep->tx_ring_handles[index]; @@ -2958,11 +3002,22 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, infop->mri_start = hxge_tx_ring_start; infop->mri_stop = hxge_tx_ring_stop; infop->mri_tx = hxge_tx_ring_send; + infop->mri_stat = hxge_tx_ring_stat; + + intrp = (p_hxge_intr_t)&hxgep->hxge_intr_type; + htable_idx = hxge_ring_get_htable_idx(hxgep, type, index); + if (htable_idx >= 0) + mintr->mi_ddi_handle = intrp->htable[htable_idx]; + else + mintr->mi_ddi_handle = NULL; break; } + case MAC_RING_TYPE_RX: { p_hxge_ring_handle_t rhp; mac_intr_t hxge_mac_intr; + p_hxge_intr_t intrp; + int htable_idx; ASSERT((index >= 0) && (index < HXGE_MAX_RDCS)); rhp = &hxgep->rx_ring_handles[index]; @@ -2975,17 +3030,25 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, * disable interrupt (enable poll). 
*/ hxge_mac_intr.mi_handle = (mac_intr_handle_t)rhp; - hxge_mac_intr.mi_enable = - (mac_intr_enable_t)hxge_disable_poll; - hxge_mac_intr.mi_disable = - (mac_intr_disable_t)hxge_enable_poll; + hxge_mac_intr.mi_enable = (mac_intr_enable_t)hxge_disable_poll; + hxge_mac_intr.mi_disable = (mac_intr_disable_t)hxge_enable_poll; + + intrp = (p_hxge_intr_t)&hxgep->hxge_intr_type; + htable_idx = hxge_ring_get_htable_idx(hxgep, type, index); + if (htable_idx >= 0) + hxge_mac_intr.mi_ddi_handle = intrp->htable[htable_idx]; + else + hxge_mac_intr.mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhp; infop->mri_start = hxge_rx_ring_start; infop->mri_stop = hxge_rx_ring_stop; infop->mri_intr = hxge_mac_intr; infop->mri_poll = hxge_rx_poll; + infop->mri_stat = hxge_rx_ring_stat; break; } + default: break; } @@ -3186,37 +3249,9 @@ hxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } -/* ARGSUSED */ -static int -hxge_get_def_val(hxge_t *hxgep, mac_prop_id_t pr_num, uint_t pr_valsize, - void *pr_val) -{ - int err = 0; - link_flowctrl_t fl; - - switch (pr_num) { - case MAC_PROP_DUPLEX: - *(uint8_t *)pr_val = 2; - break; - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = 0; - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_TX; - bcopy(&fl, pr_val, sizeof (fl)); - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - static int hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { hxge_t *hxgep = barg; p_hxge_stats_t statsp = hxgep->statsp; @@ -3228,20 +3263,8 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_m_getprop: pr_num %d", pr_num)); - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != 
MAC_PROP_PRIVATE)) { - err = hxge_get_def_val(hxgep, pr_num, pr_valsize, pr_val); - return (err); - } - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_m_getprop: duplex mode %d", @@ -3249,17 +3272,13 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (uint64_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (uint64_t)); tmp = statsp->mac_stats.link_speed * 1000000ull; bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_state_t)); if (!statsp->mac_stats.link_up) ls = LINK_STATE_DOWN; else @@ -3272,15 +3291,12 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, * Flow control is supported by the shared domain and * it is currently transmit only */ - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); + ASSERT(pr_valsize < sizeof (link_flowctrl_t)); fl = LINK_FLOWCTRL_TX; bcopy(&fl, pr_val, sizeof (fl)); break; case MAC_PROP_AUTONEG: /* 10G link only and it is not negotiable */ - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_ADV_1000FDX_CAP: @@ -3299,25 +3315,10 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, break; case MAC_PROP_PRIVATE: - err = hxge_get_priv_prop(hxgep, pr_name, pr_flags, - pr_valsize, pr_val); - break; - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = MIN_FRAME_SIZE - - MTU_TO_FRAME_SIZE; - range.range_uint32[0].mpur_max = 
MAX_FRAME_SIZE - - MTU_TO_FRAME_SIZE; - bcopy(&range, pr_val, sizeof (range)); + err = hxge_get_priv_prop(hxgep, pr_name, pr_valsize, + pr_val); break; - } + default: err = EINVAL; break; @@ -3328,6 +3329,60 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +static void +hxge_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_AUTONEG: + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + MIN_FRAME_SIZE - MTU_TO_FRAME_SIZE, + MAX_FRAME_SIZE - MTU_TO_FRAME_SIZE); + break; + + case MAC_PROP_PRIVATE: { + char valstr[MAXNAMELEN]; + + bzero(valstr, sizeof (valstr)); + + /* Receive Interrupt Blanking Parameters */ + if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + RXDMA_RCR_TO_DEFAULT); + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + RXDMA_RCR_PTHRES_DEFAULT); + + /* Classification and Load Distribution Configuration */ + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + HXGE_CLASS_TCAM_LOOKUP); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} + + /* ARGSUSED */ static int hxge_set_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, @@ -3387,8 +3442,8 @@ hxge_set_priv_prop(p_hxge_t hxgep, const char 
*pr_name, uint_t pr_valsize, } static int -hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { p_hxge_param_t param_arr = hxgep->param_arr; char valstr[MAXNAMELEN]; @@ -3399,77 +3454,55 @@ hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_flags, HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_get_priv_prop: property %s", pr_name)); - if (pr_flags & MAC_PROP_DEFAULT) { - /* Receive Interrupt Blanking Parameters */ - if (strcmp(pr_name, "_rxdma_intr_time") == 0) { - value = RXDMA_RCR_TO_DEFAULT; - } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { - value = RXDMA_RCR_PTHRES_DEFAULT; + /* Receive Interrupt Blanking Parameters */ + if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + value = hxgep->intr_timeout; + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + value = hxgep->intr_threshold; - /* Classification and Load Distribution Configuration */ - } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || - strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || - strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || - strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || - strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - value = HXGE_CLASS_TCAM_LOOKUP; - } else { - err = EINVAL; - } - } else { - /* Receive Interrupt Blanking Parameters */ - if (strcmp(pr_name, "_rxdma_intr_time") == 0) { - value = hxgep->intr_timeout; - } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { - value = hxgep->intr_threshold; + /* Classification and Load Distribution Configuration */ + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)¶m_arr[param_class_opt_ipv4_tcp]); - /* Classification and Load Distribution Configuration */ 
- } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); - - value = (int)param_arr[param_class_opt_ipv4_tcp].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_udp]); - - value = (int)param_arr[param_class_opt_ipv4_udp].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_ah") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_ah]); - - value = (int)param_arr[param_class_opt_ipv4_ah].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); - - value = (int)param_arr[param_class_opt_ipv4_sctp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); - - value = (int)param_arr[param_class_opt_ipv6_tcp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_udp]); - - value = (int)param_arr[param_class_opt_ipv6_udp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_ah]); - - value = (int)param_arr[param_class_opt_ipv6_ah].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_sctp]); - - value = (int)param_arr[param_class_opt_ipv6_sctp].value; - } else { - err = EINVAL; - } + value = (int)param_arr[param_class_opt_ipv4_tcp].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_udp]); + + value = 
(int)param_arr[param_class_opt_ipv4_udp].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_ah") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_ah]); + + value = (int)param_arr[param_class_opt_ipv4_ah].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); + + value = (int)param_arr[param_class_opt_ipv4_sctp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); + + value = (int)param_arr[param_class_opt_ipv6_tcp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_udp]); + + value = (int)param_arr[param_class_opt_ipv6_udp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_ah]); + + value = (int)param_arr[param_class_opt_ipv6_ah].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_sctp]); + + value = (int)param_arr[param_class_opt_ipv6_sctp].value; + } else { + err = EINVAL; } if (err == 0) { @@ -3916,6 +3949,7 @@ hxge_add_intrs_adv_type(p_hxge_t hxgep, uint32_t int_type) return (HXGE_ERROR | HXGE_DDI_FAILED); } + ldgp->htable_idx = x; intrp->intr_added++; } intrp->msi_intx_cnt = nactual; @@ -4219,7 +4253,6 @@ hxge_mac_register(p_hxge_t hxgep) macp->m_max_sdu = hxgep->vmac.maxframesize - MTU_TO_FRAME_SIZE; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = hxge_priv_props; - macp->m_priv_prop_count = HXGE_MAX_PRIV_PROPS; macp->m_v12n = MAC_VIRT_LEVEL1; HXGE_DEBUG_MSG((hxgep, DDI_CTL, diff --git a/usr/src/uts/common/io/hxge/hxge_rxdma.c b/usr/src/uts/common/io/hxge/hxge_rxdma.c index 
6700313f63..3ac170277d 100644 --- a/usr/src/uts/common/io/hxge/hxge_rxdma.c +++ b/usr/src/uts/common/io/hxge/hxge_rxdma.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2060,8 +2060,7 @@ hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p, pkt_type == RCR_PKT_IS_UDP) ? B_TRUE : B_FALSE); if (!no_port_bit && l4_cs_eq_bit && is_tcp_udp && !error_type) { - (void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0); + mac_hcksum_set(nmp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_receive_packet: Full tcp/udp cksum " diff --git a/usr/src/uts/common/io/hxge/hxge_send.c b/usr/src/uts/common/io/hxge/hxge_send.c index e453322486..647717b82c 100644 --- a/usr/src/uts/common/io/hxge/hxge_send.c +++ b/usr/src/uts/common/io/hxge/hxge_send.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -163,8 +163,8 @@ hxge_start(p_hxge_t hxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) } } - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, &value, + &cksum_flags); if (!HXGE_IS_VLAN_PACKET(mp->b_rptr)) { start_offset += sizeof (ether_header_t); stuff_offset += sizeof (ether_header_t); @@ -593,8 +593,8 @@ hxge_start_control_header_only: i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); if (ngathers > hxge_tx_max_gathers) { good_packet = B_FALSE; - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, + &end_offset, &value, &cksum_flags); HXGE_DEBUG_MSG((NULL, TX_CTL, "==> hxge_start(14): pull msg - " diff --git a/usr/src/uts/common/io/ib/clients/ibd/ibd.c b/usr/src/uts/common/io/ib/clients/ibd/ibd.c index b3a39a2efc..1ca10a43e4 100644 --- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c @@ -458,6 +458,7 @@ static mac_callbacks_t ibd_m_callbacks = { ibd_m_unicst, ibd_m_tx, NULL, + NULL, ibd_m_getcapab }; @@ -6256,7 +6257,7 @@ ibd_rc_large_copy: * ud destination, the opcode and the LSO header information to the * work request. 
*/ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); if ((lsoflags & HW_LSO) != HW_LSO) { node->w_swr.wr_opcode = IBT_WRC_SEND; lsohdr_sz = 0; @@ -6277,7 +6278,7 @@ ibd_rc_large_copy: lsohdr_sz = (node->w_swr.wr.ud_lso).lso_hdr_sz; } - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags); if ((hckflags & HCK_FULLCKSUM) == HCK_FULLCKSUM) node->w_swr.wr_flags |= IBT_WR_SEND_CKSUM; else @@ -6940,8 +6941,7 @@ ibd_process_rx(ibd_state_t *state, ibd_rwqe_t *rwqe, ibt_wc_t *wc) if (((wc->wc_flags & IBT_WC_CKSUM_OK) == IBT_WC_CKSUM_OK) && (wc->wc_cksum == 0xFFFF) && (iphap->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION)) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); } return (mp); diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c index 45fbfd7932..334c7dcd04 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,6 +29,7 @@ #include <inet/ip.h> #include <inet/ip_ire.h> #include <inet/ip_if.h> +#include <sys/ethernet.h> #include <sys/ib/mgt/ibcm/ibcm_arp.h> extern char cmlog[]; diff --git a/usr/src/uts/common/io/igb/igb_gld.c b/usr/src/uts/common/io/igb/igb_gld.c index becf960af5..3630bb5019 100644 --- a/usr/src/uts/common/io/igb/igb_gld.c +++ b/usr/src/uts/common/io/igb/igb_gld.c @@ -850,11 +850,15 @@ igb_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = igb_ring_start; infop->mri_stop = NULL; infop->mri_poll = (mac_ring_poll_t)igb_rx_ring_poll; + infop->mri_stat = igb_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = igb_rx_ring_intr_enable; mintr->mi_disable = igb_rx_ring_intr_disable; - + if (igb->intr_type & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + igb->htable[rx_ring->intr_vector]; + } break; } case MAC_RING_TYPE_TX: { @@ -867,7 +871,11 @@ igb_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = igb_tx_ring_send; - + infop->mri_stat = igb_tx_ring_stat; + if (igb->intr_type & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + igb->htable[tx_ring->intr_vector]; + } break; } default: @@ -1152,141 +1160,90 @@ setup_link: int igb_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { igb_t *igb = (igb_t *)arg; struct e1000_hw *hw = &igb->hw; int err = 0; uint32_t flow_control; uint64_t tmp = 0; - mac_propval_range_t range; - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) - return (igb_get_def_val(igb, pr_num, pr_valsize, pr_val)); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - 
bcopy(&igb->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&igb->link_duplex, pr_val, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - tmp = igb->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp = igb->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; + ASSERT(pr_valsize >= sizeof (uint8_t)); *(uint8_t *)pr_val = igb->param_adv_autoneg_cap; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (uint32_t)) { - switch (hw->fc.requested_mode) { - case e1000_fc_none: - flow_control = LINK_FLOWCTRL_NONE; - break; - case e1000_fc_rx_pause: - flow_control = LINK_FLOWCTRL_RX; - break; - case e1000_fc_tx_pause: - flow_control = LINK_FLOWCTRL_TX; - break; - case e1000_fc_full: - flow_control = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flow_control, pr_val, sizeof (flow_control)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint32_t)); + switch (hw->fc.requested_mode) { + case e1000_fc_none: + flow_control = LINK_FLOWCTRL_NONE; + break; + case e1000_fc_rx_pause: + flow_control = LINK_FLOWCTRL_RX; + break; + case e1000_fc_tx_pause: + flow_control = LINK_FLOWCTRL_TX; + break; + case e1000_fc_full: + flow_control = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flow_control, pr_val, sizeof (flow_control)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_1000fdx_cap; break; case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_1000fdx_cap; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_1000hdx_cap; break; 
case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_1000hdx_cap; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100t4_cap; break; case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100t4_cap; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100fdx_cap; break; case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100fdx_cap; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100hdx_cap; break; case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100hdx_cap; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_10fdx_cap; break; case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_10fdx_cap; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_10hdx_cap; break; case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_10hdx_cap; break; case MAC_PROP_PRIVATE: - err = igb_get_priv_prop(igb, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = MIN_MTU; - range.range_uint32[0].mpur_max = MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); + err = igb_get_priv_prop(igb, pr_name, pr_valsize, pr_val); break; default: err = EINVAL; @@ 
-1295,98 +1252,106 @@ igb_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, return (err); } -int -igb_get_def_val(igb_t *igb, mac_prop_id_t pr_num, - uint_t pr_valsize, void *pr_val) +void +igb_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) { - uint32_t flow_control; + igb_t *igb = (igb_t *)arg; struct e1000_hw *hw = &igb->hw; - uint16_t phy_status; - uint16_t phy_ext_status; - int err = 0; + uint16_t phy_status, phy_ext_status; - ASSERT(pr_valsize > 0); switch (pr_num) { - case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; - } else { - (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0; - } - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (uint32_t)) - return (EINVAL); - flow_control = LINK_FLOWCTRL_BI; - bcopy(&flow_control, pr_val, sizeof (flow_control)); - break; + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + case MAC_PROP_EN_1000FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 1; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { - (void) e1000_read_phy_reg(hw, - PHY_EXT_STATUS, &phy_ext_status); - *(uint8_t *)pr_val = + (void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, + &phy_ext_status); + mac_prop_info_set_default_uint8(prh, ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) || - (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0; + (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 
1 : 0); } break; - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)pr_val = 0; - break; + case MAC_PROP_ADV_100FDX_CAP: case MAC_PROP_EN_100FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = + mac_prop_info_set_default_uint8(prh, ((phy_status & MII_SR_100X_FD_CAPS) || - (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0; + (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0); } break; + case MAC_PROP_ADV_100HDX_CAP: case MAC_PROP_EN_100HDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = + mac_prop_info_set_default_uint8(prh, ((phy_status & MII_SR_100X_HD_CAPS) || - (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0; + (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0); } break; + case MAC_PROP_ADV_10FDX_CAP: case MAC_PROP_EN_10FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0; + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0); } break; + case MAC_PROP_ADV_10HDX_CAP: case MAC_PROP_EN_10HDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0; + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_10T_HD_CAPS) ? 
1 : 0); } break; - default: - err = ENOTSUP; + + case MAC_PROP_AUTONEG: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, MIN_MTU, MAX_MTU); + break; + + case MAC_PROP_PRIVATE: + igb_priv_prop_info(igb, pr_name, prh); break; } - return (err); + } boolean_t @@ -1533,72 +1498,65 @@ igb_set_priv_prop(igb_t *igb, const char *pr_name, } int -igb_get_priv_prop(igb_t *igb, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +igb_get_priv_prop(igb_t *igb, const char *pr_name, uint_t pr_valsize, + void *pr_val) { - int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - *perm = MAC_PROP_PERM_RW; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : igb->param_adv_pause_cap); - err = 0; - goto done; - } - if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : igb->param_adv_asym_pause_cap); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_TX_COPY_THRESHOLD : - igb->tx_copy_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RECYCLE_THRESHOLD : - igb->tx_recycle_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_overload_thresh") == 0) { - value = (is_default ? DEFAULT_TX_OVERLOAD_THRESHOLD : - igb->tx_overload_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_resched_thresh") == 0) { - value = (is_default ? 
DEFAULT_TX_RESCHED_THRESHOLD : - igb->tx_resched_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_rx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_RX_COPY_THRESHOLD : - igb->rx_copy_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_PER_INTR : - igb->rx_limit_per_intr); - err = 0; - goto done; - } - if (strcmp(pr_name, "_intr_throttling") == 0) { - value = (is_default ? igb->capab->def_intr_throttle : - igb->intr_throttling[0]); - err = 0; - goto done; + value = igb->param_adv_pause_cap; + } else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + value = igb->param_adv_asym_pause_cap; + } else if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = igb->tx_copy_thresh; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = igb->tx_recycle_thresh; + } else if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = igb->tx_overload_thresh; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = igb->tx_resched_thresh; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = igb->rx_copy_thresh; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = igb->rx_limit_per_intr; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = igb->intr_throttling[0]; + } else { + return (ENOTSUP); } -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); + + (void) snprintf(pr_val, pr_valsize, "%d", value); + return (0); +} + +void +igb_priv_prop_info(igb_t *igb, const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[64]; + int value; + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } else if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = DEFAULT_TX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = DEFAULT_TX_RECYCLE_THRESHOLD; + } else 
if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = DEFAULT_TX_OVERLOAD_THRESHOLD; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = DEFAULT_TX_RESCHED_THRESHOLD; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = DEFAULT_RX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = DEFAULT_RX_LIMIT_PER_INTR; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = igb->capab->def_intr_throttle; + } else { + return; } - return (err); + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); } diff --git a/usr/src/uts/common/io/igb/igb_main.c b/usr/src/uts/common/io/igb/igb_main.c index b4070b8389..3ac2c03e1d 100644 --- a/usr/src/uts/common/io/igb/igb_main.c +++ b/usr/src/uts/common/io/igb/igb_main.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -121,21 +120,19 @@ static void igb_fm_init(igb_t *); static void igb_fm_fini(igb_t *); static void igb_release_multicast(igb_t *); -mac_priv_prop_t igb_priv_props[] = { - {"_tx_copy_thresh", MAC_PROP_PERM_RW}, - {"_tx_recycle_thresh", MAC_PROP_PERM_RW}, - {"_tx_overload_thresh", MAC_PROP_PERM_RW}, - {"_tx_resched_thresh", MAC_PROP_PERM_RW}, - {"_rx_copy_thresh", MAC_PROP_PERM_RW}, - {"_rx_limit_per_intr", MAC_PROP_PERM_RW}, - {"_intr_throttling", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ} +char *igb_priv_props[] = { + "_tx_copy_thresh", + "_tx_recycle_thresh", + "_tx_overload_thresh", + "_tx_resched_thresh", + "_rx_copy_thresh", + "_rx_limit_per_intr", + "_intr_throttling", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; -#define IGB_MAX_PRIV_PROPS \ - (sizeof (igb_priv_props) / sizeof (mac_priv_prop_t)) - static struct cb_ops igb_cb_ops = { nulldev, /* cb_open */ nulldev, /* cb_close */ @@ -191,7 +188,7 @@ ddi_device_acc_attr_t igb_regs_acc_attr = { }; #define IGB_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t igb_m_callbacks = { IGB_M_CALLBACK_FLAGS, @@ -202,12 +199,14 @@ static mac_callbacks_t igb_m_callbacks = { igb_m_multicst, NULL, NULL, + NULL, igb_m_ioctl, igb_m_getcapab, NULL, NULL, igb_m_setprop, - igb_m_getprop + igb_m_getprop, + igb_m_propinfo }; /* @@ -783,7 +782,6 @@ igb_register_mac(igb_t *igb) sizeof (struct ether_vlan_header) - ETHERFCSL; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = igb_priv_props; - mac->m_priv_prop_count = IGB_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; status = mac_register(mac, &igb->mac_hdl); diff --git 
a/usr/src/uts/common/io/igb/igb_rx.c b/usr/src/uts/common/io/igb/igb_rx.c index 1eeaf9d325..3f7ac957a5 100644 --- a/usr/src/uts/common/io/igb/igb_rx.c +++ b/usr/src/uts/common/io/igb/igb_rx.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -23,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms of the CDDL. + * Use is subject to license terms. 
*/ #include "igb_sw.h" @@ -272,18 +271,17 @@ igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) if (((status_error & E1000_RXD_STAT_TCPCS) || (status_error & E1000_RXD_STAT_UDPCS)) && !(status_error & E1000_RXDEXT_STATERR_TCPE)) - hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + hcksum_flags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ if ((status_error & E1000_RXD_STAT_IPCS) && !(status_error & E1000_RXDEXT_STATERR_IPE)) - hcksum_flags |= HCK_IPV4_HDRCKSUM; + hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; if (hcksum_flags != 0) { - (void) hcksum_assoc(mp, - NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); } } @@ -413,6 +411,10 @@ igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes) mblk_tail = &mp->b_next; } + /* Update per-ring rx statistics */ + rx_ring->rx_pkts++; + rx_ring->rx_bytes += pkt_len; + rx_discard: /* * Reset rx descriptor read bits diff --git a/usr/src/uts/common/io/igb/igb_stat.c b/usr/src/uts/common/io/igb/igb_stat.c index 8edc4dbeed..3f5f4d69a2 100644 --- a/usr/src/uts/common/io/igb/igb_stat.c +++ b/usr/src/uts/common/io/igb/igb_stat.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,8 +21,8 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms of the CDDL. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #include "igb_sw.h" @@ -271,3 +270,53 @@ igb_init_stats(igb_t *igb) return (IGB_SUCCESS); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +igb_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +igb_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->tx_bytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->tx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/igb/igb_sw.h b/usr/src/uts/common/io/igb/igb_sw.h index e7e886f35c..080cd1bed6 100644 --- a/usr/src/uts/common/io/igb/igb_sw.h +++ b/usr/src/uts/common/io/igb/igb_sw.h @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -451,6 +450,12 @@ typedef struct igb_tx_ring { uint32_t recycle_fail; uint32_t stall_watchdog; + /* + * Per-ring statistics + */ + uint64_t tx_pkts; /* Packets Transmitted Count */ + uint64_t tx_bytes; /* Bytes Transmitted Count */ + #ifdef IGB_DEBUG /* * Debug statistics @@ -516,6 +521,12 @@ typedef struct igb_rx_ring { kmutex_t rx_lock; /* Rx access lock */ + /* + * Per-ring statistics + */ + uint64_t rx_pkts; /* Packets Received Count */ + uint64_t rx_bytes; /* Bytes Received Count */ + #ifdef IGB_DEBUG /* * Debug statistics @@ -810,11 +821,12 @@ boolean_t igb_m_getcapab(void *, mac_capab_t, void *); void igb_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); int igb_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int igb_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +int igb_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void igb_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); int igb_set_priv_prop(igb_t *, const char *, uint_t, const void *); -int igb_get_priv_prop(igb_t *, const char *, - uint_t, uint_t, void *, uint_t *); +int igb_get_priv_prop(igb_t *, const char *, uint_t, void *); +void igb_priv_prop_info(igb_t *, const char *, mac_prop_info_handle_t); boolean_t igb_param_locked(mac_prop_id_t); void igb_fill_group(void *arg, 
mac_ring_type_t, const int, mac_group_info_t *, mac_group_handle_t); @@ -850,6 +862,8 @@ int igb_init_stats(igb_t *); mblk_t *igb_rx_ring_poll(void *, int); mblk_t *igb_tx_ring_send(void *, mblk_t *); +int igb_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int igb_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/igb/igb_tx.c b/usr/src/uts/common/io/igb/igb_tx.c index b77afe1a5d..31e46609a5 100644 --- a/usr/src/uts/common/io/igb/igb_tx.c +++ b/usr/src/uts/common/io/igb/igb_tx.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -418,6 +417,10 @@ adjust_threshold: ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); + /* Update per-ring tx statistics */ + tx_ring->tx_pkts++; + tx_ring->tx_bytes += mbsize; + mutex_exit(&tx_ring->tx_lock); return (B_TRUE); @@ -599,7 +602,7 @@ igb_get_tx_context(mblk_t *mp, tx_context_t *ctx) ASSERT(mp != NULL); - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags); bzero(ctx, sizeof (tx_context_t)); ctx->hcksum_flags = flags; @@ -607,7 +610,7 @@ igb_get_tx_context(mblk_t *mp, tx_context_t *ctx) if (flags == 0) return (TX_CXT_SUCCESS); - lso_info_get(mp, &mss, &lso_flag); + mac_lso_get(mp, &mss, &lso_flag); ctx->mss = mss; ctx->lso_flag = (lso_flag == HW_LSO); diff --git a/usr/src/uts/common/io/ipw/ipw2100.c b/usr/src/uts/common/io/ipw/ipw2100.c index 8c6bdbbe6f..2559c64762 100644 --- a/usr/src/uts/common/io/ipw/ipw2100.c +++ b/usr/src/uts/common/io/ipw/ipw2100.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -140,9 +140,9 @@ static void ipw2100_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int ipw2100_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int ipw2100_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); - + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void ipw2100_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* * Interrupt and Data transferring operations @@ -181,7 +181,7 @@ static int ipw2100_cpr_resume(struct ipw2100_softc *sc); * Mac Call Back entries */ mac_callbacks_t ipw2100_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ipw2100_m_stat, ipw2100_m_start, ipw2100_m_stop, @@ -189,12 +189,14 @@ mac_callbacks_t ipw2100_m_callbacks = { ipw2100_m_multicst, ipw2100_m_unicst, ipw2100_m_tx, + NULL, ipw2100_m_ioctl, NULL, NULL, NULL, ipw2100_m_setprop, - ipw2100_m_getprop + ipw2100_m_getprop, + ipw2100_m_propinfo }; @@ -2470,7 +2472,7 @@ ipw2100_getset(struct ipw2100_softc *sc, mblk_t *m, uint32_t cmd, */ static int ipw2100_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ipw2100_softc *sc = (struct ipw2100_softc *)arg; struct ieee80211com *ic = &sc->sc_ic; @@ -2487,14 +2489,25 @@ ipw2100_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; default: /* go through net80211 */ - err = ieee80211_getprop(ic, pr_name, wldp_pr_num, pr_flags, - wldp_length, wldp_buf, perm); + err = ieee80211_getprop(ic, pr_name, wldp_pr_num, + wldp_length, wldp_buf); break; } return (err); } +static void +ipw2100_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct ipw2100_softc *sc = (struct 
ipw2100_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wldp_pr_num, prh); + +} + static int ipw2100_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwh/iwh.c b/usr/src/uts/common/io/iwh/iwh.c index 407b814066..39b6f27e7f 100644 --- a/usr/src/uts/common/io/iwh/iwh.c +++ b/usr/src/uts/common/io/iwh/iwh.c @@ -379,8 +379,10 @@ static void iwh_m_ioctl(void *, queue_t *, mblk_t *); static int iwh_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int iwh_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void iwh_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); /* * Supported rates for 802.11b/g modes (in 500Kbps unit). 
@@ -466,7 +468,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwh_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwh_m_stat, iwh_m_start, iwh_m_stop, @@ -474,12 +476,14 @@ mac_callbacks_t iwh_m_callbacks = { iwh_m_multicst, iwh_m_unicst, iwh_m_tx, + NULL, iwh_m_ioctl, NULL, NULL, NULL, iwh_m_setprop, - iwh_m_getprop + iwh_m_getprop, + iwh_m_propinfo }; #ifdef DEBUG @@ -3520,7 +3524,7 @@ iwh_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int iwh_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { iwh_sc_t *sc; int err = EINVAL; @@ -3531,11 +3535,20 @@ iwh_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, sc = (iwh_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +iwh_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + iwh_sc_t *sc = (iwh_sc_t *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int iwh_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwi/ipw2200.c b/usr/src/uts/common/io/iwi/ipw2200.c index d52e069496..0c9a729b43 100644 --- a/usr/src/uts/common/io/iwi/ipw2200.c +++ b/usr/src/uts/common/io/iwi/ipw2200.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -161,9 +161,9 @@ static mblk_t *ipw2200_m_tx(void *arg, mblk_t *mp); static int ipw2200_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int ipw2200_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); - + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void ipw2200_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); /* * Interrupt and Data transferring operations @@ -205,7 +205,7 @@ extern void ieee80211_notify_node_leave(ieee80211com_t *ic, * Mac Call Back entries */ mac_callbacks_t ipw2200_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ipw2200_m_stat, ipw2200_m_start, ipw2200_m_stop, @@ -213,12 +213,14 @@ mac_callbacks_t ipw2200_m_callbacks = { ipw2200_m_multicst, ipw2200_m_unicst, ipw2200_m_tx, + NULL, ipw2200_m_ioctl, NULL, NULL, NULL, ipw2200_m_setprop, - ipw2200_m_getprop + ipw2200_m_getprop, + ipw2200_m_propinfo }; /* @@ -2558,7 +2560,7 @@ ipw2200_getset(struct ipw2200_softc *sc, mblk_t *m, uint32_t cmd, */ static int ipw2200_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ipw2200_softc *sc = (struct ipw2200_softc *)arg; struct ieee80211com *ic = &sc->sc_ic; @@ -2575,14 +2577,24 @@ ipw2200_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; default: /* go through net80211 */ - err = ieee80211_getprop(ic, pr_name, wldp_pr_num, pr_flags, - wldp_length, wldp_buf, perm); + err = ieee80211_getprop(ic, pr_name, wldp_pr_num, + wldp_length, wldp_buf); break; } return (err); } +static void +ipw2200_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wlpd_pr_num, mac_prop_info_handle_t mph) +{ + struct 
ipw2200_softc *sc = (struct ipw2200_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wlpd_pr_num, mph); +} + static int ipw2200_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwk/iwk2.c b/usr/src/uts/common/io/iwk/iwk2.c index fbf600039c..6b5c64363c 100644 --- a/usr/src/uts/common/io/iwk/iwk2.c +++ b/usr/src/uts/common/io/iwk/iwk2.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -340,8 +340,9 @@ static void iwk_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, uint_t wldp_length, const void *wldp_buf); static int iwk_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_name, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_name, uint_t wldp_length, void *wldp_buf); +static void iwk_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static void iwk_destroy_locks(iwk_sc_t *sc); static int iwk_send(ieee80211com_t *ic, mblk_t *mp, uint8_t type); static void iwk_thread(iwk_sc_t *sc); @@ -432,7 +433,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwk_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwk_m_stat, iwk_m_start, iwk_m_stop, @@ -440,12 +441,14 @@ mac_callbacks_t iwk_m_callbacks = { iwk_m_multicst, iwk_m_unicst, iwk_m_tx, + NULL, iwk_m_ioctl, NULL, NULL, NULL, iwk_m_setprop, - iwk_m_getprop + iwk_m_getprop, + iwk_m_propinfo }; #ifdef DEBUG @@ -3090,19 +3093,20 @@ iwk_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) /* * callback functions for set/get properties */ -/* ARGSUSED */ + static int iwk_m_getprop(void *arg, const char 
*pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; iwk_sc_t *sc = (iwk_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } + static int iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) @@ -3129,6 +3133,16 @@ iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static void +iwk_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + iwk_sc_t *sc = (iwk_sc_t *)arg; + ieee80211com_t *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wldp_pr_num, mph); +} + /*ARGSUSED*/ static int iwk_m_stat(void *arg, uint_t stat, uint64_t *val) diff --git a/usr/src/uts/common/io/iwp/iwp.c b/usr/src/uts/common/io/iwp/iwp.c index 317c02bee0..eec2b84664 100644 --- a/usr/src/uts/common/io/iwp/iwp.c +++ b/usr/src/uts/common/io/iwp/iwp.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -347,8 +347,9 @@ static void iwp_m_ioctl(void *, queue_t *, mblk_t *); static int iwp_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int iwp_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void iwp_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* * Supported rates for 802.11b/g modes (in 500Kbps unit). 
@@ -429,7 +430,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwp_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwp_m_stat, iwp_m_start, iwp_m_stop, @@ -437,12 +438,14 @@ mac_callbacks_t iwp_m_callbacks = { iwp_m_multicst, iwp_m_unicst, iwp_m_tx, + NULL, iwp_m_ioctl, NULL, NULL, NULL, iwp_m_setprop, - iwp_m_getprop + iwp_m_getprop, + iwp_m_propinfo }; #ifdef DEBUG @@ -3355,7 +3358,7 @@ iwp_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int iwp_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { iwp_sc_t *sc; int err = EINVAL; @@ -3366,11 +3369,21 @@ iwp_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, sc = (iwp_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +iwp_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + iwp_sc_t *sc; + + sc = (iwp_sc_t *)arg; + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int iwp_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/ixgbe/ixgbe.conf b/usr/src/uts/common/io/ixgbe/ixgbe.conf index 8163fa8d9a..d5cee7501a 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe.conf +++ b/usr/src/uts/common/io/ixgbe/ixgbe.conf @@ -21,7 +21,7 @@ # # Copyright(c) 2007-2008 Intel Corporation. All rights reserved. # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # @@ -66,7 +66,8 @@ # # rx_group_number # The number of the receive groups -# Allowed values: 1 - 16 +# Allowed values: 1 - 16 (for Intel 82598 10Gb ethernet controller) +# Allowed values: 1 - 64 (for Intel 82599 10Gb ethernet controller) # Default value: 1 # # -------- How to set parameters for a particular interface --------- diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_debug.c b/usr/src/uts/common/io/ixgbe/ixgbe_debug.c index f4dc85aad6..1430817445 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_debug.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_debug.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -40,8 +39,8 @@ ixgbe_dump_interrupt(void *adapter, char *tag) { ixgbe_t *ixgbe = (ixgbe_t *)adapter; struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_intr_vector_t *vect; - uint32_t ivar, reg; + ixgbe_intr_vector_t *vect; + uint32_t ivar, reg, hw_index; int i, j; /* @@ -74,10 +73,11 @@ ixgbe_dump_interrupt(void *adapter, char *tag) /* for each rx ring bit set */ j = bt_getlowbit(vect->rx_map, 0, (ixgbe->num_rx_rings - 1)); while (j >= 0) { + hw_index = ixgbe->rx_rings[j].hw_index; ixgbe_log(ixgbe, "rx %d ivar %d rxdctl: 0x%x srrctl: 0x%x\n", - j, IXGBE_IVAR_RX_QUEUE(j), - IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)), - IXGBE_READ_REG(hw, IXGBE_SRRCTL(j))); + hw_index, IXGBE_IVAR_RX_QUEUE(hw_index), + IXGBE_READ_REG(hw, IXGBE_RXDCTL(hw_index)), + IXGBE_READ_REG(hw, IXGBE_SRRCTL(hw_index))); j = bt_getlowbit(vect->rx_map, (j + 1), (ixgbe->num_rx_rings - 1)); } @@ -427,7 +427,7 @@ void ixgbe_dump_regs(void *adapter) { ixgbe_t *ixgbe = (ixgbe_t *)adapter; - uint32_t reg_val; + uint32_t reg_val, hw_index; struct ixgbe_hw *hw = &ixgbe->hw; int i; DEBUGFUNC("ixgbe_dump_regs"); @@ -460,10 +460,11 @@ ixgbe_dump_regs(void *adapter) reg_val = IXGBE_READ_REG(hw, IXGBE_RXCTRL); ixgbe_log(ixgbe, "\tRXCTRL=%x\n", reg_val); for (i = 0; i < ixgbe->num_rx_rings; i++) { - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); - ixgbe_log(ixgbe, "\tRXDCTL(%d)=%x\n", i, reg_val); - reg_val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - ixgbe_log(ixgbe, "\tSRRCTL(%d)=%x\n", i, reg_val); + hw_index = ixgbe->rx_rings[i].hw_index; + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(hw_index)); + ixgbe_log(ixgbe, "\tRXDCTL(%d)=%x\n", hw_index, reg_val); + reg_val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(hw_index)); + ixgbe_log(ixgbe, "\tSRRCTL(%d)=%x\n", hw_index, reg_val); } reg_val = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_log(ixgbe, "\tRXCSUM=%x\n", reg_val); diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_gld.c b/usr/src/uts/common/io/ixgbe/ixgbe_gld.c index abc1f3647f..a7d580de1d 100644 --- 
a/usr/src/uts/common/io/ixgbe/ixgbe_gld.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_gld.c @@ -21,308 +21,13 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include "ixgbe_sw.h" /* - * Retrieve a value for one of the statistics. - */ -int -ixgbe_m_stat(void *arg, uint_t stat, uint64_t *val) -{ - ixgbe_t *ixgbe = (ixgbe_t *)arg; - struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_stat_t *ixgbe_ks; - int i; - - ixgbe_ks = (ixgbe_stat_t *)ixgbe->ixgbe_ks->ks_data; - - mutex_enter(&ixgbe->gen_lock); - - if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { - mutex_exit(&ixgbe->gen_lock); - return (ECANCELED); - } - - switch (stat) { - case MAC_STAT_IFSPEED: - *val = ixgbe->link_speed * 1000000ull; - break; - - case MAC_STAT_MULTIRCV: - ixgbe_ks->mprc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_MPRC); - *val = ixgbe_ks->mprc.value.ui64; - break; - - case MAC_STAT_BRDCSTRCV: - ixgbe_ks->bprc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_BPRC); - *val = ixgbe_ks->bprc.value.ui64; - break; - - case MAC_STAT_MULTIXMT: - ixgbe_ks->mptc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_MPTC); - *val = ixgbe_ks->mptc.value.ui64; - break; - - case MAC_STAT_BRDCSTXMT: - ixgbe_ks->bptc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_BPTC); - *val = ixgbe_ks->bptc.value.ui64; - break; - - case MAC_STAT_NORCVBUF: - for (i = 0; i < 8; i++) { - ixgbe_ks->rnbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RNBC(i)); - } - *val = ixgbe_ks->rnbc.value.ui64; - break; - - case MAC_STAT_IERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - ixgbe_ks->illerrc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - ixgbe_ks->errbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ERRBC); - ixgbe_ks->rlec.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RLEC); - *val = ixgbe_ks->crcerrs.value.ui64 + - ixgbe_ks->illerrc.value.ui64 + - ixgbe_ks->errbc.value.ui64 + - ixgbe_ks->rlec.value.ui64; - break; - - case 
MAC_STAT_RBYTES: - ixgbe_ks->tor.value.ui64 = 0; - for (i = 0; i < 16; i++) { - ixgbe_ks->qbrc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBRC(i)); - ixgbe_ks->tor.value.ui64 += - ixgbe_ks->qbrc[i].value.ui64; - } - *val = ixgbe_ks->tor.value.ui64; - break; - - case MAC_STAT_OBYTES: - ixgbe_ks->tot.value.ui64 = 0; - for (i = 0; i < 16; i++) { - if (hw->mac.type >= ixgbe_mac_82599EB) { - ixgbe_ks->qbtc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); - ixgbe_ks->qbtc[i].value.ui64 += ((uint64_t) - IXGBE_READ_REG(hw, IXGBE_QBTC_H(i))) << 32; - } else { - ixgbe_ks->qbtc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBTC(i)); - } - ixgbe_ks->tot.value.ui64 += - ixgbe_ks->qbtc[i].value.ui64; - } - *val = ixgbe_ks->tot.value.ui64; - break; - - case MAC_STAT_IPACKETS: - ixgbe_ks->tpr.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_TPR); - *val = ixgbe_ks->tpr.value.ui64; - break; - - case MAC_STAT_OPACKETS: - ixgbe_ks->tpt.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_TPT); - *val = ixgbe_ks->tpt.value.ui64; - break; - - /* RFC 1643 stats */ - case ETHER_STAT_FCS_ERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - *val = ixgbe_ks->crcerrs.value.ui64; - break; - - case ETHER_STAT_TOOLONG_ERRORS: - ixgbe_ks->roc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ROC); - *val = ixgbe_ks->roc.value.ui64; - break; - - case ETHER_STAT_MACRCV_ERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - ixgbe_ks->illerrc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - ixgbe_ks->errbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ERRBC); - ixgbe_ks->rlec.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RLEC); - *val = ixgbe_ks->crcerrs.value.ui64 + - ixgbe_ks->illerrc.value.ui64 + - ixgbe_ks->errbc.value.ui64 + - ixgbe_ks->rlec.value.ui64; - break; - - /* MII/GMII stats */ - case ETHER_STAT_XCVR_ADDR: - /* The Internal PHY's MDI address for each MAC is 1 */ - *val = 1; - break; - - case ETHER_STAT_XCVR_ID: - *val = hw->phy.id; - break; - - case 
ETHER_STAT_XCVR_INUSE: - switch (ixgbe->link_speed) { - case IXGBE_LINK_SPEED_1GB_FULL: - *val = - (hw->phy.media_type == ixgbe_media_type_copper) ? - XCVR_1000T : XCVR_1000X; - break; - case IXGBE_LINK_SPEED_100_FULL: - *val = (hw->phy.media_type == ixgbe_media_type_copper) ? - XCVR_100T2 : XCVR_100X; - break; - default: - *val = XCVR_NONE; - break; - } - break; - - case ETHER_STAT_CAP_10GFDX: - *val = 1; - break; - - case ETHER_STAT_CAP_1000FDX: - *val = 1; - break; - - case ETHER_STAT_CAP_100FDX: - *val = 1; - break; - - case ETHER_STAT_CAP_ASMPAUSE: - *val = ixgbe->param_asym_pause_cap; - break; - - case ETHER_STAT_CAP_PAUSE: - *val = ixgbe->param_pause_cap; - break; - - case ETHER_STAT_CAP_AUTONEG: - *val = 1; - break; - - case ETHER_STAT_ADV_CAP_10GFDX: - *val = ixgbe->param_adv_10000fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_1000FDX: - *val = ixgbe->param_adv_1000fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_100FDX: - *val = ixgbe->param_adv_100fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_ASMPAUSE: - *val = ixgbe->param_adv_asym_pause_cap; - break; - - case ETHER_STAT_ADV_CAP_PAUSE: - *val = ixgbe->param_adv_pause_cap; - break; - - case ETHER_STAT_ADV_CAP_AUTONEG: - *val = ixgbe->param_adv_autoneg_cap; - break; - - case ETHER_STAT_LP_CAP_10GFDX: - *val = ixgbe->param_lp_10000fdx_cap; - break; - - case ETHER_STAT_LP_CAP_1000FDX: - *val = ixgbe->param_lp_1000fdx_cap; - break; - - case ETHER_STAT_LP_CAP_100FDX: - *val = ixgbe->param_lp_100fdx_cap; - break; - - case ETHER_STAT_LP_CAP_ASMPAUSE: - *val = ixgbe->param_lp_asym_pause_cap; - break; - - case ETHER_STAT_LP_CAP_PAUSE: - *val = ixgbe->param_lp_pause_cap; - break; - - case ETHER_STAT_LP_CAP_AUTONEG: - *val = ixgbe->param_lp_autoneg_cap; - break; - - case ETHER_STAT_LINK_ASMPAUSE: - *val = ixgbe->param_asym_pause_cap; - break; - - case ETHER_STAT_LINK_PAUSE: - *val = ixgbe->param_pause_cap; - break; - - case ETHER_STAT_LINK_AUTONEG: - *val = ixgbe->param_adv_autoneg_cap; - break; - - case 
ETHER_STAT_LINK_DUPLEX: - *val = ixgbe->link_duplex; - break; - - case ETHER_STAT_TOOSHORT_ERRORS: - ixgbe_ks->ruc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RUC); - *val = ixgbe_ks->ruc.value.ui64; - break; - - case ETHER_STAT_CAP_REMFAULT: - *val = ixgbe->param_rem_fault; - break; - - case ETHER_STAT_ADV_REMFAULT: - *val = ixgbe->param_adv_rem_fault; - break; - - case ETHER_STAT_LP_REMFAULT: - *val = ixgbe->param_lp_rem_fault; - break; - - case ETHER_STAT_JABBER_ERRORS: - ixgbe_ks->rjc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RJC); - *val = ixgbe_ks->rjc.value.ui64; - break; - - default: - mutex_exit(&ixgbe->gen_lock); - return (ENOTSUP); - } - - mutex_exit(&ixgbe->gen_lock); - - if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { - ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); - return (EIO); - } - - return (0); -} - -/* * Bring the device out of the reset/quiesced state that it * was in when the interface was registered. */ @@ -732,115 +437,68 @@ setup_link: int ixgbe_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { ixgbe_t *ixgbe = (ixgbe_t *)arg; struct ixgbe_hw *hw = &ixgbe->hw; int err = 0; uint32_t flow_control; uint64_t tmp = 0; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - mac_propval_range_t range; - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_READ; - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&ixgbe->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&ixgbe->link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - if (pr_valsize >= sizeof (uint64_t)) { - tmp = ixgbe->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp 
= ixgbe->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_adv_autoneg_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_autoneg_cap; break; case MAC_PROP_FLOWCTRL: - *perm = MAC_PROP_PERM_RW; - if (pr_valsize >= sizeof (uint32_t)) { - if (is_default) { + ASSERT(pr_valsize >= sizeof (uint32_t)); + + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: flow_control = LINK_FLOWCTRL_NONE; - bcopy(&flow_control, pr_val, - sizeof (flow_control)); break; - } - switch (hw->fc.requested_mode) { - case ixgbe_fc_none: - flow_control = LINK_FLOWCTRL_NONE; - break; - case ixgbe_fc_rx_pause: - flow_control = LINK_FLOWCTRL_RX; - break; - case ixgbe_fc_tx_pause: - flow_control = LINK_FLOWCTRL_TX; - break; - case ixgbe_fc_full: - flow_control = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flow_control, pr_val, sizeof (flow_control)); - } else - err = EINVAL; + case ixgbe_fc_rx_pause: + flow_control = LINK_FLOWCTRL_RX; + break; + case ixgbe_fc_tx_pause: + flow_control = LINK_FLOWCTRL_TX; + break; + case ixgbe_fc_full: + flow_control = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flow_control, pr_val, sizeof (flow_control)); break; case MAC_PROP_ADV_10GFDX_CAP: - *(uint8_t *)pr_val = (is_default ? 1 : - ixgbe->param_adv_10000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_10000fdx_cap; break; case MAC_PROP_EN_10GFDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_10000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_10000fdx_cap; break; case MAC_PROP_ADV_1000FDX_CAP: - *(uint8_t *)pr_val = (is_default ? 
1 : - ixgbe->param_adv_1000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_1000fdx_cap; break; case MAC_PROP_EN_1000FDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_1000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_1000fdx_cap; break; case MAC_PROP_ADV_100FDX_CAP: - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_adv_100fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_100fdx_cap; break; case MAC_PROP_EN_100FDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_100fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_100fdx_cap; break; case MAC_PROP_PRIVATE: err = ixgbe_get_priv_prop(ixgbe, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = DEFAULT_MTU; - range.range_uint32[0].mpur_max = ixgbe->capab->max_mtu; - bcopy(&range, pr_val, sizeof (range)); + pr_valsize, pr_val); break; default: err = EINVAL; @@ -849,6 +507,83 @@ ixgbe_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +/* + * Report a property's permission, default value and range via prh. + */ +void +ixgbe_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg; + uint_t perm; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_10GFDX_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + perm = (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) ? 
+ MAC_PROP_PERM_RW : MAC_PROP_PERM_READ; + if (perm == MAC_PROP_PERM_RW) + mac_prop_info_set_default_uint8(prh, 1); + mac_prop_info_set_perm(prh, perm); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_NONE); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + DEFAULT_MTU, ixgbe->capab->max_mtu); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } + + if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = DEFAULT_TX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = DEFAULT_TX_RECYCLE_THRESHOLD; + } else if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = DEFAULT_TX_OVERLOAD_THRESHOLD; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = DEFAULT_TX_RESCHED_THRESHOLD; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = DEFAULT_RX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = DEFAULT_RX_LIMIT_PER_INTR; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = ixgbe->capab->def_intr_throttle; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%x", value); + /* Report the computed default through prh. */ + mac_prop_info_set_default_str(prh, valstr); + } + } +} + boolean_t ixgbe_param_locked(mac_prop_id_t pr_num) { @@ -999,65 +734,53 @@ ixgbe_set_priv_prop(ixgbe_t *ixgbe, const char *pr_name, int ixgbe_get_priv_prop(ixgbe_t *ixgbe, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - *perm = MAC_PROP_PERM_RW; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 
1 : ixgbe->param_adv_pause_cap); + value = ixgbe->param_adv_pause_cap; err = 0; goto done; } if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : ixgbe->param_adv_asym_pause_cap); + value = ixgbe->param_adv_asym_pause_cap; err = 0; goto done; } if (strcmp(pr_name, "_tx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_TX_COPY_THRESHOLD : - ixgbe->tx_copy_thresh); + value = ixgbe->tx_copy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RECYCLE_THRESHOLD : - ixgbe->tx_recycle_thresh); + value = ixgbe->tx_recycle_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_overload_thresh") == 0) { - value = (is_default ? DEFAULT_TX_OVERLOAD_THRESHOLD : - ixgbe->tx_overload_thresh); + value = ixgbe->tx_overload_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_resched_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RESCHED_THRESHOLD : - ixgbe->tx_resched_thresh); + value = ixgbe->tx_resched_thresh; err = 0; goto done; } if (strcmp(pr_name, "_rx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_RX_COPY_THRESHOLD : - ixgbe->rx_copy_thresh); + value = ixgbe->rx_copy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_PER_INTR : - ixgbe->rx_limit_per_intr); + value = ixgbe->rx_limit_per_intr; err = 0; goto done; } if (strcmp(pr_name, "_intr_throttling") == 0) { - value = (is_default ? 
ixgbe->capab->def_intr_throttle : - ixgbe->intr_throttling[0]); + value = ixgbe->intr_throttling[0]; err = 0; goto done; } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c index 4f9dd4f40f..3d97264a52 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c @@ -28,7 +28,7 @@ #include "ixgbe_sw.h" static char ixgbe_ident[] = "Intel 10Gb Ethernet"; -static char ixgbe_version[] = "driver version 1.1.4"; +static char ixgbe_version[] = "ixgbe 1.1.4"; /* * Local function protoypes @@ -57,11 +57,13 @@ static void ixgbe_setup_tx(ixgbe_t *); static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *); static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *); static void ixgbe_setup_rss(ixgbe_t *); +static void ixgbe_setup_vmdq(ixgbe_t *); +static void ixgbe_setup_vmdq_rss(ixgbe_t *); static void ixgbe_init_unicst(ixgbe_t *); -static int ixgbe_unicst_set(ixgbe_t *, const uint8_t *, int); static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *); static void ixgbe_setup_multicst(ixgbe_t *); static void ixgbe_get_hw_state(ixgbe_t *); +static void ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe); static void ixgbe_get_conf(ixgbe_t *); static void ixgbe_init_params(ixgbe_t *); static int ixgbe_get_prop(ixgbe_t *, char *, int, int, int); @@ -86,6 +88,7 @@ static void ixgbe_map_txring_to_vector(ixgbe_t *, int, int); static void ixgbe_setup_ivar(ixgbe_t *, uint16_t, uint8_t, int8_t); static void ixgbe_enable_ivar(ixgbe_t *, uint16_t, int8_t); static void ixgbe_disable_ivar(ixgbe_t *, uint16_t, int8_t); +static uint32_t ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index); static int ixgbe_map_intrs_to_vectors(ixgbe_t *); static void ixgbe_setup_adapter_vector(ixgbe_t *); static void ixgbe_rem_intr_handlers(ixgbe_t *); @@ -109,22 +112,26 @@ static int ixgbe_resume(dev_info_t *); static int ixgbe_suspend(dev_info_t *); static void ixgbe_unconfigure(dev_info_t *, ixgbe_t *); static uint8_t 
*ixgbe_mc_table_itr(struct ixgbe_hw *, uint8_t **, uint32_t *); +static int ixgbe_cbfunc(dev_info_t *, ddi_cb_action_t, void *, void *, void *); +static int ixgbe_intr_cb_register(ixgbe_t *); +static int ixgbe_intr_adjust(ixgbe_t *, ddi_cb_action_t, int); static int ixgbe_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data); static void ixgbe_fm_init(ixgbe_t *); static void ixgbe_fm_fini(ixgbe_t *); -mac_priv_prop_t ixgbe_priv_props[] = { - {"_tx_copy_thresh", MAC_PROP_PERM_RW}, - {"_tx_recycle_thresh", MAC_PROP_PERM_RW}, - {"_tx_overload_thresh", MAC_PROP_PERM_RW}, - {"_tx_resched_thresh", MAC_PROP_PERM_RW}, - {"_rx_copy_thresh", MAC_PROP_PERM_RW}, - {"_rx_limit_per_intr", MAC_PROP_PERM_RW}, - {"_intr_throttling", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ} +char *ixgbe_priv_props[] = { + "_tx_copy_thresh", + "_tx_recycle_thresh", + "_tx_overload_thresh", + "_tx_resched_thresh", + "_rx_copy_thresh", + "_rx_limit_per_intr", + "_intr_throttling", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; #define IXGBE_MAX_PRIV_PROPS \ @@ -202,7 +209,7 @@ static lb_property_t lb_external = { }; #define IXGBE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t ixgbe_m_callbacks = { IXGBE_M_CALLBACK_FLAGS, @@ -213,12 +220,14 @@ static mac_callbacks_t ixgbe_m_callbacks = { ixgbe_m_multicst, NULL, NULL, + NULL, ixgbe_m_ioctl, ixgbe_m_getcapab, NULL, NULL, ixgbe_m_setprop, - ixgbe_m_getprop + ixgbe_m_getprop, + ixgbe_m_propinfo }; /* @@ -227,7 +236,10 @@ static mac_callbacks_t ixgbe_m_callbacks = { static adapter_info_t ixgbe_82598eb_cap = { 64, /* maximum number of rx queues */ 1, /* minimum number of rx queues */ - 8, /* default number of rx queues */ + 64, /* default number of rx queues */ + 16, /* maximum number of rx groups */ + 1, /* minimum number of rx groups */ + 1, /* default 
number of rx groups */ 32, /* maximum number of tx queues */ 1, /* minimum number of tx queues */ 8, /* default number of tx queues */ @@ -247,7 +259,10 @@ static adapter_info_t ixgbe_82598eb_cap = { static adapter_info_t ixgbe_82599eb_cap = { 128, /* maximum number of rx queues */ 1, /* minimum number of rx queues */ - 8, /* default number of rx queues */ + 128, /* default number of rx queues */ + 64, /* maximum number of rx groups */ + 1, /* minimum number of rx groups */ + 1, /* default number of rx groups */ 128, /* maximum number of tx queues */ 1, /* minimum number of tx queues */ 8, /* default number of tx queues */ @@ -406,6 +421,14 @@ ixgbe_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) ixgbe->attach_progress |= ATTACH_PROGRESS_PROPS; /* + * Register interrupt callback + */ + if (ixgbe_intr_cb_register(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "Failed to register interrupt callback"); + goto attach_fail; + } + + /* * Allocate interrupts */ if (ixgbe_alloc_intrs(ixgbe) != IXGBE_SUCCESS) { @@ -662,6 +685,11 @@ ixgbe_unconfigure(dev_info_t *devinfo, ixgbe_t *ixgbe) } /* + * Unregister interrupt callback handler + */ + (void) ddi_cb_unregister(ixgbe->cb_hdl); + + /* * Remove driver properties */ if (ixgbe->attach_progress & ATTACH_PROGRESS_PROPS) { @@ -745,7 +773,6 @@ ixgbe_register_mac(ixgbe_t *ixgbe) mac->m_max_sdu = ixgbe->default_mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = ixgbe_priv_props; - mac->m_priv_prop_count = IXGBE_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; status = mac_register(mac, &ixgbe->mac_hdl); @@ -879,9 +906,11 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) struct ixgbe_hw *hw = &ixgbe->hw; dev_info_t *devinfo = ixgbe->dip; ixgbe_rx_ring_t *rx_ring; + ixgbe_rx_group_t *rx_group; ixgbe_tx_ring_t *tx_ring; uint32_t rx_size; uint32_t tx_size; + uint32_t ring_per_group; int i; /* @@ -915,12 +944,21 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 
1 : 0)) << 10; /* - * Initialize rx/tx rings parameters + * Initialize rx/tx rings/groups parameters */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; for (i = 0; i < ixgbe->num_rx_rings; i++) { rx_ring = &ixgbe->rx_rings[i]; rx_ring->index = i; rx_ring->ixgbe = ixgbe; + rx_ring->group_index = i / ring_per_group; + rx_ring->hw_index = ixgbe_get_hw_rx_index(ixgbe, i); + } + + for (i = 0; i < ixgbe->num_rx_groups; i++) { + rx_group = &ixgbe->rx_groups[i]; + rx_group->index = i; + rx_group->ixgbe = ixgbe; } for (i = 0; i < ixgbe->num_tx_rings; i++) { @@ -1605,6 +1643,218 @@ ixgbe_stop(ixgbe_t *ixgbe, boolean_t free_buffer) } /* + * ixgbe_cbfunc - Driver interface for generic DDI callbacks + */ +/* ARGSUSED */ +static int +ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg, + void *arg1, void *arg2) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg1; + + switch (cbaction) { + /* IRM callback */ + int count; + case DDI_CB_INTR_ADD: + case DDI_CB_INTR_REMOVE: + count = (int)(uintptr_t)cbarg; + ASSERT(ixgbe->intr_type == DDI_INTR_TYPE_MSIX); + DTRACE_PROBE2(ixgbe__irm__callback, int, count, + int, ixgbe->intr_cnt); + if (ixgbe_intr_adjust(ixgbe, cbaction, count) != + DDI_SUCCESS) { + ixgbe_error(ixgbe, + "IRM CB: Failed to adjust interrupts"); + goto cb_fail; + } + break; + default: + IXGBE_DEBUGLOG_1(ixgbe, "DDI CB: action 0x%x NOT supported", + cbaction); + return (DDI_ENOTSUP); + } + return (DDI_SUCCESS); +cb_fail: + return (DDI_FAILURE); +} + +/* + * ixgbe_intr_adjust - Adjust interrupt to respond to IRM request. 
+ */ +static int +ixgbe_intr_adjust(ixgbe_t *ixgbe, ddi_cb_action_t cbaction, int count) +{ + int i, rc, actual; + + if (count == 0) + return (DDI_SUCCESS); + + if ((cbaction == DDI_CB_INTR_ADD && + ixgbe->intr_cnt + count > ixgbe->intr_cnt_max) || + (cbaction == DDI_CB_INTR_REMOVE && + ixgbe->intr_cnt - count < ixgbe->intr_cnt_min)) + return (DDI_FAILURE); + + if (!(ixgbe->ixgbe_state & IXGBE_STARTED)) { + return (DDI_FAILURE); + } + + for (i = 0; i < ixgbe->num_rx_rings; i++) + mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, NULL); + for (i = 0; i < ixgbe->num_tx_rings; i++) + mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, NULL); + + mutex_enter(&ixgbe->gen_lock); + ixgbe->ixgbe_state &= ~IXGBE_STARTED; + ixgbe->ixgbe_state |= IXGBE_INTR_ADJUST; + ixgbe->ixgbe_state |= IXGBE_SUSPENDED; + mac_link_update(ixgbe->mac_hdl, LINK_STATE_UNKNOWN); + + ixgbe_stop(ixgbe, B_FALSE); + /* + * Disable interrupts + */ + if (ixgbe->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) { + rc = ixgbe_disable_intrs(ixgbe); + ASSERT(rc == IXGBE_SUCCESS); + } + ixgbe->attach_progress &= ~ATTACH_PROGRESS_ENABLE_INTR; + + /* + * Remove interrupt handlers + */ + if (ixgbe->attach_progress & ATTACH_PROGRESS_ADD_INTR) { + ixgbe_rem_intr_handlers(ixgbe); + } + ixgbe->attach_progress &= ~ATTACH_PROGRESS_ADD_INTR; + + /* + * Clear vect_map + */ + bzero(&ixgbe->vect_map, sizeof (ixgbe->vect_map)); + switch (cbaction) { + case DDI_CB_INTR_ADD: + rc = ddi_intr_alloc(ixgbe->dip, ixgbe->htable, + DDI_INTR_TYPE_MSIX, ixgbe->intr_cnt, count, &actual, + DDI_INTR_ALLOC_NORMAL); + if (rc != DDI_SUCCESS || actual != count) { + ixgbe_log(ixgbe, "Adjust interrupts failed." 
+ "return: %d, irm cb size: %d, actual: %d", + rc, count, actual); + goto intr_adjust_fail; + } + ixgbe->intr_cnt += count; + break; + + case DDI_CB_INTR_REMOVE: + for (i = ixgbe->intr_cnt - count; + i < ixgbe->intr_cnt; i ++) { + rc = ddi_intr_free(ixgbe->htable[i]); + ixgbe->htable[i] = NULL; + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, "Adjust interrupts failed." + "return: %d, irm cb size: %d, actual: %d", + rc, count, actual); + goto intr_adjust_fail; + } + } + ixgbe->intr_cnt -= count; + break; + } + + /* + * Get priority for first vector, assume remaining are all the same + */ + rc = ddi_intr_get_pri(ixgbe->htable[0], &ixgbe->intr_pri); + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, + "Get interrupt priority failed: %d", rc); + goto intr_adjust_fail; + } + rc = ddi_intr_get_cap(ixgbe->htable[0], &ixgbe->intr_cap); + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, "Get interrupt cap failed: %d", rc); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR; + + /* + * Map rings to interrupt vectors + */ + if (ixgbe_map_intrs_to_vectors(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, + "IRM CB: Failed to map interrupts to vectors"); + goto intr_adjust_fail; + } + + /* + * Add interrupt handlers + */ + if (ixgbe_add_intr_handlers(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to add interrupt handlers"); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ADD_INTR; + + /* + * Now that mutex locks are initialized, and the chip is also + * initialized, enable interrupts. 
+ */ + if (ixgbe_enable_intrs(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to enable DDI interrupts"); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR; + if (ixgbe_start(ixgbe, B_FALSE) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to start"); + goto intr_adjust_fail; + } + ixgbe->ixgbe_state &= ~IXGBE_INTR_ADJUST; + ixgbe->ixgbe_state &= ~IXGBE_SUSPENDED; + ixgbe->ixgbe_state |= IXGBE_STARTED; + mutex_exit(&ixgbe->gen_lock); + + for (i = 0; i < ixgbe->num_rx_rings; i++) { + mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, + ixgbe->htable[ixgbe->rx_rings[i].intr_vector]); + } + for (i = 0; i < ixgbe->num_tx_rings; i++) { + mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, + ixgbe->htable[ixgbe->tx_rings[i].intr_vector]); + } + + /* Wakeup all Tx rings */ + for (i = 0; i < ixgbe->num_tx_rings; i++) { + mac_tx_ring_update(ixgbe->mac_hdl, + ixgbe->tx_rings[i].ring_handle); + } + + IXGBE_DEBUGLOG_3(ixgbe, + "IRM CB: interrupts new value: 0x%x(0x%x:0x%x).", + ixgbe->intr_cnt, ixgbe->intr_cnt_min, ixgbe->intr_cnt_max); + return (DDI_SUCCESS); + +intr_adjust_fail: + ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST); + mutex_exit(&ixgbe->gen_lock); + return (DDI_FAILURE); +} + +/* + * ixgbe_intr_cb_register - Register interrupt callback function. + */ +static int +ixgbe_intr_cb_register(ixgbe_t *ixgbe) +{ + if (ddi_cb_register(ixgbe->dip, DDI_CB_FLAG_INTR, ixgbe_cbfunc, + ixgbe, NULL, &ixgbe->cb_hdl) != DDI_SUCCESS) { + return (IXGBE_FAILURE); + } + IXGBE_DEBUGLOG_0(ixgbe, "Interrupt callback function registered."); + return (IXGBE_SUCCESS); +} + +/* * ixgbe_alloc_rings - Allocate memory space for rx/tx rings. 
*/ static int @@ -1771,21 +2021,22 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) * Initialize the length register */ size = rx_data->ring_size * sizeof (union ixgbe_adv_rx_desc); - IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->index), size); + IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->hw_index), size); /* * Initialize the base address registers */ buf_low = (uint32_t)rx_data->rbd_area.dma_address; buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32); - IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->index), buf_high); - IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->index), buf_low); + IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->hw_index), buf_high); + IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->hw_index), buf_low); /* * Setup head & tail pointers */ - IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->index), rx_data->ring_size - 1); - IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->index), 0); + IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->hw_index), + rx_data->ring_size - 1); + IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->hw_index), 0); rx_data->rbd_next = 0; rx_data->lro_first = 0; @@ -1796,14 +2047,14 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) * HTHRESH=0 descriptors (to minimize latency on fetch) * WTHRESH defaults to 1 (writeback each descriptor) */ - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->index)); + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index)); reg_val |= IXGBE_RXDCTL_ENABLE; /* enable queue */ /* Not a valid value for 82599 */ if (hw->mac.type < ixgbe_mac_82599EB) { reg_val |= 0x0020; /* pthresh */ } - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->index), reg_val); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index), reg_val); if (hw->mac.type == ixgbe_mac_82599EB) { reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); @@ -1818,7 +2069,7 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) reg_val = (ixgbe->rx_buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; reg_val |= IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->index), reg_val); + 
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val); } static void @@ -1826,18 +2077,33 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) { ixgbe_rx_ring_t *rx_ring; struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_rx_group_t *rx_group; uint32_t reg_val; uint32_t ring_mapping; - int i; + uint32_t i, index; + uint32_t psrtype_rss_bit; /* PSRTYPE must be configured for 82599 */ - reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; -#define IXGBE_PSRTYPE_L2_PKT 0x00001000 - reg_val |= IXGBE_PSRTYPE_L2_PKT; - reg_val |= 0xE0000000; - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val); + if (ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ && + ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ_RSS) { + reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; + reg_val |= IXGBE_PSRTYPE_L2HDR; + reg_val |= 0x80000000; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val); + } else { + if (ixgbe->num_rx_groups > 32) { + psrtype_rss_bit = 0x20000000; + } else { + psrtype_rss_bit = 0x40000000; + } + for (i = 0; i < ixgbe->capab->max_rx_grp_num; i++) { + reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; + reg_val |= IXGBE_PSRTYPE_L2HDR; + reg_val |= psrtype_rss_bit; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), reg_val); + } + } /* * Set filter control in FCTRL to accept broadcast packets and do @@ -1850,6 +2116,46 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_val); /* + * Hardware checksum settings + */ + if (ixgbe->rx_hcksum_enable) { + reg_val = IXGBE_RXCSUM_IPPCSE; /* IP checksum */ + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val); + } + + /* + * Setup VMDq and RSS for multiple receive queues + */ + switch (ixgbe->classify_mode) { + case IXGBE_CLASSIFY_RSS: + /* + * One group, only RSS is needed when more than + * one ring enabled. 
+ */ + ixgbe_setup_rss(ixgbe); + break; + + case IXGBE_CLASSIFY_VMDQ: + /* + * Multiple groups, each group has one ring, + * only VMDq is needed. + */ + ixgbe_setup_vmdq(ixgbe); + break; + + case IXGBE_CLASSIFY_VMDQ_RSS: + /* + * Multiple groups and multiple rings, both + * VMDq and RSS are needed. + */ + ixgbe_setup_vmdq_rss(ixgbe); + break; + + default: + break; + } + + /* * Enable the receive unit. This must be done after filter * control is set in FCTRL. */ @@ -1866,27 +2172,15 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) } /* - * Setup rx groups. - */ - for (i = 0; i < ixgbe->num_rx_groups; i++) { - rx_group = &ixgbe->rx_groups[i]; - rx_group->index = i; - rx_group->ixgbe = ixgbe; - } - - /* * Setup the per-ring statistics mapping. */ ring_mapping = 0; for (i = 0; i < ixgbe->num_rx_rings; i++) { - ring_mapping |= (i & 0xF) << (8 * (i & 0x3)); - if ((i & 0x3) == 0x3) { - IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i >> 2), ring_mapping); - ring_mapping = 0; - } + index = ixgbe->rx_rings[i].hw_index; + ring_mapping = IXGBE_READ_REG(hw, IXGBE_RQSMR(index >> 2)); + ring_mapping |= (i & 0xF) << (8 * (index & 0x3)); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(index >> 2), ring_mapping); } - if ((i & 0x3) != 0x3) - IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i >> 2), ring_mapping); /* * The Max Frame Size in MHADD/MAXFRS will be internally increased @@ -1906,50 +2200,6 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) reg_val |= IXGBE_HLREG0_JUMBOEN; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_val); } - - /* - * Hardware checksum settings - */ - if (ixgbe->rx_hcksum_enable) { - reg_val = IXGBE_RXCSUM_IPPCSE; /* IP checksum */ - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val); - } - - /* - * Setup RSS for multiple receive queues - */ - if (ixgbe->num_rx_rings > 1) - ixgbe_setup_rss(ixgbe); - - /* - * Setup RSC for multiple receive queues. - */ - if (ixgbe->lro_enable) { - for (i = 0; i < ixgbe->num_rx_rings; i++) { - /* - * Make sure rx_buf_size * MAXDESC not greater - * than 65535. - * Intel recommends 4 for MAXDESC field value. 
- */ - reg_val = IXGBE_READ_REG(hw, IXGBE_RSCCTL(i)); - reg_val |= IXGBE_RSCCTL_RSCEN; - if (ixgbe->rx_buf_size == IXGBE_PKG_BUF_16k) - reg_val |= IXGBE_RSCCTL_MAXDESC_1; - else - reg_val |= IXGBE_RSCCTL_MAXDESC_4; - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(i), reg_val); - } - - reg_val = IXGBE_READ_REG(hw, IXGBE_RSCDBU); - reg_val |= IXGBE_RSCDBU_RSCACKDIS; - IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, reg_val); - - reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - reg_val |= IXGBE_RDRXCTL_RSCACKC; - reg_val &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val); - } } static void @@ -2114,13 +2364,17 @@ ixgbe_setup_rss(ixgbe_t *ixgbe) uint32_t i, mrqc, rxcsum; uint32_t random; uint32_t reta; + uint32_t ring_per_group; /* * Fill out redirection table */ reta = 0; + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + for (i = 0; i < 128; i++) { - reta = (reta << 8) | (i % ixgbe->num_rx_rings); + reta = (reta << 8) | (i % ring_per_group) | + ((i % ring_per_group) << 4); if ((i & 3) == 3) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); } @@ -2161,6 +2415,185 @@ ixgbe_setup_rss(ixgbe_t *ixgbe) } /* + * ixgbe_setup_vmdq - Setup MAC classification feature + */ +static void +ixgbe_setup_vmdq(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t vmdctl, i, vtctl; + + /* + * Setup the VMDq Control register, enable VMDq based on + * packet destination MAC address: + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * VMDq Enable = 1; + * VMDq Filter = 0; MAC filtering + * Default VMDq output index = 0; + */ + vmdctl = IXGBE_VMD_CTL_VMDQ_EN; + IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl); + break; + + case ixgbe_mac_82599EB: + /* + * Enable VMDq-only. + */ + vmdctl = IXGBE_MRQC_VMDQEN; + IXGBE_WRITE_REG(hw, IXGBE_MRQC, vmdctl); + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0); + } + + /* + * Enable Virtualization and Replication. 
+ */ + vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + + /* + * Enable receiving packets to all VFs + */ + IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL); + + break; + + default: + break; + } +} + +/* + * ixgbe_setup_vmdq_rss - Setup both vmdq feature and rss feature. + */ +static void +ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t i, mrqc, rxcsum; + uint32_t random; + uint32_t reta; + uint32_t ring_per_group; + uint32_t vmdctl, vtctl; + + /* + * Fill out redirection table + */ + reta = 0; + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + for (i = 0; i < 128; i++) { + reta = (reta << 8) | (i % ring_per_group) | + ((i % ring_per_group) << 4); + if ((i & 3) == 3) + IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); + } + + /* + * Fill out hash function seeds with a random constant + */ + for (i = 0; i < 10; i++) { + (void) random_get_pseudo_bytes((uint8_t *)&random, + sizeof (uint32_t)); + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random); + } + + /* + * Enable and setup RSS and VMDq + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * Enable RSS & Setup RSS Hash functions + */ + mrqc = IXGBE_MRQC_RSSEN | + IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + /* + * Enable and Setup VMDq + * VMDq Filter = 0; MAC filtering + * Default VMDq output index = 0; + */ + vmdctl = IXGBE_VMD_CTL_VMDQ_EN; + IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl); + break; + + case ixgbe_mac_82599EB: + /* + * Enable RSS & Setup RSS Hash functions + */ + mrqc = IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + 
IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; + + /* + * Enable VMDq+RSS. + */ + if (ixgbe->num_rx_groups > 32) { + mrqc = mrqc | IXGBE_MRQC_VMDQRSS64EN; + } else { + mrqc = mrqc | IXGBE_MRQC_VMDQRSS32EN; + } + + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0); + } + break; + + default: + break; + + } + + /* + * Disable Packet Checksum to enable RSS for multiple receive queues. + * It is an adapter hardware limitation that Packet Checksum is + * mutually exclusive with RSS. + */ + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + rxcsum |= IXGBE_RXCSUM_PCSD; + rxcsum &= ~IXGBE_RXCSUM_IPPCSE; + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); + + if (hw->mac.type == ixgbe_mac_82599EB) { + /* + * Enable Virtualization and Replication. + */ + vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + + /* + * Enable receiving packets to all VFs + */ + IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL); + } +} + +/* * ixgbe_init_unicst - Initialize the unicast addresses. 
*/ static void @@ -2183,7 +2616,7 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) /* * Initialize the multiple unicast addresses */ - ixgbe->unicst_total = MAX_NUM_UNICAST_ADDRESSES; + ixgbe->unicst_total = hw->mac.num_rar_entries; ixgbe->unicst_avail = ixgbe->unicst_total; for (slot = 0; slot < ixgbe->unicst_total; slot++) { mac_addr = ixgbe->unicst_addr[slot].mac.addr; @@ -2198,7 +2631,8 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) mac_addr = ixgbe->unicst_addr[slot].mac.addr; if (ixgbe->unicst_addr[slot].mac.set == 1) { (void) ixgbe_set_rar(hw, slot, mac_addr, - NULL, IXGBE_RAH_AV); + ixgbe->unicst_addr[slot].mac.group_index, + IXGBE_RAH_AV); } else { bzero(mac_addr, ETHERADDRL); (void) ixgbe_set_rar(hw, slot, mac_addr, @@ -2209,35 +2643,6 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) } /* - * ixgbe_unicst_set - Set the unicast address to the specified slot. - */ -int -ixgbe_unicst_set(ixgbe_t *ixgbe, const uint8_t *mac_addr, - int slot) -{ - struct ixgbe_hw *hw = &ixgbe->hw; - - ASSERT(mutex_owned(&ixgbe->gen_lock)); - - /* - * Save the unicast address in the software data structure - */ - bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); - - /* - * Set the unicast address to the RAR register - */ - (void) ixgbe_set_rar(hw, slot, (uint8_t *)mac_addr, NULL, IXGBE_RAH_AV); - - if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { - ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); - return (EIO); - } - - return (0); -} - -/* * ixgbe_unicst_find - Find the slot for the specified unicast address */ int @@ -2352,6 +2757,81 @@ ixgbe_setup_multicst(ixgbe_t *ixgbe) } /* + * ixgbe_setup_vmdq_rss_conf - Configure vmdq and rss (number and mode). + * + * Configure the rx classification mode (vmdq & rss) and vmdq & rss numbers. + * Different chipsets may have different allowed configuration of vmdq and rss. 
+ */ +static void +ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t ring_per_group; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * 82598 supports the following combination: + * vmdq no. x rss no. + * [5..16] x 1 + * [1..4] x [1..16] + * However 8 rss queue per pool (vmdq) is sufficient for + * most cases. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + if (ixgbe->num_rx_groups > 4) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups; + } else { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(8, ring_per_group); + } + + break; + + case ixgbe_mac_82599EB: + /* + * 82599 supports the following combination: + * vmdq no. x rss no. + * [33..64] x [1..2] + * [2..32] x [1..4] + * 1 x [1..16] + * However 8 rss queue per pool (vmdq) is sufficient for + * most cases. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + if (ixgbe->num_rx_groups == 1) { + ixgbe->num_rx_rings = min(8, ring_per_group); + } else if (ixgbe->num_rx_groups <= 32) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(4, ring_per_group); + } else if (ixgbe->num_rx_groups <= 64) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(2, ring_per_group); + } + + break; + + default: + break; + } + + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + + if (ixgbe->num_rx_groups == 1 && ring_per_group == 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; + } else if (ixgbe->num_rx_groups != 1 && ring_per_group == 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ; + } else if (ixgbe->num_rx_groups != 1 && ring_per_group != 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ_RSS; + } else { + ixgbe->classify_mode = IXGBE_CLASSIFY_RSS; + } + + ixgbe_log(ixgbe, "rx group number:%d, rx ring number:%d", + ixgbe->num_rx_groups, ixgbe->num_rx_rings); +} + +/* * ixgbe_get_conf - Get driver configurations set in driver.conf. 
* * This routine gets user-configured values out of the configuration @@ -2434,7 +2914,8 @@ ixgbe_get_conf(ixgbe_t *ixgbe) * Multiple groups configuration */ ixgbe->num_rx_groups = ixgbe_get_prop(ixgbe, PROP_RX_GROUP_NUM, - MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM); + ixgbe->capab->min_rx_grp_num, ixgbe->capab->max_rx_grp_num, + ixgbe->capab->def_rx_grp_num); ixgbe->mr_enable = ixgbe_get_prop(ixgbe, PROP_MR_ENABLE, 0, 1, DEFAULT_MR_ENABLE); @@ -2443,6 +2924,16 @@ ixgbe_get_conf(ixgbe_t *ixgbe) ixgbe->num_tx_rings = 1; ixgbe->num_rx_rings = 1; ixgbe->num_rx_groups = 1; + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; + } else { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + max(ixgbe->num_rx_rings / ixgbe->num_rx_groups, 1); + /* + * The combination of num_rx_rings and num_rx_groups + * may be not supported by h/w. We need to adjust + * them to appropriate values. + */ + ixgbe_setup_vmdq_rss_conf(ixgbe); } /* @@ -3699,6 +4190,7 @@ ixgbe_alloc_intrs(ixgbe_t *ixgbe) ixgbe->num_rx_rings = 1; ixgbe->num_rx_groups = 1; ixgbe->num_tx_rings = 1; + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; ixgbe_log(ixgbe, "MSI-X not used, force rings and groups number to 1"); @@ -3745,9 +4237,10 @@ static int ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) { dev_info_t *devinfo; - int request, count, avail, actual; + int request, count, actual; int minimum; int rc; + uint32_t ring_per_group; devinfo = ixgbe->dip; @@ -3767,12 +4260,13 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) case DDI_INTR_TYPE_MSIX: /* * Best number of vectors for the adapter is - * # rx rings + # tx rings. + * (# rx rings + # tx rings), however we will + * limit the request number. 
*/ - request = ixgbe->num_rx_rings + ixgbe->num_tx_rings; + request = min(16, ixgbe->num_rx_rings + ixgbe->num_tx_rings); if (request > ixgbe->capab->max_ring_vect) request = ixgbe->capab->max_ring_vect; - minimum = 2; + minimum = 1; IXGBE_DEBUGLOG_0(ixgbe, "interrupt type: MSI-X"); break; @@ -3797,26 +4291,10 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) } IXGBE_DEBUGLOG_1(ixgbe, "interrupts supported: %d", count); - /* - * Get number of available interrupts - */ - rc = ddi_intr_get_navail(devinfo, intr_type, &avail); - if ((rc != DDI_SUCCESS) || (avail < minimum)) { - ixgbe_log(ixgbe, - "Get interrupt available number failed. " - "Return: %d, available: %d", rc, avail); - return (IXGBE_FAILURE); - } - IXGBE_DEBUGLOG_1(ixgbe, "interrupts available: %d", avail); - - if (avail < request) { - ixgbe_log(ixgbe, "Request %d handles, %d available", - request, avail); - request = avail; - } - actual = 0; ixgbe->intr_cnt = 0; + ixgbe->intr_cnt_max = 0; + ixgbe->intr_cnt_min = 0; /* * Allocate an array of interrupt handles @@ -3834,7 +4312,24 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) } IXGBE_DEBUGLOG_1(ixgbe, "interrupts actually allocated: %d", actual); + /* + * upper/lower limit of interrupts + */ ixgbe->intr_cnt = actual; + ixgbe->intr_cnt_max = request; + ixgbe->intr_cnt_min = minimum; + + /* + * rss number per group should not exceed the rx interrupt number, + * else need to adjust rx ring number. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + ASSERT((ixgbe->num_rx_rings % ixgbe->num_rx_groups) == 0); + if (min(actual, ixgbe->num_rx_rings) < ring_per_group) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(actual, ixgbe->num_rx_rings); + ixgbe_setup_vmdq_rss_conf(ixgbe); + } /* * Now we know the actual number of vectors. 
Here we map the vector @@ -4147,6 +4642,53 @@ ixgbe_disable_ivar(ixgbe_t *ixgbe, uint16_t intr_alloc_entry, int8_t cause) } /* + * Convert the rx ring index driver maintained to the rx ring index + * in h/w. + */ +static uint32_t +ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index) +{ + + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t rx_ring_per_group, hw_rx_index; + + if (ixgbe->classify_mode == IXGBE_CLASSIFY_RSS || + ixgbe->classify_mode == IXGBE_CLASSIFY_NONE) { + return (sw_rx_index); + } else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ) { + if (hw->mac.type == ixgbe_mac_82598EB) { + return (sw_rx_index); + } else if (hw->mac.type == ixgbe_mac_82599EB) { + return (sw_rx_index * 2); + } + } else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) { + rx_ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + + if (hw->mac.type == ixgbe_mac_82598EB) { + hw_rx_index = (sw_rx_index / rx_ring_per_group) * + 16 + (sw_rx_index % rx_ring_per_group); + return (hw_rx_index); + } else if (hw->mac.type == ixgbe_mac_82599EB) { + if (ixgbe->num_rx_groups > 32) { + hw_rx_index = (sw_rx_index / + rx_ring_per_group) * 2 + + (sw_rx_index % rx_ring_per_group); + } else { + hw_rx_index = (sw_rx_index / + rx_ring_per_group) * 4 + + (sw_rx_index % rx_ring_per_group); + } + return (hw_rx_index); + } + } + + /* + * Should never reach. Just to make compiler happy. + */ + return (sw_rx_index); +} + +/* * ixgbe_map_intrs_to_vectors - Map different interrupts to MSI-X vectors. 
* * For MSI-X, here will map rx interrupt, tx interrupt and other interrupt @@ -4183,7 +4725,6 @@ ixgbe_map_intrs_to_vectors(ixgbe_t *ixgbe) */ BT_SET(ixgbe->vect_map[vector].other_map, 0); ixgbe->vect_map[vector].other_cnt++; - vector++; /* * Map rx ring interrupts to vectors @@ -4217,6 +4758,7 @@ ixgbe_setup_adapter_vector(ixgbe_t *ixgbe) ixgbe_intr_vector_t *vect; /* vector bitmap */ int r_idx; /* ring index */ int v_idx; /* vector index */ + uint32_t hw_index; /* * Clear any previous entries @@ -4265,7 +4807,8 @@ ixgbe_setup_adapter_vector(ixgbe_t *ixgbe) (ixgbe->num_rx_rings - 1)); while (r_idx >= 0) { - ixgbe_setup_ivar(ixgbe, r_idx, v_idx, 0); + hw_index = ixgbe->rx_rings[r_idx].hw_index; + ixgbe_setup_ivar(ixgbe, hw_index, v_idx, 0); r_idx = bt_getlowbit(vect->rx_map, (r_idx + 1), (ixgbe->num_rx_rings - 1)); } @@ -4637,11 +5180,31 @@ ixgbe_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) } /* + * Get the global ring index by a ring index within a group. + */ +static int +ixgbe_get_rx_ring_index(ixgbe_t *ixgbe, int gindex, int rindex) +{ + ixgbe_rx_ring_t *rx_ring; + int i; + + for (i = 0; i < ixgbe->num_rx_rings; i++) { + rx_ring = &ixgbe->rx_rings[i]; + if (rx_ring->group_index == gindex) + rindex--; + if (rindex < 0) + return (i); + } + + return (-1); +} + +/* * Callback funtion for MAC layer to register all rings. */ /* ARGSUSED */ void -ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, +ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int group_index, const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) { ixgbe_t *ixgbe = (ixgbe_t *)arg; @@ -4649,25 +5212,37 @@ ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, switch (rtype) { case MAC_RING_TYPE_RX: { - ASSERT(rg_index == 0); - ASSERT(ring_index < ixgbe->num_rx_rings); + /* + * 'index' is the ring index within the group. + * Need to get the global ring index by searching in groups. 
+ */ + int global_ring_index = ixgbe_get_rx_ring_index( + ixgbe, group_index, ring_index); - ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[ring_index]; + ASSERT(global_ring_index >= 0); + + ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[global_ring_index]; rx_ring->ring_handle = rh; infop->mri_driver = (mac_ring_driver_t)rx_ring; infop->mri_start = ixgbe_ring_start; infop->mri_stop = NULL; infop->mri_poll = ixgbe_ring_rx_poll; + infop->mri_stat = ixgbe_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = ixgbe_rx_ring_intr_enable; mintr->mi_disable = ixgbe_rx_ring_intr_disable; + if (ixgbe->intr_type & + (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + ixgbe->htable[rx_ring->intr_vector]; + } break; } case MAC_RING_TYPE_TX: { - ASSERT(rg_index == -1); + ASSERT(group_index == -1); ASSERT(ring_index < ixgbe->num_tx_rings); ixgbe_tx_ring_t *tx_ring = &ixgbe->tx_rings[ring_index]; @@ -4677,7 +5252,12 @@ ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = ixgbe_ring_tx; - + infop->mri_stat = ixgbe_tx_ring_stat; + if (ixgbe->intr_type & + (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + ixgbe->htable[tx_ring->intr_vector]; + } break; } default: @@ -4726,16 +5306,26 @@ ixgbe_rx_ring_intr_enable(mac_intr_handle_t intrh) ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh; ixgbe_t *ixgbe = rx_ring->ixgbe; int r_idx = rx_ring->index; + int hw_r_idx = rx_ring->hw_index; int v_idx = rx_ring->intr_vector; mutex_enter(&ixgbe->gen_lock); - ASSERT(BT_TEST(ixgbe->vect_map[v_idx].rx_map, r_idx) == 0); + if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) { + mutex_exit(&ixgbe->gen_lock); + /* + * Simply return 0. + * Interrupts are being adjusted. ixgbe_intr_adjust() + * will eventually re-enable the interrupt when it's + * done with the adjustment. 
+ */ + return (0); + } /* * To enable interrupt by setting the VAL bit of given interrupt * vector allocation register (IVAR). */ - ixgbe_enable_ivar(ixgbe, r_idx, 0); + ixgbe_enable_ivar(ixgbe, hw_r_idx, 0); BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx); @@ -4759,16 +5349,34 @@ ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh) ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh; ixgbe_t *ixgbe = rx_ring->ixgbe; int r_idx = rx_ring->index; + int hw_r_idx = rx_ring->hw_index; int v_idx = rx_ring->intr_vector; mutex_enter(&ixgbe->gen_lock); - ASSERT(BT_TEST(ixgbe->vect_map[v_idx].rx_map, r_idx) == 1); + if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) { + mutex_exit(&ixgbe->gen_lock); + /* + * Simply return 0. + * In the rare case where an interrupt is being + * disabled while interrupts are being adjusted, + * we don't fail the operation. No interrupts will + * be generated while they are adjusted, and + * ixgbe_intr_adjust() will cause the interrupts + * to be re-enabled once it completes. Note that + * in this case, packets may be delivered to the + * stack via interrupts before ixgbe_rx_ring_intr_enable() + * is called again. This is acceptable since interrupt + * adjustment is infrequent, and the stack will be + * able to handle these packets. + */ + return (0); + } /* * To disable interrupt by clearing the VAL bit of given interrupt * vector allocation register (IVAR). 
*/ - ixgbe_disable_ivar(ixgbe, r_idx, 0); + ixgbe_disable_ivar(ixgbe, hw_r_idx, 0); BT_CLEAR(ixgbe->vect_map[v_idx].rx_map, r_idx); @@ -4785,8 +5393,8 @@ ixgbe_addmac(void *arg, const uint8_t *mac_addr) { ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg; ixgbe_t *ixgbe = rx_group->ixgbe; - int slot; - int err; + struct ixgbe_hw *hw = &ixgbe->hw; + int slot, i; mutex_enter(&ixgbe->gen_lock); @@ -4801,21 +5409,40 @@ ixgbe_addmac(void *arg, const uint8_t *mac_addr) return (ENOSPC); } - for (slot = 0; slot < ixgbe->unicst_total; slot++) { - if (ixgbe->unicst_addr[slot].mac.set == 0) - break; + /* + * The first ixgbe->num_rx_groups slots are reserved for each respective + * group. The rest slots are shared by all groups. While adding a + * MAC address, reserved slots are firstly checked then the shared + * slots are searched. + */ + slot = -1; + if (ixgbe->unicst_addr[rx_group->index].mac.set == 1) { + for (i = ixgbe->num_rx_groups; i < ixgbe->unicst_total; i++) { + if (ixgbe->unicst_addr[i].mac.set == 0) { + slot = i; + break; + } + } + } else { + slot = rx_group->index; } - ASSERT((slot >= 0) && (slot < ixgbe->unicst_total)); - - if ((err = ixgbe_unicst_set(ixgbe, mac_addr, slot)) == 0) { - ixgbe->unicst_addr[slot].mac.set = 1; - ixgbe->unicst_avail--; + if (slot == -1) { + /* no slots available */ + mutex_exit(&ixgbe->gen_lock); + return (ENOSPC); } + bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); + (void) ixgbe_set_rar(hw, slot, ixgbe->unicst_addr[slot].mac.addr, + rx_group->index, IXGBE_RAH_AV); + ixgbe->unicst_addr[slot].mac.set = 1; + ixgbe->unicst_addr[slot].mac.group_index = rx_group->index; + ixgbe->unicst_avail--; + mutex_exit(&ixgbe->gen_lock); - return (err); + return (0); } /* @@ -4826,8 +5453,8 @@ ixgbe_remmac(void *arg, const uint8_t *mac_addr) { ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg; ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; int slot; - int err; mutex_enter(&ixgbe->gen_lock); @@ -4848,13 
+5475,11 @@ ixgbe_remmac(void *arg, const uint8_t *mac_addr) } bzero(ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); - if ((err = ixgbe_unicst_set(ixgbe, - ixgbe->unicst_addr[slot].mac.addr, slot)) == 0) { - ixgbe->unicst_addr[slot].mac.set = 0; - ixgbe->unicst_avail++; - } + (void) ixgbe_clear_rar(hw, slot); + ixgbe->unicst_addr[slot].mac.set = 0; + ixgbe->unicst_avail++; mutex_exit(&ixgbe->gen_lock); - return (err); + return (0); } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h index d9747b4f8f..53690b3a46 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -71,8 +70,6 @@ void ixgbe_write_pci_cfg(struct ixgbe_hw *, uint32_t, uint32_t); #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER 0x04 #define PCI_EX_CONF_CAP 0xE0 -#define MAX_NUM_UNICAST_ADDRESSES 0x10 -#define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define SPEED_10GB 10000 #define SPEED_1GB 1000 #define SPEED_100 100 diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_rx.c b/usr/src/uts/common/io/ixgbe/ixgbe_rx.c index 2ed6a09405..c9efa55a87 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_rx.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_rx.c @@ -514,18 +514,17 @@ ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) */ if ((status_error & IXGBE_RXD_STAT_L4CS) && !(status_error & IXGBE_RXDADV_ERR_TCPE)) - hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + hcksum_flags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ if ((status_error & IXGBE_RXD_STAT_IPCS) && !(status_error & IXGBE_RXDADV_ERR_IPE)) - hcksum_flags |= HCK_IPV4_HDRCKSUM; + hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; if (hcksum_flags != 0) { - (void) hcksum_assoc(mp, - NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); } } @@ -722,6 +721,9 @@ rx_discard: status_error = current_rbd->wb.upper.status_error; } + rx_ring->stat_rbytes += received_bytes; + rx_ring->stat_ipackets += pkt_num; + DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV); rx_data->rbd_next = rx_next; @@ -735,7 +737,7 @@ rx_discard: } else rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size); - IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail); + IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->hw_index), rx_tail); if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_stat.c b/usr/src/uts/common/io/ixgbe/ixgbe_stat.c index 54dfdbff09..4d95a00d9f 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_stat.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_stat.c @@ -438,3 
+438,358 @@ ixgbe_init_stats(ixgbe_t *ixgbe) return (IXGBE_SUCCESS); } + +/* + * Retrieve a value for one of the statistics. + */ +int +ixgbe_m_stat(void *arg, uint_t stat, uint64_t *val) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg; + struct ixgbe_hw *hw = &ixgbe->hw; + ixgbe_stat_t *ixgbe_ks; + int i; + + ixgbe_ks = (ixgbe_stat_t *)ixgbe->ixgbe_ks->ks_data; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_IFSPEED: + *val = ixgbe->link_speed * 1000000ull; + break; + + case MAC_STAT_MULTIRCV: + ixgbe_ks->mprc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_MPRC); + *val = ixgbe_ks->mprc.value.ui64; + break; + + case MAC_STAT_BRDCSTRCV: + ixgbe_ks->bprc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_BPRC); + *val = ixgbe_ks->bprc.value.ui64; + break; + + case MAC_STAT_MULTIXMT: + ixgbe_ks->mptc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_MPTC); + *val = ixgbe_ks->mptc.value.ui64; + break; + + case MAC_STAT_BRDCSTXMT: + ixgbe_ks->bptc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_BPTC); + *val = ixgbe_ks->bptc.value.ui64; + break; + + case MAC_STAT_NORCVBUF: + for (i = 0; i < 8; i++) { + ixgbe_ks->rnbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + } + *val = ixgbe_ks->rnbc.value.ui64; + break; + + case MAC_STAT_IERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + ixgbe_ks->illerrc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + ixgbe_ks->errbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ERRBC); + ixgbe_ks->rlec.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RLEC); + *val = ixgbe_ks->crcerrs.value.ui64 + + ixgbe_ks->illerrc.value.ui64 + + ixgbe_ks->errbc.value.ui64 + + ixgbe_ks->rlec.value.ui64; + break; + + case MAC_STAT_RBYTES: + ixgbe_ks->tor.value.ui64 = 0; + for (i = 0; i < 16; i++) { + ixgbe_ks->qbrc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + ixgbe_ks->tor.value.ui64 += + ixgbe_ks->qbrc[i].value.ui64; + } + *val = 
ixgbe_ks->tor.value.ui64; + break; + + case MAC_STAT_OBYTES: + ixgbe_ks->tot.value.ui64 = 0; + for (i = 0; i < 16; i++) { + if (hw->mac.type >= ixgbe_mac_82599EB) { + ixgbe_ks->qbtc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + ixgbe_ks->qbtc[i].value.ui64 += ((uint64_t) + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i))) << 32; + } else { + ixgbe_ks->qbtc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + } + ixgbe_ks->tot.value.ui64 += + ixgbe_ks->qbtc[i].value.ui64; + } + *val = ixgbe_ks->tot.value.ui64; + break; + + case MAC_STAT_IPACKETS: + ixgbe_ks->tpr.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_TPR); + *val = ixgbe_ks->tpr.value.ui64; + break; + + case MAC_STAT_OPACKETS: + ixgbe_ks->tpt.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_TPT); + *val = ixgbe_ks->tpt.value.ui64; + break; + + /* RFC 1643 stats */ + case ETHER_STAT_FCS_ERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + *val = ixgbe_ks->crcerrs.value.ui64; + break; + + case ETHER_STAT_TOOLONG_ERRORS: + ixgbe_ks->roc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ROC); + *val = ixgbe_ks->roc.value.ui64; + break; + + case ETHER_STAT_MACRCV_ERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + ixgbe_ks->illerrc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + ixgbe_ks->errbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ERRBC); + ixgbe_ks->rlec.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RLEC); + *val = ixgbe_ks->crcerrs.value.ui64 + + ixgbe_ks->illerrc.value.ui64 + + ixgbe_ks->errbc.value.ui64 + + ixgbe_ks->rlec.value.ui64; + break; + + /* MII/GMII stats */ + case ETHER_STAT_XCVR_ADDR: + /* The Internal PHY's MDI address for each MAC is 1 */ + *val = 1; + break; + + case ETHER_STAT_XCVR_ID: + *val = hw->phy.id; + break; + + case ETHER_STAT_XCVR_INUSE: + switch (ixgbe->link_speed) { + case IXGBE_LINK_SPEED_1GB_FULL: + *val = + (hw->phy.media_type == ixgbe_media_type_copper) ? 
+ XCVR_1000T : XCVR_1000X; + break; + case IXGBE_LINK_SPEED_100_FULL: + *val = (hw->phy.media_type == ixgbe_media_type_copper) ? + XCVR_100T2 : XCVR_100X; + break; + default: + *val = XCVR_NONE; + break; + } + break; + + case ETHER_STAT_CAP_10GFDX: + *val = 1; + break; + + case ETHER_STAT_CAP_1000FDX: + *val = 1; + break; + + case ETHER_STAT_CAP_100FDX: + *val = 1; + break; + + case ETHER_STAT_CAP_ASMPAUSE: + *val = ixgbe->param_asym_pause_cap; + break; + + case ETHER_STAT_CAP_PAUSE: + *val = ixgbe->param_pause_cap; + break; + + case ETHER_STAT_CAP_AUTONEG: + *val = 1; + break; + + case ETHER_STAT_ADV_CAP_10GFDX: + *val = ixgbe->param_adv_10000fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_1000FDX: + *val = ixgbe->param_adv_1000fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_100FDX: + *val = ixgbe->param_adv_100fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_ASMPAUSE: + *val = ixgbe->param_adv_asym_pause_cap; + break; + + case ETHER_STAT_ADV_CAP_PAUSE: + *val = ixgbe->param_adv_pause_cap; + break; + + case ETHER_STAT_ADV_CAP_AUTONEG: + *val = ixgbe->param_adv_autoneg_cap; + break; + + case ETHER_STAT_LP_CAP_10GFDX: + *val = ixgbe->param_lp_10000fdx_cap; + break; + + case ETHER_STAT_LP_CAP_1000FDX: + *val = ixgbe->param_lp_1000fdx_cap; + break; + + case ETHER_STAT_LP_CAP_100FDX: + *val = ixgbe->param_lp_100fdx_cap; + break; + + case ETHER_STAT_LP_CAP_ASMPAUSE: + *val = ixgbe->param_lp_asym_pause_cap; + break; + + case ETHER_STAT_LP_CAP_PAUSE: + *val = ixgbe->param_lp_pause_cap; + break; + + case ETHER_STAT_LP_CAP_AUTONEG: + *val = ixgbe->param_lp_autoneg_cap; + break; + + case ETHER_STAT_LINK_ASMPAUSE: + *val = ixgbe->param_asym_pause_cap; + break; + + case ETHER_STAT_LINK_PAUSE: + *val = ixgbe->param_pause_cap; + break; + + case ETHER_STAT_LINK_AUTONEG: + *val = ixgbe->param_adv_autoneg_cap; + break; + + case ETHER_STAT_LINK_DUPLEX: + *val = ixgbe->link_duplex; + break; + + case ETHER_STAT_TOOSHORT_ERRORS: + ixgbe_ks->ruc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RUC); + 
*val = ixgbe_ks->ruc.value.ui64; + break; + + case ETHER_STAT_CAP_REMFAULT: + *val = ixgbe->param_rem_fault; + break; + + case ETHER_STAT_ADV_REMFAULT: + *val = ixgbe->param_adv_rem_fault; + break; + + case ETHER_STAT_LP_REMFAULT: + *val = ixgbe->param_lp_rem_fault; + break; + + case ETHER_STAT_JABBER_ERRORS: + ixgbe_ks->rjc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RJC); + *val = ixgbe_ks->rjc.value.ui64; + break; + + default: + mutex_exit(&ixgbe->gen_lock); + return (ENOTSUP); + } + + mutex_exit(&ixgbe->gen_lock); + + if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { + ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); + return (EIO); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +ixgbe_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)rh; + ixgbe_t *ixgbe = rx_ring->ixgbe; + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->stat_rbytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->stat_ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +ixgbe_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)rh; + ixgbe_t *ixgbe = tx_ring->ixgbe; + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->stat_obytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->stat_opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h index 30dd825e0f..f5e68fcd87 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h 
@@ -80,9 +80,10 @@ extern "C" { #define IXGBE_STARTED 0x02 #define IXGBE_SUSPENDED 0x04 #define IXGBE_STALL 0x08 +#define IXGBE_INTR_ADJUST 0x40 #define IXGBE_ERROR 0x80 -#define MAX_NUM_UNICAST_ADDRESSES 0x10 +#define MAX_NUM_UNICAST_ADDRESSES 0x80 #define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define IXGBE_INTR_NONE 0 #define IXGBE_INTR_MSIX 1 @@ -109,11 +110,11 @@ extern "C" { #define MAX_TX_QUEUE_NUM 128 #define MAX_RX_QUEUE_NUM 128 #define MAX_INTR_VECTOR 64 +#define MAX_RX_GROUP_NUM 64 /* * Maximum values for user configurable parameters */ -#define MAX_RX_GROUP_NUM 1 #define MAX_TX_RING_SIZE 4096 #define MAX_RX_RING_SIZE 4096 @@ -128,7 +129,6 @@ extern "C" { /* * Minimum values for user configurable parameters */ -#define MIN_RX_GROUP_NUM 1 #define MIN_TX_RING_SIZE 64 #define MIN_RX_RING_SIZE 64 @@ -143,7 +143,6 @@ extern "C" { /* * Default values for user configurable parameters */ -#define DEFAULT_RX_GROUP_NUM 1 #define DEFAULT_TX_RING_SIZE 1024 #define DEFAULT_RX_RING_SIZE 1024 @@ -251,11 +250,22 @@ extern "C" { #define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 8) #define IXGBE_FLAG_RSC_CAPABLE (u32)(1 << 9) +/* + * Classification mode + */ +#define IXGBE_CLASSIFY_NONE 0 +#define IXGBE_CLASSIFY_RSS 1 +#define IXGBE_CLASSIFY_VMDQ 2 +#define IXGBE_CLASSIFY_VMDQ_RSS 3 + /* adapter-specific info for each supported device type */ typedef struct adapter_info { - uint32_t max_rx_que_num; /* maximum number of rx queues */ - uint32_t min_rx_que_num; /* minimum number of rx queues */ - uint32_t def_rx_que_num; /* default number of rx queues */ + uint32_t max_rx_que_num; /* maximum number of rx queues */ + uint32_t min_rx_que_num; /* minimum number of rx queues */ + uint32_t def_rx_que_num; /* default number of rx queues */ + uint32_t max_rx_grp_num; /* maximum number of rx groups */ + uint32_t min_rx_grp_num; /* minimum number of rx groups */ + uint32_t def_rx_grp_num; /* default number of rx groups */ uint32_t max_tx_que_num; /* maximum number of tx queues */ uint32_t 
min_tx_que_num; /* minimum number of tx queues */ uint32_t def_tx_que_num; /* default number of tx queues */ @@ -358,7 +368,7 @@ typedef union ixgbe_ether_addr { } reg; struct { uint8_t set; - uint8_t redundant; + uint8_t group_index; uint8_t addr[ETHERADDRL]; } mac; } ixgbe_ether_addr_t; @@ -494,6 +504,8 @@ typedef struct ixgbe_tx_ring { uint32_t stat_break_tbd_limit; uint32_t stat_lso_header_fail; #endif + uint64_t stat_obytes; + uint64_t stat_opackets; mac_ring_handle_t ring_handle; @@ -546,6 +558,8 @@ typedef struct ixgbe_rx_data { */ typedef struct ixgbe_rx_ring { uint32_t index; /* Ring index */ + uint32_t group_index; /* Group index */ + uint32_t hw_index; /* h/w ring index */ uint32_t intr_vector; /* Interrupt vector index */ uint32_t vect_bit; /* vector's bit in register */ @@ -561,6 +575,8 @@ typedef struct ixgbe_rx_ring { uint32_t stat_cksum_error; uint32_t stat_exceed_pkt; #endif + uint64_t stat_rbytes; + uint64_t stat_ipackets; mac_ring_handle_t ring_handle; uint64_t ring_gen_num; @@ -651,6 +667,7 @@ typedef struct ixgbe { boolean_t tx_hcksum_enable; /* Tx h/w cksum offload */ boolean_t lso_enable; /* Large Segment Offload */ boolean_t mr_enable; /* Multiple Tx and Rx Ring */ + uint32_t classify_mode; /* Classification mode */ uint32_t tx_copy_thresh; /* Tx copy threshold */ uint32_t tx_recycle_thresh; /* Tx recycle threshold */ uint32_t tx_overload_thresh; /* Tx overload threshold */ @@ -664,11 +681,14 @@ typedef struct ixgbe { int intr_type; int intr_cnt; + uint32_t intr_cnt_max; + uint32_t intr_cnt_min; int intr_cap; size_t intr_size; uint_t intr_pri; ddi_intr_handle_t *htable; uint32_t eims_mask; + ddi_cb_handle_t cb_hdl; /* Interrupt callback handle */ kmutex_t gen_lock; /* General lock for device access */ kmutex_t watchdog_lock; @@ -825,16 +845,15 @@ int ixgbe_m_start(void *); void ixgbe_m_stop(void *); int ixgbe_m_promisc(void *, boolean_t); int ixgbe_m_multicst(void *, boolean_t, const uint8_t *); -int ixgbe_m_stat(void *, uint_t, uint64_t *); 
void ixgbe_m_resources(void *); void ixgbe_m_ioctl(void *, queue_t *, mblk_t *); boolean_t ixgbe_m_getcapab(void *, mac_capab_t, void *); int ixgbe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int ixgbe_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +int ixgbe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void ixgbe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); int ixgbe_set_priv_prop(ixgbe_t *, const char *, uint_t, const void *); -int ixgbe_get_priv_prop(ixgbe_t *, const char *, - uint_t, uint_t, void *, uint_t *); +int ixgbe_get_priv_prop(ixgbe_t *, const char *, uint_t, void *); boolean_t ixgbe_param_locked(mac_prop_id_t); /* @@ -864,6 +883,9 @@ void ixgbe_error(void *, const char *, ...); * Function prototypes in ixgbe_stat.c */ int ixgbe_init_stats(ixgbe_t *); +int ixgbe_m_stat(void *, uint_t, uint64_t *); +int ixgbe_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int ixgbe_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c index 310b6226fd..484b9c11e3 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c @@ -21,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -501,6 +501,9 @@ adjust_threshold: ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); + tx_ring->stat_obytes += mbsize; + tx_ring->stat_opackets ++; + mutex_exit(&tx_ring->tx_lock); /* @@ -696,7 +699,7 @@ ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) ASSERT(mp != NULL); - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags); bzero(ctx, sizeof (ixgbe_tx_context_t)); if (hckflags == 0) { @@ -705,7 +708,7 @@ ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) ctx->hcksum_flags = hckflags; - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); ctx->mss = mss; ctx->lso_flag = (lsoflags == HW_LSO); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 92d1542efd..e9e1a62e1b 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -280,6 +280,7 @@ #include <sys/mac_provider.h> #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/mac_impl.h> #include <sys/mac.h> #include <sys/dls.h> @@ -306,6 +307,11 @@ #include <sys/exacct_impl.h> #include <inet/nd.h> #include <sys/ethernet.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> +#include <sys/cpupart.h> +#include <inet/wifi_ioctl.h> +#include <net/wpa.h> #define IMPL_HASHSZ 67 /* prime */ @@ -316,6 +322,7 @@ uint_t i_mac_impl_count; static kmem_cache_t *mac_ring_cache; static id_space_t *minor_ids; static uint32_t minor_count; +static pool_event_cb_t mac_pool_event_reg; /* * Logging stuff. 
Perhaps mac_logging_interval could be broken into @@ -370,6 +377,7 @@ void mac_tx_client_block(mac_client_impl_t *); static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); static int mac_start_group_and_rings(mac_group_t *); static void mac_stop_group_and_rings(mac_group_t *); +static void mac_pool_event_cb(pool_event_t, int, void *); /* * Module initialization functions. @@ -440,14 +448,22 @@ mac_init(void) mac_flow_log_enable = B_FALSE; mac_link_log_enable = B_FALSE; mac_logging_timer = 0; + + /* Register to be notified of noteworthy pools events */ + mac_pool_event_reg.pec_func = mac_pool_event_cb; + mac_pool_event_reg.pec_arg = NULL; + pool_event_cb_register(&mac_pool_event_reg); } int mac_fini(void) { + if (i_mac_impl_count > 0 || minor_count > 0) return (EBUSY); + pool_event_cb_unregister(&mac_pool_event_reg); + id_space_destroy(minor_ids); mac_flow_fini(); @@ -459,6 +475,8 @@ mac_fini(void) mod_hash_destroy_hash(i_mactype_hash); mac_soft_ring_finish(); + + return (0); } @@ -501,7 +519,6 @@ i_mac_constructor(void *buf, void *arg, int kmflag) mip->mi_linkstate = LINK_STATE_UNKNOWN; - mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); @@ -554,7 +571,6 @@ i_mac_destructor(void *buf, void *arg) ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); - mutex_destroy(&mip->mi_lock); rw_destroy(&mip->mi_rw_lock); mutex_destroy(&mip->mi_promisc_lock); @@ -1049,6 +1065,7 @@ mac_start(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; int err = 0; + mac_group_t *defgrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); ASSERT(mip->mi_start != NULL); @@ -1074,33 +1091,31 @@ mac_start(mac_handle_t mh) if (mip->mi_default_tx_ring != NULL) { ring = (mac_ring_t *)mip->mi_default_tx_ring; - err = mac_start_ring(ring); - if (err != 0) { - 
mip->mi_active--; - return (err); + if (ring->mr_state != MR_INUSE) { + err = mac_start_ring(ring); + if (err != 0) { + mip->mi_active--; + return (err); + } } - ring->mr_state = MR_INUSE; } - if (mip->mi_rx_groups != NULL) { + if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* * Start the default ring, since it will be needed * to receive broadcast and multicast traffic for * both primary and non-primary MAC clients. */ - mac_group_t *grp = &mip->mi_rx_groups[0]; - - ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); - err = mac_start_group_and_rings(grp); + ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED); + err = mac_start_group_and_rings(defgrp); if (err != 0) { mip->mi_active--; - if (ring != NULL) { + if ((ring != NULL) && + (ring->mr_state == MR_INUSE)) mac_stop_ring(ring); - ring->mr_state = MR_FREE; - } return (err); } - mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED); + mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED); } } @@ -1114,6 +1129,7 @@ void mac_stop(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; + mac_group_t *grp; ASSERT(mip->mi_stop != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -1123,15 +1139,12 @@ mac_stop(mac_handle_t mh) */ ASSERT(mip->mi_active != 0); if (--mip->mi_active == 0) { - if (mip->mi_rx_groups != NULL) { + if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* * There should be no more active clients since the * MAC is being stopped. Stop the default RX group * and transition it back to registered state. - */ - mac_group_t *grp = &mip->mi_rx_groups[0]; - - /* + * * When clients are torn down, the groups * are release via mac_release_rx_group which * knows the the default group is always in @@ -1141,18 +1154,20 @@ mac_stop(mac_handle_t mh) * as a client) and group is in SHARED state. 
*/ ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); - ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) && + ASSERT(MAC_GROUP_NO_CLIENT(grp) && mip->mi_nactiveclients == 0); mac_stop_group_and_rings(grp); - mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED); } if (mip->mi_default_tx_ring != NULL) { mac_ring_t *ring; ring = (mac_ring_t *)mip->mi_default_tx_ring; - mac_stop_ring(ring); - ring->mr_state = MR_FREE; + if (ring->mr_state == MR_INUSE) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } } /* @@ -1460,74 +1475,111 @@ mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, mac_ring_handle_t *hwrh, mac_ring_type_t rtype) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *grp; + mac_ring_t *ring; int cnt = 0; - switch (rtype) { - case MAC_RING_TYPE_RX: { - flow_entry_t *flent = mcip->mci_flent; - mac_group_t *grp; - mac_ring_t *ring; - + if (rtype == MAC_RING_TYPE_RX) { grp = flent->fe_rx_ring_group; - /* - * The mac client did not reserve any RX group, return directly. - * This is probably because the underlying MAC does not support - * any groups. - */ - *hwgh = NULL; - if (grp == NULL) - return (0); - /* - * This group must be reserved by this mac client. 
- */ - ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && - (mch == (mac_client_handle_t) - (MAC_RX_GROUP_ONLY_CLIENT(grp)))); - for (ring = grp->mrg_rings; - ring != NULL; ring = ring->mr_next, cnt++) { - ASSERT(cnt < MAX_RINGS_PER_GROUP); - hwrh[cnt] = (mac_ring_handle_t)ring; - } - *hwgh = (mac_group_handle_t)grp; - return (cnt); - } - case MAC_RING_TYPE_TX: { - mac_soft_ring_set_t *tx_srs; - mac_srs_tx_t *tx; - - tx_srs = MCIP_TX_SRS(mcip); - tx = &tx_srs->srs_tx; - for (; cnt < tx->st_ring_count; cnt++) - hwrh[cnt] = tx->st_rings[cnt]; - return (cnt); - } - default: + } else if (rtype == MAC_RING_TYPE_TX) { + grp = flent->fe_tx_ring_group; + } else { ASSERT(B_FALSE); return (-1); } + /* + * The mac client did not reserve any RX group, return directly. + * This is probably because the underlying MAC does not support + * any groups. + */ + if (hwgh != NULL) + *hwgh = NULL; + if (grp == NULL) + return (0); + /* + * This group must be reserved by this mac client. + */ + ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && + (mcip == MAC_GROUP_ONLY_CLIENT(grp))); + + for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) { + ASSERT(cnt < MAX_RINGS_PER_GROUP); + hwrh[cnt] = (mac_ring_handle_t)ring; + } + if (hwgh != NULL) + *hwgh = (mac_group_handle_t)grp; + + return (cnt); } /* - * Setup the RX callback of the mac client which exclusively controls HW ring. + * This function is called to get info about Tx/Rx rings. + * + * Return value: returns uint_t which will have various bits set + * that indicates different properties of the ring. + */ +uint_t +mac_hwring_getinfo(mac_ring_handle_t rh) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + return (info->mri_flags); +} + +/* + * Export ddi interrupt handles from the HW ring to the pseudo ring and + * setup the RX callback of the mac client which exclusively controls + * HW ring. 
*/ void -mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh) +mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh, + mac_ring_handle_t pseudo_rh) { mac_ring_t *hw_ring = (mac_ring_t *)hwrh; + mac_ring_t *pseudo_ring; mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; - mac_srs->srs_mrh = prh; - mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; + if (pseudo_rh != NULL) { + pseudo_ring = (mac_ring_t *)pseudo_rh; + /* Export the ddi handles to pseudo ring */ + pseudo_ring->mr_info.mri_intr.mi_ddi_handle = + hw_ring->mr_info.mri_intr.mi_ddi_handle; + pseudo_ring->mr_info.mri_intr.mi_ddi_shared = + hw_ring->mr_info.mri_intr.mi_ddi_shared; + /* + * Save a pointer to pseudo ring in the hw ring. If + * interrupt handle changes, the hw ring will be + * notified of the change (see mac_ring_intr_set()) + * and the appropriate change has to be made to + * the pseudo ring that has exported the ddi handle. + */ + hw_ring->mr_prh = pseudo_rh; + } + + if (hw_ring->mr_type == MAC_RING_TYPE_RX) { + ASSERT(!(mac_srs->srs_type & SRST_TX)); + mac_srs->srs_mrh = prh; + mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; + } } void mac_hwring_teardown(mac_ring_handle_t hwrh) { mac_ring_t *hw_ring = (mac_ring_t *)hwrh; - mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; + mac_soft_ring_set_t *mac_srs; - mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; - mac_srs->srs_mrh = NULL; + if (hw_ring == NULL) + return; + hw_ring->mr_prh = NULL; + if (hw_ring->mr_type == MAC_RING_TYPE_RX) { + mac_srs = hw_ring->mr_srs; + ASSERT(!(mac_srs->srs_type & SRST_TX)); + mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; + mac_srs->srs_mrh = NULL; + } } int @@ -1575,7 +1627,7 @@ mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) } /* - * Send packets through the selected tx ring. + * Send packets through a selected tx ring. 
*/ mblk_t * mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) @@ -1588,6 +1640,35 @@ mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) return (info->mri_tx(info->mri_driver, mp)); } +/* + * Query stats for a particular rx/tx ring + */ +int +mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + return (info->mri_stat(info->mri_driver, stat, val)); +} + +/* + * Private function that is only used by aggr to send packets through + * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports + * that do not expose Tx rings, aggr_ring_tx() entry point needs + * access to mac_impl_t to send packets through m_tx() entry point. + * It accomplishes this by calling mac_hwring_send_priv() function. + */ +mblk_t * +mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = mcip->mci_mip; + + MAC_TX(mip, rh, mp, mcip); + return (mp); +} + int mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) { @@ -1609,7 +1690,7 @@ mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) * started/stopped outside of this function. */ void -mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) +mac_set_group_state(mac_group_t *grp, mac_group_state_t state) { /* * If there is no change in the group state, just return. 
@@ -1629,9 +1710,10 @@ mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) */ ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); - if (GROUP_INTR_DISABLE_FUNC(grp) != NULL) + if (grp->mrg_type == MAC_RING_TYPE_RX && + GROUP_INTR_DISABLE_FUNC(grp) != NULL) { GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); - + } break; case MAC_GROUP_STATE_SHARED: @@ -1641,9 +1723,10 @@ mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) */ ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); - if (GROUP_INTR_ENABLE_FUNC(grp) != NULL) + if (grp->mrg_type == MAC_RING_TYPE_RX && + GROUP_INTR_ENABLE_FUNC(grp) != NULL) { GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); - + } /* The ring is not available for reservations any more */ break; @@ -1921,7 +2004,8 @@ mac_rx_srs_restart(mac_soft_ring_set_t *srs) if (mr != NULL) { MAC_RING_UNMARK(mr, MR_QUIESCE); /* In case the ring was stopped, safely restart it */ - (void) mac_start_ring(mr); + if (mr->mr_state != MR_INUSE) + (void) mac_start_ring(mr); } else { FLOW_UNMARK(flent, FE_QUIESCE); } @@ -2088,9 +2172,11 @@ mac_tx_flow_restart(flow_entry_t *flent, void *arg) return (0); } -void -mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) +static void +i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) { + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); mac_tx_client_block(mcip); @@ -2102,8 +2188,22 @@ mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) } void -mac_tx_client_restart(mac_client_impl_t *mcip) +mac_tx_client_quiesce(mac_client_handle_t mch) +{ + i_mac_tx_client_quiesce(mch, SRS_QUIESCE); +} + +void +mac_tx_client_condemn(mac_client_handle_t mch) +{ + i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); +} + +void +mac_tx_client_restart(mac_client_handle_t mch) { + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); mac_tx_client_unblock(mcip); @@ -2119,22 
+2219,22 @@ mac_tx_client_flush(mac_client_impl_t *mcip) { ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); - mac_tx_client_quiesce(mcip, SRS_QUIESCE); - mac_tx_client_restart(mcip); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } void mac_client_quiesce(mac_client_impl_t *mcip) { mac_rx_client_quiesce((mac_client_handle_t)mcip); - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce((mac_client_handle_t)mcip); } void mac_client_restart(mac_client_impl_t *mcip) { mac_rx_client_restart((mac_client_handle_t)mcip); - mac_tx_client_restart(mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } /* @@ -2386,8 +2486,21 @@ i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) rw_enter(&mip->mi_rw_lock, RW_READER); for (cclient = mip->mi_clients_list; cclient != NULL; cclient = cclient->mci_client_next) { - if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) + if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { mac_tx_srs_wakeup(mac_srs, ring); + } else { + /* + * Aggr opens underlying ports in exclusive mode + * and registers flow control callbacks using + * mac_tx_client_notify(). When opened in + * exclusive mode, Tx SRS won't be created + * during mac_unicast_add(). + */ + if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { + mac_tx_invoke_callbacks(cclient, + (mac_tx_cookie_t)ring); + } + } (void) mac_flow_walk(cclient->mci_subflow_tab, mac_tx_flow_srs_wakeup, ring); } @@ -2724,43 +2837,196 @@ done: } /* - * mac_set_prop() sets mac or hardware driver properties: - * MAC resource properties include maxbw, priority, and cpu binding list. - * Driver properties are private properties to the hardware, such as mtu - * and speed. There's one other MAC property -- the PVID. - * If the property is a driver property, mac_set_prop() calls driver's callback - * function to set it. 
- * If the property is a mac resource property, mac_set_prop() invokes - * mac_set_resources() which will cache the property value in mac_impl_t and - * may call mac_client_set_resource() to update property value of the primary - * mac client, if it exists. + * Checks the size of the value size specified for a property as + * part of a property operation. Returns B_TRUE if the size is + * correct, B_FALSE otherwise. + */ +boolean_t +mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) +{ + uint_t minsize = 0; + + if (is_range) + return (valsize >= sizeof (mac_propval_range_t)); + + switch (id) { + case MAC_PROP_ZONE: + minsize = sizeof (dld_ioc_zid_t); + break; + case MAC_PROP_AUTOPUSH: + if (valsize != 0) + minsize = sizeof (struct dlautopush); + break; + case MAC_PROP_TAGMODE: + minsize = sizeof (link_tagmode_t); + break; + case MAC_PROP_RESOURCE: + case MAC_PROP_RESOURCE_EFF: + minsize = sizeof (mac_resource_props_t); + break; + case MAC_PROP_DUPLEX: + minsize = sizeof (link_duplex_t); + break; + case MAC_PROP_SPEED: + minsize = sizeof (uint64_t); + break; + case MAC_PROP_STATUS: + minsize = sizeof (link_state_t); + break; + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + minsize = sizeof (uint8_t); + break; + case MAC_PROP_MTU: + case MAC_PROP_LLIMIT: + case MAC_PROP_LDECAY: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_FLOWCTRL: + minsize = sizeof (link_flowctrl_t); + break; + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + minsize = sizeof (uint8_t); + break; + case MAC_PROP_PVID: + minsize = sizeof (uint16_t); + break; + case 
MAC_PROP_IPTUN_HOPLIMIT: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_IPTUN_ENCAPLIMIT: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_MAX_TX_RINGS_AVAIL: + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + minsize = sizeof (uint_t); + break; + case MAC_PROP_WL_ESSID: + minsize = sizeof (wl_linkstatus_t); + break; + case MAC_PROP_WL_BSSID: + minsize = sizeof (wl_bssid_t); + break; + case MAC_PROP_WL_BSSTYPE: + minsize = sizeof (wl_bss_type_t); + break; + case MAC_PROP_WL_LINKSTATUS: + minsize = sizeof (wl_linkstatus_t); + break; + case MAC_PROP_WL_DESIRED_RATES: + minsize = sizeof (wl_rates_t); + break; + case MAC_PROP_WL_SUPPORTED_RATES: + minsize = sizeof (wl_rates_t); + break; + case MAC_PROP_WL_AUTH_MODE: + minsize = sizeof (wl_authmode_t); + break; + case MAC_PROP_WL_ENCRYPTION: + minsize = sizeof (wl_encryption_t); + break; + case MAC_PROP_WL_RSSI: + minsize = sizeof (wl_rssi_t); + break; + case MAC_PROP_WL_PHY_CONFIG: + minsize = sizeof (wl_phy_conf_t); + break; + case MAC_PROP_WL_CAPABILITY: + minsize = sizeof (wl_capability_t); + break; + case MAC_PROP_WL_WPA: + minsize = sizeof (wl_wpa_t); + break; + case MAC_PROP_WL_SCANRESULTS: + minsize = sizeof (wl_wpa_ess_t); + break; + case MAC_PROP_WL_POWER_MODE: + minsize = sizeof (wl_ps_mode_t); + break; + case MAC_PROP_WL_RADIO: + minsize = sizeof (wl_radio_t); + break; + case MAC_PROP_WL_ESS_LIST: + minsize = sizeof (wl_ess_list_t); + break; + case MAC_PROP_WL_KEY_TAB: + minsize = sizeof (wl_wep_key_tab_t); + break; + case MAC_PROP_WL_CREATE_IBSS: + minsize = sizeof (wl_create_ibss_t); + break; + case MAC_PROP_WL_SETOPTIE: + minsize = sizeof (wl_wpa_ie_t); + break; + case MAC_PROP_WL_DELKEY: + minsize = sizeof (wl_del_key_t); + break; + case MAC_PROP_WL_KEY: + minsize = sizeof (wl_key_t); + break; + case MAC_PROP_WL_MLME: + minsize = sizeof (wl_mlme_t); + break; + } + + return (valsize >= minsize); +} + +/* + * mac_set_prop() sets MAC 
or hardware driver properties: + * + * - MAC-managed properties such as resource properties include maxbw, + * priority, and cpu binding list, as well as the default port VID + * used by bridging. These properties are consumed by the MAC layer + * itself and not passed down to the driver. For resource control + * properties, this function invokes mac_set_resources() which will + * cache the property value in mac_impl_t and may call + * mac_client_set_resource() to update property value of the primary + * mac client, if it exists. + * + * - Properties which act on the hardware and must be passed to the + * driver, such as MTU, through the driver's mc_setprop() entry point. */ int -mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) +mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, + uint_t valsize) { int err = ENOTSUP; mac_impl_t *mip = (mac_impl_t *)mh; ASSERT(MAC_PERIM_HELD(mh)); - switch (macprop->mp_id) { - case MAC_PROP_MAXBW: - case MAC_PROP_PRIO: - case MAC_PROP_PROTECT: - case MAC_PROP_BIND_CPU: { - mac_resource_props_t mrp; + switch (id) { + case MAC_PROP_RESOURCE: { + mac_resource_props_t *mrp; - /* If it is mac property, call mac_set_resources() */ - if (valsize < sizeof (mac_resource_props_t)) - return (EINVAL); - bcopy(val, &mrp, sizeof (mrp)); - err = mac_set_resources(mh, &mrp); + /* call mac_set_resources() for MAC properties */ + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + bcopy(val, mrp, sizeof (*mrp)); + err = mac_set_resources(mh, mrp); + kmem_free(mrp, sizeof (*mrp)); break; } case MAC_PROP_PVID: - if (valsize < sizeof (uint16_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint16_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); err = mac_set_pvid(mh, *(uint16_t *)val); break; @@ -2768,8 +3034,7 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) case MAC_PROP_MTU: { uint32_t 
mtu; - if (valsize < sizeof (mtu)) - return (EINVAL); + ASSERT(valsize >= sizeof (uint32_t)); bcopy(val, &mtu, sizeof (mtu)); err = mac_set_mtu(mh, mtu, NULL); break; @@ -2783,9 +3048,9 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) (mip->mi_state_flags & MIS_IS_VNIC)) return (EINVAL); bcopy(val, &learnval, sizeof (learnval)); - if (learnval == 0 && macprop->mp_id == MAC_PROP_LDECAY) + if (learnval == 0 && id == MAC_PROP_LDECAY) return (EINVAL); - if (macprop->mp_id == MAC_PROP_LLIMIT) + if (id == MAC_PROP_LLIMIT) mip->mi_llimit = learnval; else mip->mi_ldecay = learnval; @@ -2797,60 +3062,68 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) /* For other driver properties, call driver's callback */ if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { err = mip->mi_callbacks->mc_setprop(mip->mi_driver, - macprop->mp_name, macprop->mp_id, valsize, val); + name, id, valsize, val); } } return (err); } /* - * mac_get_prop() gets mac or hardware driver properties. + * mac_get_prop() gets MAC or device driver properties. * * If the property is a driver property, mac_get_prop() calls driver's callback - * function to get it. - * If the property is a mac property, mac_get_prop() invokes mac_get_resources() + * entry point to get it. + * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() * which returns the cached value in mac_impl_t. 
*/ int -mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, - uint_t *perm) +mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, + uint_t valsize) { int err = ENOTSUP; mac_impl_t *mip = (mac_impl_t *)mh; - link_state_t link_state; - boolean_t is_getprop, is_setprop; + uint_t rings; + uint_t vlinks; - is_getprop = (mip->mi_callbacks->mc_callbacks & MC_GETPROP); - is_setprop = (mip->mi_callbacks->mc_callbacks & MC_SETPROP); + bzero(val, valsize); - switch (macprop->mp_id) { - case MAC_PROP_MAXBW: - case MAC_PROP_PRIO: - case MAC_PROP_PROTECT: - case MAC_PROP_BIND_CPU: { - mac_resource_props_t mrp; + switch (id) { + case MAC_PROP_RESOURCE: { + mac_resource_props_t *mrp; /* If mac property, read from cache */ - if (valsize < sizeof (mac_resource_props_t)) - return (EINVAL); - mac_get_resources(mh, &mrp); - bcopy(&mrp, val, sizeof (mac_resource_props_t)); + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + mac_get_resources(mh, mrp); + bcopy(mrp, val, sizeof (*mrp)); + kmem_free(mrp, sizeof (*mrp)); + return (0); + } + case MAC_PROP_RESOURCE_EFF: { + mac_resource_props_t *mrp; + + /* If mac effective property, read from client */ + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + mac_get_effective_resources(mh, mrp); + bcopy(mrp, val, sizeof (*mrp)); + kmem_free(mrp, sizeof (*mrp)); return (0); } case MAC_PROP_PVID: - if (valsize < sizeof (uint16_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint16_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); *(uint16_t *)val = mac_get_pvid(mh); return (0); case MAC_PROP_LLIMIT: case MAC_PROP_LDECAY: - if (valsize < sizeof (uint32_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint32_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); - if (macprop->mp_id == MAC_PROP_LLIMIT) + if (id == MAC_PROP_LLIMIT) 
bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); else bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); @@ -2858,78 +3131,261 @@ mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, case MAC_PROP_MTU: { uint32_t sdu; - mac_propval_range_t range; - - if ((macprop->mp_flags & MAC_PROP_POSSIBLE) != 0) { - if (valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - if (is_getprop) { - err = mip->mi_callbacks->mc_getprop(mip-> - mi_driver, macprop->mp_name, macprop->mp_id, - macprop->mp_flags, valsize, val, perm); - } - /* - * If the driver doesn't have *_m_getprop defined or - * if the driver doesn't support setting MTU then - * return the CURRENT value as POSSIBLE value. - */ - if (!is_getprop || err == ENOTSUP) { - mac_sdu_get(mh, NULL, &sdu); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = sdu; - bcopy(&range, val, sizeof (range)); - err = 0; - } - return (err); - } - if (valsize < sizeof (sdu)) - return (EINVAL); - if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) { - mac_sdu_get(mh, NULL, &sdu); - bcopy(&sdu, val, sizeof (sdu)); - if (is_setprop && (mip->mi_callbacks->mc_setprop(mip-> - mi_driver, macprop->mp_name, macprop->mp_id, - valsize, val) == 0)) { - *perm = MAC_PROP_PERM_RW; - } else { - *perm = MAC_PROP_PERM_READ; - } - return (0); - } else { - if (mip->mi_info.mi_media == DL_ETHER) { - sdu = ETHERMTU; - bcopy(&sdu, val, sizeof (sdu)); - return (0); - } - /* - * ask driver for its default. 
- */ - break; - } + ASSERT(valsize >= sizeof (uint32_t)); + mac_sdu_get(mh, NULL, &sdu); + bcopy(&sdu, val, sizeof (sdu)); + + return (0); } - case MAC_PROP_STATUS: + case MAC_PROP_STATUS: { + link_state_t link_state; + if (valsize < sizeof (link_state)) return (EINVAL); - *perm = MAC_PROP_PERM_READ; link_state = mac_link_get(mh); bcopy(&link_state, val, sizeof (link_state)); + + return (0); + } + + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_TX_RINGS_AVAIL: + ASSERT(valsize >= sizeof (uint_t)); + rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? + mac_rxavail_get(mh) : mac_txavail_get(mh); + bcopy(&rings, val, sizeof (uint_t)); + return (0); + + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + ASSERT(valsize >= sizeof (uint_t)); + vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? + mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); + bcopy(&vlinks, val, sizeof (uint_t)); return (0); + + case MAC_PROP_RXRINGSRANGE: + case MAC_PROP_TXRINGSRANGE: + /* + * The value for these properties are returned through + * the MAC_PROP_RESOURCE property. + */ + return (0); + default: break; } + /* If driver property, request from driver */ - if (is_getprop) { - err = mip->mi_callbacks->mc_getprop(mip->mi_driver, - macprop->mp_name, macprop->mp_id, macprop->mp_flags, - valsize, val, perm); + if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { + err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, + valsize, val); } + return (err); } +/* + * Helper function to initialize the range structure for use in + * mac_get_prop. If the type can be other than uint32, we can + * pass that as an arg. + */ +static void +_mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) +{ + range->mpr_count = 1; + range->mpr_type = MAC_PROPVAL_UINT32; + range->mpr_range_uint32[0].mpur_min = min; + range->mpr_range_uint32[0].mpur_max = max; +} + +/* + * Returns information about the specified property, such as default + * values or permissions. 
+ */ +int +mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, + void *default_val, uint_t default_size, mac_propval_range_t *range, + uint_t *perm) +{ + mac_prop_info_state_t state; + mac_impl_t *mip = (mac_impl_t *)mh; + uint_t max; + + /* + * A property is read/write by default unless the driver says + * otherwise. + */ + if (perm != NULL) + *perm = MAC_PROP_PERM_RW; + + if (default_val != NULL) + bzero(default_val, default_size); + + /* + * First, handle framework properties for which we don't need to + * involve the driver. + */ + switch (id) { + case MAC_PROP_RESOURCE: + case MAC_PROP_PVID: + case MAC_PROP_LLIMIT: + case MAC_PROP_LDECAY: + return (0); + + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_TX_RINGS_AVAIL: + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + return (0); + + case MAC_PROP_RXRINGSRANGE: + case MAC_PROP_TXRINGSRANGE: + /* + * Currently, we support range for RX and TX rings properties. + * When we extend this support to maxbw, cpus and priority, + * we should move this to mac_get_resources. + * There is no default value for RX or TX rings. + */ + if ((mip->mi_state_flags & MIS_IS_VNIC) && + mac_is_vnic_primary(mh)) { + /* + * We don't support setting rings for a VLAN + * data link because it shares its ring with the + * primary MAC client. 
+ */ + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + if (range != NULL) + range->mpr_count = 0; + } else if (range != NULL) { + if (mip->mi_state_flags & MIS_IS_VNIC) + mh = mac_get_lower_mac_handle(mh); + mip = (mac_impl_t *)mh; + if ((id == MAC_PROP_RXRINGSRANGE && + mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || + (id == MAC_PROP_TXRINGSRANGE && + mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { + if (id == MAC_PROP_RXRINGSRANGE) { + if ((mac_rxhwlnksavail_get(mh) + + mac_rxhwlnksrsvd_get(mh)) <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * supports specifying groups, + * but not rings + */ + _mac_set_range(range, 0, 0); + } + } else { + if ((mac_txhwlnksavail_get(mh) + + mac_txhwlnksrsvd_get(mh)) <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * supports specifying groups, + * but not rings + */ + _mac_set_range(range, 0, 0); + } + } + } else { + max = id == MAC_PROP_RXRINGSRANGE ? + mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : + mac_txavail_get(mh) + mac_txrsvd_get(mh); + if (max <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * -1 because we have to leave out the + * default ring. + */ + _mac_set_range(range, 1, max - 1); + } + } + } + return (0); + + case MAC_PROP_STATUS: + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + return (0); + } + + /* + * Get the property info from the driver if it implements the + * property info entry point. + */ + bzero(&state, sizeof (state)); + + if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { + state.pr_default = default_val; + state.pr_default_size = default_size; + state.pr_range = range; + + mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, + (mac_prop_info_handle_t)&state); + + /* + * The operation could fail if the buffer supplied by + * the user was too small for the range or default + * value of the property. 
+ */ + if (state.pr_default_status != 0) + return (state.pr_default_status); + + if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) + *perm = state.pr_perm; + } + + /* + * The MAC layer may want to provide default values or allowed + * ranges for properties if the driver does not provide a + * property info entry point, or that entry point exists, but + * it did not provide a default value or allowed ranges for + * that property. + */ + switch (id) { + case MAC_PROP_MTU: { + uint32_t sdu; + + mac_sdu_get(mh, NULL, &sdu); + + if (range != NULL && !(state.pr_flags & + MAC_PROP_INFO_RANGE)) { + /* MTU range */ + _mac_set_range(range, sdu, sdu); + } + + if (default_val != NULL && !(state.pr_flags & + MAC_PROP_INFO_DEFAULT)) { + if (mip->mi_info.mi_media == DL_ETHER) + sdu = ETHERMTU; + /* default MTU value */ + bcopy(&sdu, default_val, sizeof (sdu)); + } + } + } + + return (0); +} + int mac_fastpath_disable(mac_handle_t mh) { @@ -2953,29 +3409,47 @@ mac_fastpath_enable(mac_handle_t mh) } void -mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) +mac_register_priv_prop(mac_impl_t *mip, char **priv_props) { - mac_priv_prop_t *mpriv; + uint_t nprops, i; + + if (priv_props == NULL) + return; - if (mpp == NULL) + nprops = 0; + while (priv_props[nprops] != NULL) + nprops++; + if (nprops == 0) return; - mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP); - (void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv)); - mip->mi_priv_prop = mpriv; - mip->mi_priv_prop_count = nprop; + + mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); + + for (i = 0; i < nprops; i++) { + mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); + (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], + MAXLINKPROPNAME); + } + + mip->mi_priv_prop_count = nprops; } void mac_unregister_priv_prop(mac_impl_t *mip) { - mac_priv_prop_t *mpriv; + uint_t i; - mpriv = mip->mi_priv_prop; - if (mpriv != NULL) { - kmem_free(mpriv, mip->mi_priv_prop_count * sizeof 
(*mpriv)); - mip->mi_priv_prop = NULL; + if (mip->mi_priv_prop_count == 0) { + ASSERT(mip->mi_priv_prop == NULL); + return; } + + for (i = 0; i < mip->mi_priv_prop_count; i++) + kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); + kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * + sizeof (char *)); + + mip->mi_priv_prop = NULL; mip->mi_priv_prop_count = 0; } @@ -2990,22 +3464,19 @@ mac_unregister_priv_prop(mac_impl_t *mip) * count mechanism) will drop such packets. */ static mac_ring_t * -mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) +mac_ring_alloc(mac_impl_t *mip) { mac_ring_t *ring; - if (cap_rings->mr_type == MAC_RING_TYPE_RX) { - mutex_enter(&mip->mi_ring_lock); - if (mip->mi_ring_freelist != NULL) { - ring = mip->mi_ring_freelist; - mip->mi_ring_freelist = ring->mr_next; - bzero(ring, sizeof (mac_ring_t)); - } else { - ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); - } + mutex_enter(&mip->mi_ring_lock); + if (mip->mi_ring_freelist != NULL) { + ring = mip->mi_ring_freelist; + mip->mi_ring_freelist = ring->mr_next; + bzero(ring, sizeof (mac_ring_t)); mutex_exit(&mip->mi_ring_lock); } else { - ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP); + mutex_exit(&mip->mi_ring_lock); + ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); } ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); return (ring); @@ -3014,16 +3485,16 @@ mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) static void mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) { - if (ring->mr_type == MAC_RING_TYPE_RX) { - mutex_enter(&mip->mi_ring_lock); - ring->mr_state = MR_FREE; - ring->mr_flag = 0; - ring->mr_next = mip->mi_ring_freelist; - mip->mi_ring_freelist = ring; - mutex_exit(&mip->mi_ring_lock); - } else { - kmem_free(ring, sizeof (mac_ring_t)); - } + ASSERT(ring->mr_state == MR_FREE); + + mutex_enter(&mip->mi_ring_lock); + ring->mr_state = MR_FREE; + ring->mr_flag = 0; + ring->mr_next = mip->mi_ring_freelist; + ring->mr_mip = NULL; + mip->mi_ring_freelist = 
ring; + mac_ring_stat_delete(ring); + mutex_exit(&mip->mi_ring_lock); } static void @@ -3046,18 +3517,28 @@ mac_start_ring(mac_ring_t *ring) { int rv = 0; - if (ring->mr_start != NULL) + ASSERT(ring->mr_state == MR_FREE); + + if (ring->mr_start != NULL) { rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); + if (rv != 0) + return (rv); + } + ring->mr_state = MR_INUSE; return (rv); } void mac_stop_ring(mac_ring_t *ring) { + ASSERT(ring->mr_state == MR_INUSE); + if (ring->mr_stop != NULL) ring->mr_stop(ring->mr_driver); + ring->mr_state = MR_FREE; + /* * Increment the ring generation number for this ring. */ @@ -3104,7 +3585,6 @@ mac_start_group_and_rings(mac_group_t *group) ASSERT(ring->mr_state == MR_FREE); if ((rv = mac_start_ring(ring)) != 0) goto error; - ring->mr_state = MR_INUSE; ring->mr_classify_type = MAC_SW_CLASSIFIER; } return (0); @@ -3123,7 +3603,6 @@ mac_stop_group_and_rings(mac_group_t *group) for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { if (ring->mr_state != MR_FREE) { mac_stop_ring(ring); - ring->mr_state = MR_FREE; ring->mr_flag = 0; ring->mr_classify_type = MAC_NO_CLASSIFIER; } @@ -3136,13 +3615,24 @@ static mac_ring_t * mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, mac_capab_rings_t *cap_rings) { - mac_ring_t *ring; + mac_ring_t *ring, *rnext; mac_ring_info_t ring_info; + ddi_intr_handle_t ddi_handle; - ring = mac_ring_alloc(mip, cap_rings); + ring = mac_ring_alloc(mip); /* Prepare basic information of ring */ - ring->mr_index = index; + + /* + * Ring index is numbered to be unique across a particular device. + * Ring index computation makes following assumptions: + * - For drivers with static grouping (e.g. ixgbe, bge), + * ring index exchanged with the driver (e.g. during mr_rget) + * is unique only across the group the ring belongs to. + * - Drivers with dynamic grouping (e.g. nxge), start + * with single group (mrg_index = 0). 
+ */ + ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; ring->mr_type = group->mrg_type; ring->mr_gh = (mac_group_handle_t)group; @@ -3159,12 +3649,63 @@ mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, ring->mr_info = ring_info; + /* + * The interrupt handle could be shared among multiple rings. + * Thus if there is a bunch of rings that are sharing an + * interrupt, then only one ring among the bunch will be made + * available for interrupt re-targeting; the rest will have + * ddi_shared flag set to TRUE and would not be available for + * interrupt re-targeting. + */ + if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { + rnext = ring->mr_next; + while (rnext != NULL) { + if (rnext->mr_info.mri_intr.mi_ddi_handle == + ddi_handle) { + /* + * If default ring (mr_index == 0) is part + * of a group of rings sharing an + * interrupt, then set ddi_shared flag for + * the default ring and give another ring + * the chance to be re-targeted. + */ + if (rnext->mr_index == 0 && + !rnext->mr_info.mri_intr.mi_ddi_shared) { + rnext->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } else { + ring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } + break; + } + rnext = rnext->mr_next; + } + /* + * If rnext is NULL, then no matching ddi_handle was found. + * Rx rings get registered first. So if this is a Tx ring, + * then go through all the Rx rings and see if there is a + * matching ddi handle. 
+ */ + if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { + mac_compare_ddi_handle(mip->mi_rx_groups, + mip->mi_rx_group_count, ring); + } + } + /* Update ring's status */ ring->mr_state = MR_FREE; ring->mr_flag = 0; /* Update the ring count of the group */ group->mrg_cur_count++; + + /* Create per ring kstats */ + if (ring->mr_stat != NULL) { + ring->mr_mip = mip; + mac_ring_stat_create(ring); + } + return (ring); } @@ -3188,13 +3729,17 @@ mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, int mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) { - mac_capab_rings_t *cap_rings; - mac_group_t *group, *groups; - mac_group_info_t group_info; - uint_t group_free = 0; - uint_t ring_left; - mac_ring_t *ring; - int g, err = 0; + mac_capab_rings_t *cap_rings; + mac_group_t *group; + mac_group_t *groups; + mac_group_info_t group_info; + uint_t group_free = 0; + uint_t ring_left; + mac_ring_t *ring; + int g; + int err = 0; + uint_t grpcnt; + boolean_t pseudo_txgrp = B_FALSE; switch (rtype) { case MAC_RING_TYPE_RX: @@ -3213,15 +3758,32 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) ASSERT(B_FALSE); } - if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, - cap_rings)) + if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) return (0); + grpcnt = cap_rings->mr_gnum; + + /* + * If we have multiple TX rings, but only one TX group, we can + * create pseudo TX groups (one per TX ring) in the MAC layer, + * except for an aggr. For an aggr currently we maintain only + * one group with all the rings (for all its ports), going + * forwards we might change this. + */ + if (rtype == MAC_RING_TYPE_TX && + cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && + (mip->mi_state_flags & MIS_IS_AGGR) == 0) { + /* + * The -1 here is because we create a default TX group + * with all the rings in it. + */ + grpcnt = cap_rings->mr_rnum - 1; + pseudo_txgrp = B_TRUE; + } /* * Allocate a contiguous buffer for all groups. 
*/ - groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1), - KM_SLEEP); + groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt+ 1), KM_SLEEP); ring_left = cap_rings->mr_rnum; @@ -3229,7 +3791,7 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) * Get all ring groups if any, and get their ring members * if any. */ - for (g = 0; g < cap_rings->mr_gnum; g++) { + for (g = 0; g < grpcnt; g++) { group = groups + g; /* Prepare basic information of the group */ @@ -3242,6 +3804,16 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* Zero to reuse the info data structure */ bzero(&group_info, sizeof (group_info)); + if (pseudo_txgrp) { + /* + * This is a pseudo group that we created, apart + * from setting the state there is nothing to be + * done. + */ + group->mrg_state = MAC_GROUP_STATE_REGISTERED; + group_free++; + continue; + } /* Query group information from driver */ cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, (mac_group_handle_t)group); @@ -3321,15 +3893,16 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) */ if (rtype == MAC_RING_TYPE_RX) { if ((group_info.mgi_addmac == NULL) || - (group_info.mgi_addmac == NULL)) + (group_info.mgi_addmac == NULL)) { goto bail; + } } /* Cache driver-supplied information */ group->mrg_info = group_info; /* Update the group's status and group count. 
*/ - mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); group_free++; group->mrg_rings = NULL; @@ -3342,7 +3915,7 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) } /* Build up a dummy group for free resources as a pool */ - group = groups + cap_rings->mr_gnum; + group = groups + grpcnt; /* Prepare basic information of the group */ group->mrg_index = -1; @@ -3366,36 +3939,88 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) ring_left = 0; /* Update this group's status */ - mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); } else group->mrg_rings = NULL; ASSERT(ring_left == 0); bail: + /* Cache other important information to finalize the initialization */ switch (rtype) { case MAC_RING_TYPE_RX: mip->mi_rx_group_type = cap_rings->mr_group_type; mip->mi_rx_group_count = cap_rings->mr_gnum; mip->mi_rx_groups = groups; + mip->mi_rx_donor_grp = groups; + if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + /* + * The default ring is reserved since it is + * used for sending the broadcast etc. packets. + */ + mip->mi_rxrings_avail = + mip->mi_rx_groups->mrg_cur_count - 1; + mip->mi_rxrings_rsvd = 1; + } + /* + * The default group cannot be reserved. It is used by + * all the clients that do not have an exclusive group. + */ + mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1; + mip->mi_rxhwclnt_used = 1; break; case MAC_RING_TYPE_TX: - mip->mi_tx_group_type = cap_rings->mr_group_type; - mip->mi_tx_group_count = cap_rings->mr_gnum; + mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC : + cap_rings->mr_group_type; + mip->mi_tx_group_count = grpcnt; mip->mi_tx_group_free = group_free; mip->mi_tx_groups = groups; + group = groups + grpcnt; + ring = group->mrg_rings; /* - * Ring 0 is used as the default one and it could be assigned - * to a client as well. + * The ring can be NULL in the case of aggr. 
Aggr will + * have an empty Tx group which will get populated + * later when pseudo Tx rings are added after + * mac_register() is done. */ - group = groups + cap_rings->mr_gnum; - ring = group->mrg_rings; - while ((ring->mr_index != 0) && (ring->mr_next != NULL)) - ring = ring->mr_next; - ASSERT(ring->mr_index == 0); - mip->mi_default_tx_ring = (mac_ring_handle_t)ring; + if (ring == NULL) { + ASSERT(mip->mi_state_flags & MIS_IS_AGGR); + /* + * pass the group to aggr so it can add Tx + * rings to the group later. + */ + cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL, + (mac_group_handle_t)group); + /* + * Even though there are no rings at this time + * (rings will come later), set the group + * state to registered. + */ + group->mrg_state = MAC_GROUP_STATE_REGISTERED; + } else { + /* + * Ring 0 is used as the default one and it could be + * assigned to a client as well. + */ + while ((ring->mr_index != 0) && (ring->mr_next != NULL)) + ring = ring->mr_next; + ASSERT(ring->mr_index == 0); + mip->mi_default_tx_ring = (mac_ring_handle_t)ring; + } + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) + mip->mi_txrings_avail = group->mrg_cur_count - 1; + /* + * The default ring cannot be reserved. + */ + mip->mi_txrings_rsvd = 1; + /* + * The default group cannot be reserved. It will be shared + * by clients that do not have an exclusive group. + */ + mip->mi_txhwclnt_avail = mip->mi_tx_group_count; + mip->mi_txhwclnt_used = 1; break; default: ASSERT(B_FALSE); @@ -3408,8 +4033,45 @@ bail: } /* - * Called to free all ring groups with particular type. It's supposed all groups - * have been released by clinet. + * The ddi interrupt handle could be shared amoung rings. If so, compare + * the new ring's ddi handle with the existing ones and set ddi_shared + * flag. 
+ */ +void +mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring) +{ + mac_group_t *group; + mac_ring_t *ring; + ddi_intr_handle_t ddi_handle; + int g; + + ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle; + for (g = 0; g < grpcnt; g++) { + group = groups + g; + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + if (ring == cring) + continue; + if (ring->mr_info.mri_intr.mi_ddi_handle == + ddi_handle) { + if (cring->mr_type == MAC_RING_TYPE_RX && + ring->mr_index == 0 && + !ring->mr_info.mri_intr.mi_ddi_shared) { + ring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } else { + cring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } + return; + } + } + } +} + +/* + * Called to free all groups of particular type (RX or TX). It's assumed that + * no clients are using these groups. */ void mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) @@ -3426,6 +4088,7 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) group_count = mip->mi_rx_group_count; mip->mi_rx_groups = NULL; + mip->mi_rx_donor_grp = NULL; mip->mi_rx_group_count = 0; break; case MAC_RING_TYPE_TX: @@ -3501,32 +4164,6 @@ mac_group_remmac(mac_group_t *group, const uint8_t *addr) } /* - * Release a ring in use by marking it MR_FREE. - * Any other client may reserve it for its use. - */ -void -mac_release_tx_ring(mac_ring_handle_t rh) -{ - mac_ring_t *ring = (mac_ring_t *)rh; - mac_group_t *group = (mac_group_t *)ring->mr_gh; - mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; - - ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(ring->mr_state != MR_FREE); - - /* - * Default tx ring will be released by mac_stop(). - */ - if (rh == mip->mi_default_tx_ring) - return; - - mac_stop_ring(ring); - - ring->mr_state = MR_FREE; - ring->mr_flag = 0; -} - -/* * This is the entry point for packets transmitted through the bridging code. * If no bridge is in place, MAC_RING_TX transmits using tx ring. The 'rh' * pointer may be NULL to select the default ring. 
@@ -3558,16 +4195,17 @@ mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp) /* * Find a ring from its index. */ -mac_ring_t * -mac_find_ring(mac_group_t *group, int index) +mac_ring_handle_t +mac_find_ring(mac_group_handle_t gh, int index) { + mac_group_t *group = (mac_group_t *)gh; mac_ring_t *ring = group->mrg_rings; for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) if (ring->mr_index == index) break; - return (ring); + return ((mac_ring_handle_t)ring); } /* * Add a ring to an existing group. @@ -3586,6 +4224,7 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) boolean_t driver_call = (ring == NULL); mac_group_type_t group_type; int ret = 0; + flow_entry_t *flent; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -3606,8 +4245,8 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * There should be no ring with the same ring index in the target * group. */ - ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) == - NULL); + ASSERT(mac_find_ring((mac_group_handle_t)group, + driver_call ? index : ring->mr_index) == NULL); if (driver_call) { /* @@ -3627,7 +4266,8 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * and the mac_ring_t already exists. */ ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(cap_rings->mr_gaddring != NULL); + ASSERT(group->mrg_driver == NULL || + cap_rings->mr_gaddring != NULL); ASSERT(ring->mr_gh == NULL); } @@ -3667,6 +4307,27 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) return (0); /* + * Start the ring if needed. Failure causes to undo the grouping action. 
+ */ + if (ring->mr_state != MR_INUSE) { + if ((ret = mac_start_ring(ring)) != 0) { + if (!driver_call) { + cap_rings->mr_gremring(group->mrg_driver, + ring->mr_driver, ring->mr_type); + } + group->mrg_cur_count--; + group->mrg_rings = ring->mr_next; + + ring->mr_gh = NULL; + + if (driver_call) + mac_ring_free(mip, ring); + + return (ret); + } + } + + /* * Set up SRS/SR according to the ring type. */ switch (ring->mr_type) { @@ -3676,58 +4337,98 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * reserved for someones exclusive use. */ if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { - flow_entry_t *flent; mac_client_impl_t *mcip; - mcip = MAC_RX_GROUP_ONLY_CLIENT(group); - ASSERT(mcip != NULL); - flent = mcip->mci_flent; - ASSERT(flent->fe_rx_srs_cnt > 0); - mac_srs_group_setup(mcip, flent, group, SRST_LINK); + mcip = MAC_GROUP_ONLY_CLIENT(group); + /* + * Even though this group is reserved we migth still + * have multiple clients, i.e a VLAN shares the + * group with the primary mac client. + */ + if (mcip != NULL) { + flent = mcip->mci_flent; + ASSERT(flent->fe_rx_srs_cnt > 0); + mac_rx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, + MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, + mcip, NULL, NULL); + } else { + ring->mr_classify_type = MAC_SW_CLASSIFIER; + } } break; case MAC_RING_TYPE_TX: + { + mac_grp_client_t *mgcp = group->mrg_clients; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_srs_tx_t *tx; + + if (MAC_GROUP_NO_CLIENT(group)) { + if (ring->mr_state == MR_INUSE) + mac_stop_ring(ring); + ring->mr_flag = 0; + break; + } /* - * For TX this function is only invoked during the - * initial creation of a group when a share is - * associated with a MAC client. So the datapath is not - * yet setup, and will be setup later after the - * group has been reserved and populated. + * If the rings are being moved to a group that has + * clients using it, then add the new rings to the + * clients SRS. 
*/ + while (mgcp != NULL) { + boolean_t is_aggr; + + mcip = mgcp->mgc_client; + flent = mcip->mci_flent; + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR); + mac_srs = MCIP_TX_SRS(mcip); + tx = &mac_srs->srs_tx; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + /* + * If we are growing from 1 to multiple rings. + */ + if (tx->st_mode == SRS_TX_BW || + tx->st_mode == SRS_TX_SERIALIZE || + tx->st_mode == SRS_TX_DEFAULT) { + mac_ring_t *tx_ring = tx->st_arg2; + + tx->st_arg2 = NULL; + mac_tx_srs_stat_recreate(mac_srs, B_TRUE); + mac_tx_srs_add_ring(mac_srs, tx_ring); + if (mac_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = is_aggr ? SRS_TX_BW_AGGR : + SRS_TX_BW_FANOUT; + } else { + tx->st_mode = is_aggr ? SRS_TX_AGGR : + SRS_TX_FANOUT; + } + tx->st_func = mac_tx_get_func(tx->st_mode); + } + mac_tx_srs_add_ring(mac_srs, ring); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); + mac_tx_client_restart((mac_client_handle_t)mcip); + mgcp = mgcp->mgc_next; + } break; + } default: ASSERT(B_FALSE); } - /* - * Start the ring if needed. Failure causes to undo the grouping action. + * For aggr, the default ring will be NULL to begin with. If it + * is NULL, then pick the first ring that gets added as the + * default ring. Any ring in an aggregation can be removed at + * any time (by the user action of removing a link) and if the + * current default ring gets removed, then a new one gets + * picked (see i_mac_group_rem_ring()). 
*/ - if ((ret = mac_start_ring(ring)) != 0) { - if (ring->mr_type == MAC_RING_TYPE_RX) { - if (ring->mr_srs != NULL) { - mac_rx_srs_remove(ring->mr_srs); - ring->mr_srs = NULL; - } - } - if (!driver_call) { - cap_rings->mr_gremring(group->mrg_driver, - ring->mr_driver, ring->mr_type); - } - group->mrg_cur_count--; - group->mrg_rings = ring->mr_next; - - ring->mr_gh = NULL; - - if (driver_call) - mac_ring_free(mip, ring); - - return (ret); + if (mip->mi_state_flags & MIS_IS_AGGR && + mip->mi_default_tx_ring == NULL && + ring->mr_type == MAC_RING_TYPE_TX) { + mip->mi_default_tx_ring = (mac_ring_handle_t)ring; } - /* - * Update the ring's state. - */ - ring->mr_state = MR_INUSE; MAC_RING_UNMARK(ring, MR_INCIPIENT); return (0); } @@ -3748,18 +4449,18 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(mac_find_ring(group, ring->mr_index) == ring); + ASSERT(mac_find_ring((mac_group_handle_t)group, + ring->mr_index) == (mac_ring_handle_t)ring); ASSERT((mac_group_t *)ring->mr_gh == group); ASSERT(ring->mr_type == group->mrg_type); + if (ring->mr_state == MR_INUSE) + mac_stop_ring(ring); switch (ring->mr_type) { case MAC_RING_TYPE_RX: group_type = mip->mi_rx_group_type; cap_rings = &mip->mi_rx_rings_cap; - if (group->mrg_state >= MAC_GROUP_STATE_RESERVED) - mac_stop_ring(ring); - /* * Only hardware classified packets hold a reference to the * ring all the way up the Rx path. 
mac_rx_srs_remove() @@ -3771,13 +4472,20 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, mac_rx_srs_remove(ring->mr_srs); ring->mr_srs = NULL; } - ring->mr_state = MR_FREE; - ring->mr_flag = 0; break; case MAC_RING_TYPE_TX: + { + mac_grp_client_t *mgcp; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_srs_tx_t *tx; + mac_ring_t *rem_ring; + mac_group_t *defgrp; + uint_t ring_info = 0; + /* - * For TX this function is only invoked in two + * For TX this function is invoked in three * cases: * * 1) In the case of a failure during the @@ -3789,13 +4497,120 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, * 2) From mac_release_tx_group() when freeing * a TX SRS. * - * In both cases the SRS and its soft rings are - * already quiesced. + * 3) In the case of aggr, when a port gets removed, + * the pseudo Tx rings that it exposed gets removed. + * + * In the first two cases the SRS and its soft + * rings are already quiesced. */ - ASSERT(!driver_call); + if (driver_call) { + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_soft_ring_t *sringp; + mac_srs_tx_t *srs_tx; + + if (mip->mi_state_flags & MIS_IS_AGGR && + mip->mi_default_tx_ring == + (mac_ring_handle_t)ring) { + /* pick a new default Tx ring */ + mip->mi_default_tx_ring = + (group->mrg_rings != ring) ? + (mac_ring_handle_t)group->mrg_rings : + (mac_ring_handle_t)(ring->mr_next); + } + /* Presently only aggr case comes here */ + if (group->mrg_state != MAC_GROUP_STATE_RESERVED) + break; + + mcip = MAC_GROUP_ONLY_CLIENT(group); + ASSERT(mcip != NULL); + ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); + mac_srs = MCIP_TX_SRS(mcip); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); + srs_tx = &mac_srs->srs_tx; + /* + * Wakeup any callers blocked on this + * Tx ring due to flow control. 
+ */ + sringp = srs_tx->st_soft_rings[ring->mr_index]; + ASSERT(sringp != NULL); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_srs_del_ring(mac_srs, ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + break; + } + ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring); group_type = mip->mi_tx_group_type; cap_rings = &mip->mi_tx_rings_cap; + /* + * See if we need to take it out of the MAC clients using + * this group + */ + if (MAC_GROUP_NO_CLIENT(group)) + break; + mgcp = group->mrg_clients; + defgrp = MAC_DEFAULT_TX_GROUP(mip); + while (mgcp != NULL) { + mcip = mgcp->mgc_client; + mac_srs = MCIP_TX_SRS(mcip); + tx = &mac_srs->srs_tx; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + /* + * If we are here when removing rings from the + * defgroup, mac_reserve_tx_ring would have + * already deleted the ring from the MAC + * clients in the group. + */ + if (group != defgrp) { + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(mac_srs, ring)); + mac_tx_srs_del_ring(mac_srs, ring); + } + /* + * Additionally, if we are left with only + * one ring in the group after this, we need + * to modify the mode etc. to. (We haven't + * yet taken the ring out, so we check with 2). + */ + if (group->mrg_cur_count == 2) { + if (ring->mr_next == NULL) + rem_ring = group->mrg_rings; + else + rem_ring = ring->mr_next; + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(mac_srs, + rem_ring)); + mac_tx_srs_del_ring(mac_srs, rem_ring); + if (rem_ring->mr_state != MR_INUSE) { + (void) mac_start_ring(rem_ring); + } + tx->st_arg2 = (void *)rem_ring; + mac_tx_srs_stat_recreate(mac_srs, B_FALSE); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)rem_ring); + /* + * We are shrinking from multiple + * to 1 ring. 
+ */ + if (mac_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = SRS_TX_BW; + } else if (mac_tx_serialize || + (ring_info & MAC_RING_TX_SERIALIZE)) { + tx->st_mode = SRS_TX_SERIALIZE; + } else { + tx->st_mode = SRS_TX_DEFAULT; + } + tx->st_func = mac_tx_get_func(tx->st_mode); + } + mac_tx_client_restart((mac_client_handle_t)mcip); + mgcp = mgcp->mgc_next; + } break; + } default: ASSERT(B_FALSE); } @@ -3817,7 +4632,8 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, if (!driver_call) { ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(cap_rings->mr_gremring != NULL); + ASSERT(group->mrg_driver == NULL || + cap_rings->mr_gremring != NULL); /* * Remove the driver level hardware ring. @@ -3829,12 +4645,10 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, } ring->mr_gh = NULL; - if (driver_call) { + if (driver_call) mac_ring_free(mip, ring); - } else { - ring->mr_state = MR_FREE; + else ring->mr_flag = 0; - } } /* @@ -3982,7 +4796,9 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, allocated_map = B_TRUE; } - ASSERT(map->ma_group == group); + ASSERT(map->ma_group == NULL || map->ma_group == group); + if (map->ma_group == NULL) + map->ma_group = group; /* * If the MAC address is already in use, simply account for the @@ -4082,6 +4898,8 @@ mac_remove_macaddr(mac_address_t *map) return (0); err = mac_group_remmac(map->ma_group, map->ma_addr); + if (err == 0) + map->ma_group = NULL; break; case MAC_ADDRESS_TYPE_UNICAST_PROMISC: err = i_mac_promisc_set(mip, B_FALSE); @@ -4122,7 +4940,7 @@ mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) * Update the primary address for drivers that are not * RINGS capable. 
*/ - if (map->ma_group == NULL) { + if (mip->mi_rx_groups == NULL) { err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) mac_addr); if (err != 0) @@ -4223,11 +5041,6 @@ mac_init_macaddr(mac_impl_t *mip) if (mip->mi_rx_groups == NULL) map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; - /* - * The primary MAC address is reserved for default group according - * to current design. - */ - map->ma_group = mip->mi_rx_groups; map->ma_mip = mip; mip->mi_addresses = map; @@ -4258,6 +5071,11 @@ mac_fini_macaddr(mac_impl_t *mip) /* * Logging related functions. + * + * Note that Kernel statistics have been extended to maintain fine + * granularity of statistics viz. hardware lane, software lane, fanout + * stats etc. However, extended accounting continues to support only + * aggregate statistics like before. */ /* Write the Flow description to the log file */ @@ -4304,18 +5122,33 @@ mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) int mac_write_flow_stats(flow_entry_t *flent) { - flow_stats_t *fl_stats; - net_stat_t nstat; + net_stat_t nstat; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; - fl_stats = &flent->fe_flowstats; + bzero(&nstat, sizeof (net_stat_t)); nstat.ns_name = flent->fe_flow_name; - nstat.ns_ibytes = fl_stats->fs_rbytes; - nstat.ns_obytes = fl_stats->fs_obytes; - nstat.ns_ipackets = fl_stats->fs_ipackets; - nstat.ns_opackets = fl_stats->fs_opackets; - nstat.ns_ierrors = fl_stats->fs_ierrors; - nstat.ns_oerrors = fl_stats->fs_oerrors; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; + nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + nstat.ns_oerrors += mac_rx_stat->mrs_ierrors; + } + mac_srs = (mac_soft_ring_set_t 
*)(flent->fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + nstat.ns_obytes = mac_tx_stat->mts_obytes; + nstat.ns_opackets = mac_tx_stat->mts_opackets; + nstat.ns_oerrors = mac_tx_stat->mts_oerrors; + } return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC)); } @@ -4347,16 +5180,38 @@ mac_write_link_desc(mac_client_impl_t *mcip) int mac_write_link_stats(mac_client_impl_t *mcip) { - net_stat_t nstat; + net_stat_t nstat; + flow_entry_t *flent; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + bzero(&nstat, sizeof (net_stat_t)); nstat.ns_name = mcip->mci_name; - nstat.ns_ibytes = mcip->mci_stat_ibytes; - nstat.ns_obytes = mcip->mci_stat_obytes; - nstat.ns_ipackets = mcip->mci_stat_ipackets; - nstat.ns_opackets = mcip->mci_stat_opackets; - nstat.ns_ierrors = mcip->mci_stat_ierrors; - nstat.ns_oerrors = mcip->mci_stat_oerrors; + flent = mcip->mci_flent; + if (flent != NULL) { + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + nstat.ns_oerrors += mac_rx_stat->mrs_ierrors; + } + } + mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + nstat.ns_obytes = mac_tx_stat->mts_obytes; + nstat.ns_opackets = mac_tx_stat->mts_opackets; + nstat.ns_oerrors = mac_tx_stat->mts_oerrors; + } return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC)); } @@ -4706,181 +5561,255 @@ mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) mac_ring_t * mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) { - mac_group_t *group; - mac_ring_t *ring; + mac_group_t *group; + 
mac_grp_client_t *mgcp; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *srs; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - if (mip->mi_tx_groups == NULL) - return (NULL); - /* * Find an available ring and start it before changing its status. * The unassigned rings are at the end of the mi_tx_groups * array. */ - group = mip->mi_tx_groups + mip->mi_tx_group_count; + group = MAC_DEFAULT_TX_GROUP(mip); - for (ring = group->mrg_rings; ring != NULL; - ring = ring->mr_next) { - if (desired_ring == NULL) { - if (ring->mr_state == MR_FREE) - /* wanted any free ring and found one */ - break; - } else { - mac_ring_t *sring; - mac_client_impl_t *client; - mac_soft_ring_set_t *srs; + /* Can't take the default ring out of the default group */ + ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring); - if (ring != desired_ring) - /* wants a desired ring but this one ain't it */ - continue; + if (desired_ring->mr_state == MR_FREE) { + ASSERT(MAC_GROUP_NO_CLIENT(group)); + if (mac_start_ring(desired_ring) != 0) + return (NULL); + return (desired_ring); + } + /* + * There are clients using this ring, so let's move the clients + * away from using this ring. + */ + for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { + mcip = mgcp->mgc_client; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + srs = MCIP_TX_SRS(mcip); + ASSERT(mac_tx_srs_ring_present(srs, desired_ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs, + desired_ring)); + mac_tx_srs_del_ring(srs, desired_ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + } + return (desired_ring); +} - if (ring->mr_state == MR_FREE) - break; +/* + * For a reserved group with multiple clients, return the primary client. 
+ */ +static mac_client_impl_t * +mac_get_grp_primary(mac_group_t *grp) +{ + mac_grp_client_t *mgcp = grp->mrg_clients; + mac_client_impl_t *mcip; + + while (mgcp != NULL) { + mcip = mgcp->mgc_client; + if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) + return (mcip); + mgcp = mgcp->mgc_next; + } + return (NULL); +} + +/* + * Hybrid I/O specifies the ring that should be given to a share. + * If the ring is already used by clients, then we need to release + * the ring back to the default group so that we can give it to + * the share. This means the clients using this ring now get a + * replacement ring. If there aren't any replacement rings, this + * function returns a failure. + */ +static int +mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type, + mac_ring_t *ring, mac_ring_t **rings, int nrings) +{ + mac_group_t *group = (mac_group_t *)ring->mr_gh; + mac_resource_props_t *mrp; + mac_client_impl_t *mcip; + mac_group_t *defgrp; + mac_ring_t *tring; + mac_group_t *tgrp; + int i; + int j; + mcip = MAC_GROUP_ONLY_CLIENT(group); + if (mcip == NULL) + mcip = mac_get_grp_primary(group); + ASSERT(mcip != NULL); + ASSERT(mcip->mci_share == NULL); + + mrp = MCIP_RESOURCE_PROPS(mcip); + if (ring_type == MAC_RING_TYPE_RX) { + defgrp = mip->mi_rx_donor_grp; + if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) { + /* Need to put this mac client in the default group */ + if (mac_rx_switch_group(mcip, group, defgrp) != 0) + return (ENOSPC); + } else { /* - * Found the desired ring but it's already in use. - * Swap it with a new ring. + * Switch this ring with some other ring from + * the default group. 
*/ - - /* find the client which owns that ring */ - for (client = mip->mi_clients_list; client != NULL; - client = client->mci_client_next) { - srs = MCIP_TX_SRS(client); - if (srs != NULL && mac_tx_srs_ring_present(srs, - desired_ring)) { - /* found our ring */ - break; + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + if (tring->mr_index == 0) + continue; + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; } + if (j >= nrings) + break; } - if (client == NULL) { - /* - * The TX ring is in use, but it's not - * associated with any clients, so it - * has to be the default ring. In that - * case we can simply assign a new ring - * as the default ring, and we're done. - */ - ASSERT(mip->mi_default_tx_ring == - (mac_ring_handle_t)desired_ring); - - /* - * Quiesce all clients on top of - * the NIC to make sure there are no - * pending threads still relying on - * that default ring, for example - * the multicast path. - */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) { - mac_tx_client_quiesce(client, - SRS_QUIESCE); - } - - mip->mi_default_tx_ring = (mac_ring_handle_t) - mac_reserve_tx_ring(mip, NULL); - - /* resume the clients */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) - mac_tx_client_restart(client); - - break; + if (tring == NULL) + return (ENOSPC); + if (mac_group_mov_ring(mip, group, tring) != 0) + return (ENOSPC); + if (mac_group_mov_ring(mip, defgrp, ring) != 0) { + (void) mac_group_mov_ring(mip, defgrp, tring); + return (ENOSPC); } + } + ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); + return (0); + } + defgrp = MAC_DEFAULT_TX_GROUP(mip); + if (ring == (mac_ring_t *)mip->mi_default_tx_ring) { + /* + * See if we can get a spare ring to replace the default + * ring. 
+ */ + if (defgrp->mrg_cur_count == 1) { /* - * Note that we cannot simply invoke the group - * add/rem routines since the client doesn't have a - * TX group. So we need to instead add/remove - * the rings from the SRS. + * Need to get a ring from another client, see if + * there are any clients that can be moved to + * the default group, thereby freeing some rings. */ - ASSERT(client->mci_share == NULL); - - /* first quiece the client */ - mac_tx_client_quiesce(client, SRS_QUIESCE); - - /* give a new ring to the client... */ - sring = mac_reserve_tx_ring(mip, NULL); - if (sring != NULL) { - /* - * There are no other available ring - * on that MAC instance. The client - * will fallback to the shared TX - * ring. - */ - mac_tx_srs_add_ring(srs, sring); - } - - /* ... in exchange for our desired ring */ - mac_tx_srs_del_ring(srs, desired_ring); - - /* restart the client */ - mac_tx_client_restart(client); - - if (mip->mi_default_tx_ring == - (mac_ring_handle_t)desired_ring) { - /* - * The desired ring is the default ring, - * and there are one or more clients - * using that default ring directly. - */ - mip->mi_default_tx_ring = - (mac_ring_handle_t)sring; - /* - * Find clients using default ring and - * swap it with the new default ring. - */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) { - srs = MCIP_TX_SRS(client); - if (srs != NULL && - mac_tx_srs_ring_present(srs, - desired_ring)) { - /* first quiece the client */ - mac_tx_client_quiesce(client, - SRS_QUIESCE); - - /* - * Give it the new default - * ring, and remove the old - * one. 
- */ - if (sring != NULL) { - mac_tx_srs_add_ring(srs, - sring); - } - mac_tx_srs_del_ring(srs, - desired_ring); - - /* restart the client */ - mac_tx_client_restart(client); + for (i = 0; i < mip->mi_tx_group_count; i++) { + tgrp = &mip->mi_tx_groups[i]; + if (tgrp->mrg_state == + MAC_GROUP_STATE_REGISTERED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { + ASSERT(tgrp->mrg_cur_count == 1); + /* + * If this ring is part of the + * rings asked by the share we cannot + * use it as the default ring. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tgrp->mrg_rings) + break; } + if (j < nrings) + continue; + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, tgrp, + defgrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + break; } } - break; + /* + * All the rings are reserved, can't give up the + * default ring. + */ + if (defgrp->mrg_cur_count <= 1) + return (ENOSPC); + } + /* + * Swap the default ring with another. + */ + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + /* + * If this ring is part of the rings asked by the + * share we cannot use it as the default ring. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; + } + if (j >= nrings) + break; } + ASSERT(tring != NULL); + mip->mi_default_tx_ring = (mac_ring_handle_t)tring; + return (0); } - - if (ring != NULL) { - if (mac_start_ring(ring) != 0) - return (NULL); - ring->mr_state = MR_INUSE; + /* + * The Tx ring is with a group reserved by a MAC client. See if + * we can swap it. 
+ */ + ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); + mcip = MAC_GROUP_ONLY_CLIENT(group); + if (mcip == NULL) + mcip = mac_get_grp_primary(group); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { + ASSERT(group->mrg_cur_count == 1); + /* Put this mac client in the default group */ + mac_tx_switch_group(mcip, group, defgrp); + } else { + /* + * Switch this ring with some other ring from + * the default group. + */ + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + if (tring == (mac_ring_t *)mip->mi_default_tx_ring) + continue; + /* + * If this ring is part of the rings asked by the + * share we cannot use it for swapping. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; + } + if (j >= nrings) + break; + } + if (tring == NULL) { + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } + if (mac_group_mov_ring(mip, group, tring) != 0) { + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } + if (mac_group_mov_ring(mip, defgrp, ring) != 0) { + (void) mac_group_mov_ring(mip, defgrp, tring); + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } } - - return (ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); + return (0); } /* - * Minimum number of rings to leave in the default TX group when allocating - * rings to new clients. - */ -static uint_t mac_min_rx_default_rings = 1; - -/* * Populate a zero-ring group with rings. If the share is non-NULL, * the rings are chosen according to that share. 
* Invoked after allocating a new RX or TX group through @@ -4889,15 +5818,17 @@ static uint_t mac_min_rx_default_rings = 1; */ int i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, - mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share) + mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share, + uint32_t ringcnt) { - mac_ring_t **rings, *tmp_ring[1], *ring; + mac_ring_t **rings, *ring; uint_t nrings; - int rv, i, j; + int rv = 0, i = 0, j; - ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC && - mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(new_group->mrg_cur_count == 0); + ASSERT((ring_type == MAC_RING_TYPE_RX && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) || + (ring_type == MAC_RING_TYPE_TX && + mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)); /* * First find the rings to allocate to the group. @@ -4910,9 +5841,23 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, KM_SLEEP); mip->mi_share_capab.ms_squery(share, ring_type, (mac_ring_handle_t *)rings, &nrings); + for (i = 0; i < nrings; i++) { + /* + * If we have given this ring to a non-default + * group, we need to check if we can get this + * ring. + */ + ring = rings[i]; + if (ring->mr_gh != (mac_group_handle_t)src_group || + ring == (mac_ring_t *)mip->mi_default_tx_ring) { + if (mac_reclaim_ring_from_grp(mip, ring_type, + ring, rings, nrings) != 0) { + rv = ENOSPC; + goto bail; + } + } + } } else { - /* this function is called for TX only with a share */ - ASSERT(ring_type == MAC_RING_TYPE_RX); /* * Pick one ring from default group. * @@ -4922,23 +5867,37 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, * We need a better way for a driver to indicate this, * for example a per-ring flag. 
*/ + rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), + KM_SLEEP); for (ring = src_group->mrg_rings; ring != NULL; ring = ring->mr_next) { - if (ring->mr_index != 0) + if (ring_type == MAC_RING_TYPE_RX && + ring->mr_index == 0) { + continue; + } + if (ring_type == MAC_RING_TYPE_TX && + ring == (mac_ring_t *)mip->mi_default_tx_ring) { + continue; + } + rings[i++] = ring; + if (i == ringcnt) break; } ASSERT(ring != NULL); - nrings = 1; - tmp_ring[0] = ring; - rings = tmp_ring; + nrings = i; + /* Not enough rings as required */ + if (nrings != ringcnt) { + rv = ENOSPC; + goto bail; + } } switch (ring_type) { case MAC_RING_TYPE_RX: - if (src_group->mrg_cur_count - nrings < - mac_min_rx_default_rings) { + if (src_group->mrg_cur_count - nrings < 1) { /* we ran out of rings */ - return (ENOSPC); + rv = ENOSPC; + goto bail; } /* move receive rings to new group */ @@ -4950,7 +5909,7 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, (void) mac_group_mov_ring(mip, src_group, rings[j]); } - return (rv); + goto bail; } } break; @@ -4959,37 +5918,42 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, mac_ring_t *tmp_ring; /* move the TX rings to the new group */ - ASSERT(src_group == NULL); for (i = 0; i < nrings; i++) { /* get the desired ring */ tmp_ring = mac_reserve_tx_ring(mip, rings[i]); + if (tmp_ring == NULL) { + rv = ENOSPC; + goto bail; + } ASSERT(tmp_ring == rings[i]); rv = mac_group_mov_ring(mip, new_group, rings[i]); if (rv != 0) { /* cleanup on failure */ for (j = 0; j < i; j++) { (void) mac_group_mov_ring(mip, - mip->mi_tx_groups + - mip->mi_tx_group_count, rings[j]); + MAC_DEFAULT_TX_GROUP(mip), + rings[j]); } + goto bail; } } break; } } - if (share != NULL) { - /* add group to share */ + /* add group to share */ + if (share != NULL) mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); - /* free temporary array of rings */ - kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); - } - return (0); +bail: 
+ /* free temporary array of rings */ + kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); + + return (rv); } void -mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) +mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) { mac_grp_client_t *mgcp; @@ -5008,7 +5972,7 @@ mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) } void -mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) +mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) { mac_grp_client_t *mgcp, **pprev; @@ -5034,65 +5998,149 @@ mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) * largest number of rings, otherwise the default ring when available. */ mac_group_t * -mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, - mac_rx_group_reserve_type_t rtype) +mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) { mac_share_handle_t share = mcip->mci_share; mac_impl_t *mip = mcip->mci_mip; mac_group_t *grp = NULL; - int i, start, loopcount; - int err; + int i; + int err = 0; mac_address_t *map; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + int nrings; + int donor_grp_rcnt; + boolean_t need_exclgrp = B_FALSE; + int need_rings = 0; + mac_group_t *candidate_grp = NULL; + mac_client_impl_t *gclient; + mac_resource_props_t *gmrp; + mac_group_t *donorgrp = NULL; + boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; + boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; + boolean_t isprimary; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - /* Check if a group already has this mac address (case of VLANs) */ - if ((map = mac_find_macaddr(mip, mac_addr)) != NULL) - return (map->ma_group); + isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + + /* + * Check if a group already has this mac address (case of VLANs) + * unless we are moving this MAC client from one group to another. 
+ */ + if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { + if (map->ma_group != NULL) + return (map->ma_group); + } + if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) + return (NULL); + /* + * If exclusive open, return NULL which will enable the + * caller to use the default group. + */ + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) + return (NULL); - if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 || - rtype == MAC_RX_NO_RESERVE) + /* For dynamic groups default unspecified to 1 */ + if (rxhw && unspec && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + mrp->mrp_nrxrings = 1; + } + /* + * For static grouping we allow only specifying rings=0 and + * unspecified + */ + if (rxhw && mrp->mrp_nrxrings > 0 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { return (NULL); + } + if (rxhw) { + /* + * We have explicitly asked for a group (with nrxrings, + * if unspec). + */ + if (unspec || mrp->mrp_nrxrings > 0) { + need_exclgrp = B_TRUE; + need_rings = mrp->mrp_nrxrings; + } else if (mrp->mrp_nrxrings == 0) { + /* + * We have asked for a software group. + */ + return (NULL); + } + } else if (isprimary && mip->mi_nactiveclients == 1 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + /* + * If the primary is the only active client on this + * mip and we have not asked for any rings, we give + * it the default group so that the primary gets to + * use all the rings. + */ + return (NULL); + } + + /* The group that can donate rings */ + donorgrp = mip->mi_rx_donor_grp; + + /* + * The number of rings that the default group can donate. + * We need to leave at least one ring. + */ + donor_grp_rcnt = donorgrp->mrg_cur_count - 1; /* * Try to exclusively reserve a RX group. 
* - * For flows requires SW_RING it always goes to the default group - * (Until we can explicitely call out default groups (CR 6695600), - * we assume that the default group is always at position zero); + * For flows requiring HW_DEFAULT_RING (unicast flow of the primary + * client), try to reserve the a non-default RX group and give + * it all the rings from the donor group, except the default ring * - * For flows requires HW_DEFAULT_RING (unicast flow of the primary - * client), try to reserve the default RX group only. + * For flows requiring HW_RING (unicast flow of other clients), try + * to reserve non-default RX group with the specified number of + * rings, if available. * - * For flows requires HW_RING (unicast flow of other clients), try - * to reserve non-default RX group then the default group. + * For flows that have not asked for software or hardware ring, + * try to reserve a non-default group with 1 ring, if available. */ - switch (rtype) { - case MAC_RX_RESERVE_DEFAULT: - start = 0; - loopcount = 1; - break; - case MAC_RX_RESERVE_NONDEFAULT: - start = 1; - loopcount = mip->mi_rx_group_count; - } - - for (i = start; i < start + loopcount; i++) { - grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count]; + for (i = 1; i < mip->mi_rx_group_count; i++) { + grp = &mip->mi_rx_groups[i]; DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, int, grp->mrg_index, mac_group_state_t, grp->mrg_state); /* - * Check to see whether this mac client is the only client - * on this RX group. If not, we cannot exclusively reserve - * this RX group. + * Check if this group could be a candidate group for + * eviction if we need a group for this MAC client, + * but there aren't any. 
A candidate group is one + * that didn't ask for an exclusive group, but got + * one and it has enough rings (combined with what + * the donor group can donate) for the new MAC + * client */ - if (!MAC_RX_GROUP_NO_CLIENT(grp) && - (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) { + if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { + /* + * If the primary/donor group is not the default + * group, don't bother looking for a candidate group. + * If we don't have enough rings we will check + * if the primary group can be vacated. + */ + if (candidate_grp == NULL && + donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { + ASSERT(!MAC_GROUP_NO_CLIENT(grp)); + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + ASSERT(gclient != NULL); + gmrp = MCIP_RESOURCE_PROPS(gclient); + if (gclient->mci_share == NULL && + (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && + (unspec || + (grp->mrg_cur_count + donor_grp_rcnt >= + need_rings))) { + candidate_grp = grp; + } + } continue; } - /* * This group could already be SHARED by other multicast * flows on this client. In that case, the group would @@ -5105,35 +6153,133 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, continue; } - if ((i % mip->mi_rx_group_count) == 0 || - mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) break; - } - ASSERT(grp->mrg_cur_count == 0); /* * Populate the group. Rings should be taken - * from the default group at position 0 for now. + * from the donor group. */ + nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; - err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, - &mip->mi_rx_groups[0], grp, share); - if (err == 0) - break; + /* + * If the donor group can't donate, let's just walk and + * see if someone can vacate a group, so that we have + * enough rings for this, unless we already have + * identified a candiate group.. 
+ */ + if (nrings <= donor_grp_rcnt) { + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, + donorgrp, grp, share, nrings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, + grp->mrg_cur_count, -1); + } + if (mac_is_primary_client(mcip) && !rxhw) + mip->mi_rx_donor_grp = grp; + break; + } + } DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, mip->mi_name, int, grp->mrg_index, int, err); /* - * It's a dynamic group but the grouping operation failed. + * It's a dynamic group but the grouping operation + * failed. */ mac_stop_group(grp); } + /* We didn't find an exclusive group for this MAC client */ + if (i >= mip->mi_rx_group_count) { - if (i == start + loopcount) - return (NULL); + if (!need_exclgrp) + return (NULL); + /* + * If we found a candidate group then we switch the + * MAC client from the candidate_group to the default + * group and give the group to this MAC client. If + * we didn't find a candidate_group, check if the + * primary is in its own group and if it can make way + * for this MAC client. + */ + if (candidate_grp == NULL && + donorgrp != MAC_DEFAULT_RX_GROUP(mip) && + donorgrp->mrg_cur_count >= need_rings) { + candidate_grp = donorgrp; + } + if (candidate_grp != NULL) { + boolean_t prim_grp = B_FALSE; + + /* + * Switch the MAC client from the candidate group + * to the default group.. If this group was the + * donor group, then after the switch we need + * to update the donor group too. 
+ */ + grp = candidate_grp; + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + if (grp == mip->mi_rx_donor_grp) + prim_grp = B_TRUE; + if (mac_rx_switch_group(gclient, grp, + MAC_DEFAULT_RX_GROUP(mip)) != 0) { + return (NULL); + } + if (prim_grp) { + mip->mi_rx_donor_grp = + MAC_DEFAULT_RX_GROUP(mip); + donorgrp = MAC_DEFAULT_RX_GROUP(mip); + } + + + /* + * Now give this group with the required rings + * to this MAC client. + */ + ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); + if (mac_start_group(grp) != 0) + return (NULL); + + if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) + return (grp); + + donor_grp_rcnt = donorgrp->mrg_cur_count - 1; + ASSERT(grp->mrg_cur_count == 0); + ASSERT(donor_grp_rcnt >= need_rings); + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, + donorgrp, grp, share, need_rings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, + grp->mrg_cur_count, -1); + } + DTRACE_PROBE2(rx__group__reserved, + char *, mip->mi_name, int, grp->mrg_index); + return (grp); + } + DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, + mip->mi_name, int, grp->mrg_index, int, err); + mac_stop_group(grp); + } + return (NULL); + } ASSERT(grp != NULL); DTRACE_PROBE2(rx__group__reserved, @@ -5152,10 +6298,13 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, void mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) { - mac_impl_t *mip = mcip->mci_mip; - mac_ring_t *ring; + mac_impl_t *mip = mcip->mci_mip; + mac_ring_t *ring; - ASSERT(group != &mip->mi_rx_groups[0]); + ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + + if (mip->mi_rx_donor_grp == group) + mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); /* * This is the case where there are no clients left. 
Any @@ -5170,10 +6319,12 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) */ ring->mr_srs = NULL; } - ASSERT(ring->mr_state == MR_INUSE); - mac_stop_ring(ring); - ring->mr_state = MR_FREE; - ring->mr_flag = 0; + ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || + ring->mr_state == MR_INUSE); + if (ring->mr_state == MR_INUSE) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } } /* remove group from share */ @@ -5190,8 +6341,8 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) * Move rings back to default group. */ while ((ring = group->mrg_rings) != NULL) { - (void) mac_group_mov_ring(mip, - &mip->mi_rx_groups[0], ring); + (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, + ring); } } mac_stop_group(group); @@ -5202,86 +6353,637 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) } /* + * When we move the primary's mac address between groups, we need to also + * take all the clients sharing the same mac address along with it (VLANs) + * We remove the mac address for such clients from the group after quiescing + * them. When we add the mac address we restart the client. Note that + * the primary's mac address is removed from the group after all the + * other clients sharing the address are removed. Similarly, the primary's + * mac address is added before all the other client's mac address are + * added. While grp is the group where the clients reside, tgrp is + * the group where the addresses have to be added. + */ +static void +mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, + mac_group_t *tgrp, uint8_t *maddr, boolean_t add) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_grp_client_t *mgcp = grp->mrg_clients; + mac_client_impl_t *gmcip; + boolean_t prim; + + prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + + /* + * If the clients are in a non-default group, we just have to + * walk the group's client list. 
If it is in the default group + * (which will be shared by other clients as well, we need to + * check if the unicast address matches mcip's unicast. + */ + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + if (gmcip != mcip && + (grp != MAC_DEFAULT_RX_GROUP(mip) || + mcip->mci_unicast == gmcip->mci_unicast)) { + if (!add) { + mac_rx_client_quiesce( + (mac_client_handle_t)gmcip); + (void) mac_remove_macaddr(mcip->mci_unicast); + } else { + (void) mac_add_macaddr(mip, tgrp, maddr, prim); + mac_rx_client_restart( + (mac_client_handle_t)gmcip); + } + } + mgcp = mgcp->mgc_next; + } +} + + +/* + * Move the MAC address from fgrp to tgrp. If this is the primary client, + * we need to take any VLANs etc. together too. + */ +static int +mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + mac_impl_t *mip = mcip->mci_mip; + uint8_t maddr[MAXMACADDRLEN]; + int err = 0; + boolean_t prim; + boolean_t multiclnt = B_FALSE; + + mac_rx_client_quiesce((mac_client_handle_t)mcip); + ASSERT(mcip->mci_unicast != NULL); + bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); + + prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + if (mcip->mci_unicast->ma_nusers > 1) { + mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); + multiclnt = B_TRUE; + } + ASSERT(mcip->mci_unicast->ma_nusers == 1); + err = mac_remove_macaddr(mcip->mci_unicast); + if (err != 0) { + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) { + mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, + B_TRUE); + } + return (err); + } + /* + * Program the H/W Classifier first, if this fails we need + * not proceed with the other stuff. + */ + if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { + /* Revert back the H/W Classifier */ + if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { + /* + * This should not fail now since it worked earlier, + * should we panic? 
+ */ + cmn_err(CE_WARN, + "mac_rx_switch_group: switching %p back" + " to group %p failed!!", (void *)mcip, + (void *)fgrp); + } + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) { + mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, + B_TRUE); + } + return (err); + } + mcip->mci_unicast = mac_find_macaddr(mip, maddr); + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) + mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); + return (err); +} + +/* + * Switch the MAC client from one group to another. This means we need + * to remove the MAC address from the group, remove the MAC client, + * teardown the SRSs and revert the group state. Then, we add the client + * to the destination group, set the SRSs, and add the MAC address to the + * group. + */ +int +mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + int err; + mac_group_state_t next_state; + mac_client_impl_t *group_only_mcip; + mac_client_impl_t *gmcip; + mac_impl_t *mip = mcip->mci_mip; + mac_grp_client_t *mgcp; + + ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); + + if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) + return (err); + + /* + * The group might be reserved, but SRSs may not be set up, e.g. + * primary and its vlans using a reserved group. 
+ */ + if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && + MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { + mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); + } + if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + mac_group_remove_client(fgrp, gmcip); + mac_group_add_client(tgrp, gmcip); + gmcip->mci_flent->fe_rx_ring_group = tgrp; + } + mac_release_rx_group(mcip, fgrp); + ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); + mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); + } else { + mac_group_remove_client(fgrp, mcip); + mac_group_add_client(tgrp, mcip); + mcip->mci_flent->fe_rx_ring_group = tgrp; + /* + * If there are other clients (VLANs) sharing this address + * we should be here only for the primary. + */ + if (mcip->mci_unicast->ma_nusers > 1) { + /* + * We need to move all the clients that are using + * this h/w address. + */ + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (mcip->mci_unicast == gmcip->mci_unicast) { + mac_group_remove_client(fgrp, gmcip); + mac_group_add_client(tgrp, gmcip); + gmcip->mci_flent->fe_rx_ring_group = + tgrp; + } + } + } + /* + * The default group will still take the multicast, + * broadcast traffic etc., so it won't go to + * MAC_GROUP_STATE_REGISTERED. + */ + if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) + mac_rx_group_unmark(fgrp, MR_CONDEMNED); + mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); + } + next_state = mac_group_next_state(tgrp, &group_only_mcip, + MAC_DEFAULT_RX_GROUP(mip), B_TRUE); + mac_set_group_state(tgrp, next_state); + /* + * If the destination group is reserved, setup the SRSs etc. 
+ */ + if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { + mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); + mac_fanout_setup(mcip, mcip->mci_flent, + MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, + NULL); + mac_rx_group_unmark(tgrp, MR_INCIPIENT); + } else { + mac_rx_switch_grp_to_sw(tgrp); + } + return (0); +} + +/* * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() * when a share was allocated to the client. */ mac_group_t * -mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share) +mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) { - mac_group_t *grp; - int rv, i; + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *grp = NULL; + int rv; + int i; + int err; + mac_group_t *defgrp; + mac_share_handle_t share = mcip->mci_share; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + int nrings; + int defnrings; + boolean_t need_exclgrp = B_FALSE; + int need_rings = 0; + mac_group_t *candidate_grp = NULL; + mac_client_impl_t *gclient; + mac_resource_props_t *gmrp; + boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; + boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; + boolean_t isprimary; + + isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + /* + * When we come here for a VLAN on the primary (dladm create-vlan), + * we need to pair it along with the primary (to keep it consistent + * with the RX side). So, we check if the primary is already assigned + * to a group and return the group if so. The other way is also + * true, i.e. the VLAN is already created and now we are plumbing + * the primary. 
+ */ + if (!move && isprimary) { + for (gclient = mip->mi_clients_list; gclient != NULL; + gclient = gclient->mci_client_next) { + if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && + gclient->mci_flent->fe_tx_ring_group != NULL) { + return (gclient->mci_flent->fe_tx_ring_group); + } + } + } + + if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) + return (NULL); + + /* For dynamic groups, default unspec to 1 */ + if (txhw && unspec && + mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + mrp->mrp_ntxrings = 1; + } + /* + * For static grouping we allow only specifying rings=0 and + * unspecified + */ + if (txhw && mrp->mrp_ntxrings > 0 && + mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { + return (NULL); + } + + if (txhw) { + /* + * We have explicitly asked for a group (with ntxrings, + * if unspec). + */ + if (unspec || mrp->mrp_ntxrings > 0) { + need_exclgrp = B_TRUE; + need_rings = mrp->mrp_ntxrings; + } else if (mrp->mrp_ntxrings == 0) { + /* + * We have asked for a software group. + */ + return (NULL); + } + } + defgrp = MAC_DEFAULT_TX_GROUP(mip); + /* + * The number of rings that the default group can donate. + * We need to leave at least one ring - the default ring - in + * this group. + */ + defnrings = defgrp->mrg_cur_count - 1; /* - * TX groups are currently allocated only to MAC clients - * which are associated with a share. Since we have a fixed - * number of share and groups, and we already successfully - * allocated a share, find an available TX group. + * Primary gets default group unless explicitly told not + * to (i.e. rings > 0). */ - ASSERT(share != NULL); - ASSERT(mip->mi_tx_group_free > 0); + if (isprimary && !need_exclgrp) + return (NULL); + nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? 
mrp->mrp_ntxrings : 1; for (i = 0; i < mip->mi_tx_group_count; i++) { grp = &mip->mi_tx_groups[i]; - if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || - (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) + (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { + /* + * Select a candidate for replacement if we don't + * get an exclusive group. A candidate group is one + * that didn't ask for an exclusive group, but got + * one and it has enough rings (combined with what + * the default group can donate) for the new MAC + * client. + */ + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && + candidate_grp == NULL) { + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + gmrp = MCIP_RESOURCE_PROPS(gclient); + if (gclient->mci_share == NULL && + (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && + (unspec || + (grp->mrg_cur_count + defnrings) >= + need_rings)) { + candidate_grp = grp; + } + } continue; + } + /* + * If the default can't donate let's just walk and + * see if someone can vacate a group, so that we have + * enough rings for this. + */ + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || + nrings <= defnrings) { + if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { + rv = mac_start_group(grp); + ASSERT(rv == 0); + } + break; + } + } - rv = mac_start_group(grp); - ASSERT(rv == 0); + /* The default group */ + if (i >= mip->mi_tx_group_count) { + /* + * If we need an exclusive group and have identified a + * candidate group we switch the MAC client from the + * candidate group to the default group and give the + * candidate group to this client. + */ + if (need_exclgrp && candidate_grp != NULL) { + /* + * Switch the MAC client from the candidate group + * to the default group. 
+ */ + grp = candidate_grp; + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + mac_tx_client_quiesce((mac_client_handle_t)gclient); + mac_tx_switch_group(gclient, grp, defgrp); + mac_tx_client_restart((mac_client_handle_t)gclient); - grp->mrg_state = MAC_GROUP_STATE_RESERVED; - break; - } + /* + * Give the candidate group with the specified number + * of rings to this MAC client. + */ + ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); + rv = mac_start_group(grp); + ASSERT(rv == 0); - ASSERT(grp != NULL); + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) + return (grp); + + ASSERT(grp->mrg_cur_count == 0); + ASSERT(defgrp->mrg_cur_count > need_rings); + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, + defgrp, grp, share, need_rings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, -1, + grp->mrg_cur_count); + } + mip->mi_tx_group_free--; + return (grp); + } + DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, + mip->mi_name, int, grp->mrg_index, int, err); + mac_stop_group(grp); + } + return (NULL); + } /* - * Populate the group. Rings should be taken from the group - * of unassigned rings, which is past the array of TX - * groups adversized by the driver. + * We got an exclusive group, but it is not dynamic. 
*/ - rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL, - grp, share); + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + mip->mi_tx_group_free--; + return (grp); + } + + rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, + share, nrings); if (rv != 0) { DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, mip->mi_name, int, grp->mrg_index, int, rv); - mac_stop_group(grp); - grp->mrg_state = MAC_GROUP_STATE_UNINIT; - return (NULL); } - + /* + * For a share i_mac_group_allocate_rings gets the rings from the + * driver, let's populate the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings((mac_client_handle_t)mcip, -1, + grp->mrg_cur_count); + } mip->mi_tx_group_free--; - return (grp); } void -mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp) +mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) { - mac_client_impl_t *mcip = grp->mrg_tx_client; - mac_share_handle_t share = mcip->mci_share; - mac_ring_t *ring; - - ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(share != NULL); - ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED); + mac_impl_t *mip = mcip->mci_mip; + mac_share_handle_t share = mcip->mci_share; + mac_ring_t *ring; + mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); + mac_group_t *defgrp; + + defgrp = MAC_DEFAULT_TX_GROUP(mip); + if (srs != NULL) { + if (srs->srs_soft_ring_count > 0) { + for (ring = grp->mrg_rings; ring != NULL; + ring = ring->mr_next) { + ASSERT(mac_tx_srs_ring_present(srs, ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(srs, ring)); + mac_tx_srs_del_ring(srs, ring); + } + } else { + ASSERT(srs->srs_tx.st_arg2 != NULL); + srs->srs_tx.st_arg2 = NULL; + mac_srs_stat_delete(srs); + } + } + if (share != NULL) + mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); - mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); - while ((ring = grp->mrg_rings) != NULL) { - /* move the ring back to the pool */ - 
(void) mac_group_mov_ring(mip, mip->mi_tx_groups + - mip->mi_tx_group_count, ring); + /* move the ring back to the pool */ + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + while ((ring = grp->mrg_rings) != NULL) + (void) mac_group_mov_ring(mip, defgrp, ring); } mac_stop_group(grp); - mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); - grp->mrg_tx_client = NULL; mip->mi_tx_group_free++; } /* + * Disassociate a MAC client from a group, i.e go through the rings in the + * group and delete all the soft rings tied to them. + */ +static void +mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) +{ + mac_client_impl_t *mcip = flent->fe_mcip; + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + mac_ring_t *ring; + + tx_srs = flent->fe_tx_srs; + tx = &tx_srs->srs_tx; + + /* Single ring case we haven't created any soft rings */ + if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || + tx->st_mode == SRS_TX_DEFAULT) { + tx->st_arg2 = NULL; + mac_srs_stat_delete(tx_srs); + /* Fanout case, where we have to dismantle the soft rings */ + } else { + for (ring = fgrp->mrg_rings; ring != NULL; + ring = ring->mr_next) { + ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, + ring)); + mac_tx_srs_del_ring(tx_srs, ring); + } + ASSERT(tx->st_arg2 == NULL); + } +} + +/* + * Switch the MAC client from one group to another. This means we need + * to remove the MAC client, teardown the SRSs and revert the group state. + * Then, we add the client to the destination roup, set the SRSs etc. 
+ */ +void +mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + mac_client_impl_t *group_only_mcip; + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *defgrp; + mac_grp_client_t *mgcp; + mac_client_impl_t *gmcip; + flow_entry_t *gflent; + + defgrp = MAC_DEFAULT_TX_GROUP(mip); + ASSERT(fgrp == flent->fe_tx_ring_group); + + if (fgrp == defgrp) { + /* + * If this is the primary we need to find any VLANs on + * the primary and move them too. + */ + mac_group_remove_client(fgrp, mcip); + mac_tx_dismantle_soft_rings(fgrp, flent); + if (mcip->mci_unicast->ma_nusers > 1) { + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (mcip->mci_unicast != gmcip->mci_unicast) + continue; + mac_tx_client_quiesce( + (mac_client_handle_t)gmcip); + + gflent = gmcip->mci_flent; + mac_group_remove_client(fgrp, gmcip); + mac_tx_dismantle_soft_rings(fgrp, gflent); + + mac_group_add_client(tgrp, gmcip); + gflent->fe_tx_ring_group = tgrp; + /* We could directly set this to SHARED */ + tgrp->mrg_state = mac_group_next_state(tgrp, + &group_only_mcip, defgrp, B_FALSE); + + mac_tx_srs_group_setup(gmcip, gflent, + SRST_LINK); + mac_fanout_setup(gmcip, gflent, + MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, + gmcip, NULL, NULL); + + mac_tx_client_restart( + (mac_client_handle_t)gmcip); + } + } + if (MAC_GROUP_NO_CLIENT(fgrp)) { + mac_ring_t *ring; + int cnt; + int ringcnt; + + fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; + /* + * Additionally, we also need to stop all + * the rings in the default group, except + * the default ring. The reason being + * this group won't be released since it is + * the default group, so the rings won't + * be stopped otherwise. 
+ */ + ringcnt = fgrp->mrg_cur_count; + ring = fgrp->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state == MR_INUSE && + ring != + (mac_ring_t *)mip->mi_default_tx_ring) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } + ring = ring->mr_next; + } + } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { + fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; + } else { + ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); + } + } else { + /* + * We could have VLANs sharing the non-default group with + * the primary. + */ + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (gmcip == mcip) + continue; + mac_tx_client_quiesce((mac_client_handle_t)gmcip); + gflent = gmcip->mci_flent; + + mac_group_remove_client(fgrp, gmcip); + mac_tx_dismantle_soft_rings(fgrp, gflent); + + mac_group_add_client(tgrp, gmcip); + gflent->fe_tx_ring_group = tgrp; + /* We could directly set this to SHARED */ + tgrp->mrg_state = mac_group_next_state(tgrp, + &group_only_mcip, defgrp, B_FALSE); + mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); + mac_fanout_setup(gmcip, gflent, + MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, + gmcip, NULL, NULL); + + mac_tx_client_restart((mac_client_handle_t)gmcip); + } + mac_group_remove_client(fgrp, mcip); + mac_release_tx_group(mcip, fgrp); + fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; + } + + /* Add it to the tgroup */ + mac_group_add_client(tgrp, mcip); + flent->fe_tx_ring_group = tgrp; + tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, + defgrp, B_FALSE); + + mac_tx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); +} + +/* * This is a 1-time control path activity initiated by the client (IP). 
* The mac perimeter protects against other simultaneous control activities, * for example an ioctl that attempts to change the degree of fanout and @@ -5416,3 +7118,599 @@ mac_no_active(mac_handle_t mh) mip->mi_state_flags |= MIS_NO_ACTIVE; i_mac_perim_exit(mip); } + +/* + * Walk the primary VLAN clients whenever the primary's rings property + * changes and update the mac_resource_props_t for the VLAN's client. + * We need to do this since we don't support setting these properties + * on the primary's VLAN clients, but the VLAN clients have to + * follow the primary w.r.t the rings property; + */ +void +mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) +{ + mac_client_impl_t *vmcip; + mac_resource_props_t *vmrp; + + for (vmcip = mip->mi_clients_list; vmcip != NULL; + vmcip = vmcip->mci_client_next) { + if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || + mac_client_vid((mac_client_handle_t)vmcip) == + VLAN_ID_NONE) { + continue; + } + vmrp = MCIP_RESOURCE_PROPS(vmcip); + + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + if (mrp->mrp_mask & MRP_RX_RINGS) + vmrp->mrp_mask |= MRP_RX_RINGS; + else if (vmrp->mrp_mask & MRP_RX_RINGS) + vmrp->mrp_mask &= ~MRP_RX_RINGS; + + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + if (mrp->mrp_mask & MRP_TX_RINGS) + vmrp->mrp_mask |= MRP_TX_RINGS; + else if (vmrp->mrp_mask & MRP_TX_RINGS) + vmrp->mrp_mask &= ~MRP_TX_RINGS; + + if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else + vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else + vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + } +} + +/* + * We are adding or removing ring(s) from a group. The source for taking + * rings is the default group. The destination for giving rings back is + * the default group. 
+ */ +int +mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, + mac_group_t *defgrp) +{ + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint_t modify; + int count; + mac_ring_t *ring; + mac_ring_t *next; + mac_impl_t *mip = mcip->mci_mip; + mac_ring_t **rings; + uint_t ringcnt; + int i = 0; + boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; + int start; + int end; + mac_group_t *tgrp; + int j; + int rv = 0; + + /* + * If we are asked for just a group, we give 1 ring, else + * the specified number of rings. + */ + if (rx_group) { + ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: + mrp->mrp_nrxrings; + } else { + ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: + mrp->mrp_ntxrings; + } + + /* don't allow modifying rings for a share for now. */ + ASSERT(mcip->mci_share == NULL); + + if (ringcnt == group->mrg_cur_count) + return (0); + + if (group->mrg_cur_count > ringcnt) { + modify = group->mrg_cur_count - ringcnt; + if (rx_group) { + if (mip->mi_rx_donor_grp == group) { + ASSERT(mac_is_primary_client(mcip)); + mip->mi_rx_donor_grp = defgrp; + } else { + defgrp = mip->mi_rx_donor_grp; + } + } + ring = group->mrg_rings; + rings = kmem_alloc(modify * sizeof (mac_ring_handle_t), + KM_SLEEP); + j = 0; + for (count = 0; count < modify; count++) { + next = ring->mr_next; + rv = mac_group_mov_ring(mip, defgrp, ring); + if (rv != 0) { + /* cleanup on failure */ + for (j = 0; j < count; j++) { + (void) mac_group_mov_ring(mip, group, + rings[j]); + } + break; + } + rings[j++] = ring; + ring = next; + } + kmem_free(rings, modify * sizeof (mac_ring_handle_t)); + return (rv); + } + if (ringcnt >= MAX_RINGS_PER_GROUP) + return (EINVAL); + + modify = ringcnt - group->mrg_cur_count; + + if (rx_group) { + if (group != mip->mi_rx_donor_grp) + defgrp = mip->mi_rx_donor_grp; + else + /* + * This is the donor group with all the remaining + * rings. 
Default group now gets to be the donor + */ + mip->mi_rx_donor_grp = defgrp; + start = 1; + end = mip->mi_rx_group_count; + } else { + start = 0; + end = mip->mi_tx_group_count - 1; + } + /* + * If the default doesn't have any rings, lets see if we can + * take rings given to an h/w client that doesn't need it. + * For now, we just see if there is any one client that can donate + * all the required rings. + */ + if (defgrp->mrg_cur_count < (modify + 1)) { + for (i = start; i < end; i++) { + if (rx_group) { + tgrp = &mip->mi_rx_groups[i]; + if (tgrp == group || tgrp->mrg_state < + MAC_GROUP_STATE_RESERVED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) + continue; + if ((tgrp->mrg_cur_count + + defgrp->mrg_cur_count) < (modify + 1)) { + continue; + } + if (mac_rx_switch_group(mcip, tgrp, + defgrp) != 0) { + return (ENOSPC); + } + } else { + tgrp = &mip->mi_tx_groups[i]; + if (tgrp == group || tgrp->mrg_state < + MAC_GROUP_STATE_RESERVED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) + continue; + if ((tgrp->mrg_cur_count + + defgrp->mrg_cur_count) < (modify + 1)) { + continue; + } + /* OK, we can switch this to s/w */ + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, tgrp, defgrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + } + } + if (defgrp->mrg_cur_count < (modify + 1)) + return (ENOSPC); + } + if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp, + group, mcip->mci_share, modify)) != 0) { + return (rv); + } + return (0); +} + +/* + * Given the poolname in mac_resource_props, find the cpupart + * that is associated with this pool. 
The cpupart will be used + * later for finding the cpus to be bound to the networking threads. + * + * use_default is set B_TRUE if pools are enabled and pool_default + * is returned. This avoids a 2nd lookup to set the poolname + * for pool-effective. + * + * returns: + * + * NULL - pools are disabled or if the 'cpus' property is set. + * cpupart of pool_default - pools are enabled and the pool + * is not available or poolname is blank + * cpupart of named pool - pools are enabled and the pool + * is available. + */ +cpupart_t * +mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) +{ + pool_t *pool; + cpupart_t *cpupart; + + *use_default = B_FALSE; + + /* CPUs property is set */ + if (mrp->mrp_mask & MRP_CPUS) + return (NULL); + + ASSERT(pool_lock_held()); + + /* Pools are disabled, no pset */ + if (pool_state == POOL_DISABLED) + return (NULL); + + /* Pools property is set */ + if (mrp->mrp_mask & MRP_POOL) { + if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { + /* Pool not found */ + DTRACE_PROBE1(mac_pset_find_no_pool, char *, + mrp->mrp_pool); + *use_default = B_TRUE; + pool = pool_default; + } + /* Pools property is not set */ + } else { + *use_default = B_TRUE; + pool = pool_default; + } + + /* Find the CPU pset that corresponds to the pool */ + mutex_enter(&cpu_lock); + if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { + DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, + pool->pool_pset->pset_id); + } + mutex_exit(&cpu_lock); + + return (cpupart); +} + +void +mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, + mac_resource_props_t *mrp, mac_resource_props_t *emrp) +{ + ASSERT(pool_lock_held()); + + if (cpupart != NULL) { + emrp->mrp_mask |= MRP_POOL; + if (use_default) { + (void) strcpy(emrp->mrp_pool, + "pool_default"); + } else { + ASSERT(strlen(mrp->mrp_pool) != 0); + (void) strcpy(emrp->mrp_pool, + mrp->mrp_pool); + } + } else { + emrp->mrp_mask &= ~MRP_POOL; + bzero(emrp->mrp_pool, MAXPATHLEN); + 
} +} + +struct mac_pool_arg { + char mpa_poolname[MAXPATHLEN]; + pool_event_t mpa_what; +}; + +/*ARGSUSED*/ +static uint_t +mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + struct mac_pool_arg *mpa = arg; + mac_impl_t *mip = (mac_impl_t *)val; + mac_client_impl_t *mcip; + mac_resource_props_t *mrp, *emrp; + boolean_t pool_update = B_FALSE; + boolean_t pool_clear = B_FALSE; + boolean_t use_default = B_FALSE; + cpupart_t *cpupart = NULL; + + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + i_mac_perim_enter(mip); + for (mcip = mip->mi_clients_list; mcip != NULL; + mcip = mcip->mci_client_next) { + pool_update = B_FALSE; + pool_clear = B_FALSE; + use_default = B_FALSE; + mac_client_get_resources((mac_client_handle_t)mcip, mrp); + emrp = MCIP_EFFECTIVE_PROPS(mcip); + + /* + * When pools are enabled + */ + if ((mpa->mpa_what == POOL_E_ENABLE) && + ((mrp->mrp_mask & MRP_CPUS) == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + + /* + * When pools are disabled + */ + if ((mpa->mpa_what == POOL_E_DISABLE) && + ((mrp->mrp_mask & MRP_CPUS) == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_clear = B_TRUE; + } + + /* + * Look for links with the pool property set and the poolname + * matching the one which is changing. + */ + if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) { + /* + * The pool associated with the link has changed. + */ + if (mpa->mpa_what == POOL_E_CHANGE) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + } + + /* + * This link is associated with pool_default and + * pool_default has changed. + */ + if ((mpa->mpa_what == POOL_E_CHANGE) && + (strcmp(emrp->mrp_pool, "pool_default") == 0) && + (strcmp(mpa->mpa_poolname, "pool_default") == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + + /* + * Get new list of cpus for the pool, bind network + * threads to new list of cpus and update resources. 
+ */ + if (pool_update) { + if (MCIP_DATAPATH_SETUP(mcip)) { + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, mcip->mci_flent, mrp, + mac_rx_deliver, mcip, NULL, cpupart); + mac_set_pool_effective(use_default, cpupart, + mrp, emrp); + pool_unlock(); + } + mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), + B_FALSE); + } + + /* + * Clear the effective pool and bind network threads + * to any available CPU. + */ + if (pool_clear) { + if (MCIP_DATAPATH_SETUP(mcip)) { + emrp->mrp_mask &= ~MRP_POOL; + bzero(emrp->mrp_pool, MAXPATHLEN); + mac_fanout_setup(mcip, mcip->mci_flent, mrp, + mac_rx_deliver, mcip, NULL, NULL); + } + mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), + B_FALSE); + } + } + i_mac_perim_exit(mip); + kmem_free(mrp, sizeof (*mrp)); + return (MH_WALK_CONTINUE); +} + +static void +mac_pool_update(void *arg) +{ + mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg); + kmem_free(arg, sizeof (struct mac_pool_arg)); +} + +/* + * Callback function to be executed when a noteworthy pool event + * takes place. + */ +/* ARGSUSED */ +static void +mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) +{ + pool_t *pool; + char *poolname = NULL; + struct mac_pool_arg *mpa; + + pool_lock(); + mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP); + + switch (what) { + case POOL_E_ENABLE: + case POOL_E_DISABLE: + break; + + case POOL_E_CHANGE: + pool = pool_lookup_pool_by_id(id); + if (pool == NULL) { + kmem_free(mpa, sizeof (struct mac_pool_arg)); + pool_unlock(); + return; + } + pool_get_name(pool, &poolname); + (void) strlcpy(mpa->mpa_poolname, poolname, + sizeof (mpa->mpa_poolname)); + break; + + default: + kmem_free(mpa, sizeof (struct mac_pool_arg)); + pool_unlock(); + return; + } + pool_unlock(); + + mpa->mpa_what = what; + + mac_pool_update(mpa); +} + +/* + * Set effective rings property. This could be called from datapath_setup/ + * datapath_teardown or set-linkprop. 
+ * If the group is reserved we just go ahead and set the effective rings. + * Additionally, for TX this could mean the default group has lost/gained + * some rings, so if the default group is reserved, we need to adjust the + * effective rings for the default group clients. For RX, if we are working + * with the non-default group, we just need * to reset the effective props + * for the default group clients. + */ +void +mac_set_rings_effective(mac_client_impl_t *mcip) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *grp; + mac_group_t *defgrp; + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + mac_grp_client_t *mgcp; + mac_client_impl_t *gmcip; + + grp = flent->fe_rx_ring_group; + if (grp != NULL) { + defgrp = MAC_DEFAULT_RX_GROUP(mip); + /* + * If we have reserved a group, set the effective rings + * to the ring count in the group. + */ + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { + emrp->mrp_mask |= MRP_RX_RINGS; + emrp->mrp_nrxrings = grp->mrg_cur_count; + } + + /* + * We go through the clients in the shared group and + * reset the effective properties. It is possible this + * might have already been done for some client (i.e. + * if some client is being moved to a group that is + * already shared). The case where the default group is + * RESERVED is taken care of above (note in the RX side if + * there is a non-default group, the default group is always + * SHARED). 
+ */ + if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) { + if (grp->mrg_state == MAC_GROUP_STATE_SHARED) + mgcp = grp->mrg_clients; + else + mgcp = defgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + if (emrp->mrp_mask & MRP_RX_RINGS) { + emrp->mrp_mask &= ~MRP_RX_RINGS; + emrp->mrp_nrxrings = 0; + } + mgcp = mgcp->mgc_next; + } + } + } + + /* Now the TX side */ + grp = flent->fe_tx_ring_group; + if (grp != NULL) { + defgrp = MAC_DEFAULT_TX_GROUP(mip); + + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { + emrp->mrp_mask |= MRP_TX_RINGS; + emrp->mrp_ntxrings = grp->mrg_cur_count; + } else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) { + mgcp = grp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + if (emrp->mrp_mask & MRP_TX_RINGS) { + emrp->mrp_mask &= ~MRP_TX_RINGS; + emrp->mrp_ntxrings = 0; + } + mgcp = mgcp->mgc_next; + } + } + + /* + * If the group is not the default group and the default + * group is reserved, the ring count in the default group + * might have changed, update it. + */ + if (grp != defgrp && + defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { + gmcip = MAC_GROUP_ONLY_CLIENT(defgrp); + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + emrp->mrp_ntxrings = defgrp->mrg_cur_count; + } + } + emrp = MCIP_EFFECTIVE_PROPS(mcip); +} + +/* + * Check if the primary is in the default group. If so, see if we + * can give it a an exclusive group now that another client is + * being configured. We take the primary out of the default group + * because the multicast/broadcast packets for the all the clients + * will land in the default ring in the default group which means + * any client in the default group, even if it is the only on in + * the group, will lose exclusive access to the rings, hence + * polling. 
+ */ +mac_client_impl_t * +mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *defgrp = MAC_DEFAULT_RX_GROUP(mip); + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint8_t *mac_addr; + mac_group_t *ngrp; + + /* + * Check if the primary is in the default group, if not + * or if it is explicitly configured to be in the default + * group OR set the RX rings property, return. + */ + if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS) + return (NULL); + + /* + * If the new client needs an exclusive group and we + * don't have another for the primary, return. + */ + if (rxhw && mip->mi_rxhwclnt_avail < 2) + return (NULL); + + mac_addr = flent->fe_flow_desc.fd_dst_mac; + /* + * We call this when we are setting up the datapath for + * the first non-primary. + */ + ASSERT(mip->mi_nactiveclients == 2); + /* + * OK, now we have the primary that needs to be relocated. + */ + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); + if (ngrp == NULL) + return (NULL); + if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { + mac_stop_group(ngrp); + return (NULL); + } + return (mcip); +} diff --git a/usr/src/uts/common/io/mac/mac_bcast.c b/usr/src/uts/common/io/mac/mac_bcast.c index 2f17228e06..1aba37c822 100644 --- a/usr/src/uts/common/io/mac/mac_bcast.c +++ b/usr/src/uts/common/io/mac/mac_bcast.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -212,10 +212,15 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) rw_enter(&mip->mi_rw_lock, RW_READER); /* update stats */ - if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) - dst_mcip->mci_stat_multircv++; - else - dst_mcip->mci_stat_brdcstrcv++; + if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) { + MCIP_STAT_UPDATE(dst_mcip, multircv, 1); + MCIP_STAT_UPDATE(dst_mcip, multircvbytes, + msgdsize(mp_chain)); + } else { + MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1); + MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes, + msgdsize(mp_chain)); + } if (grp->mbg_clients_gen != gen) { /* @@ -236,10 +241,12 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) * so we need to send a copy of the packet to the * underlying NIC so that it can be sent on the wire. */ - src_mcip->mci_stat_multixmt++; - src_mcip->mci_stat_brdcstxmt++; + MCIP_STAT_UPDATE(src_mcip, multixmt, 1); + MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain)); + MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1); + MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain)); - MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, B_FALSE); + MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip); if (mp_chain != NULL) freemsgchain(mp_chain); } else { diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 78c7eae9f2..2f8962f67a 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -108,6 +108,7 @@ #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/dls.h> #include <sys/dld.h> #include <sys/modctl.h> @@ -144,6 +145,10 @@ static void mac_client_remove_flow_from_list(mac_client_impl_t *, static void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *); static void mac_rename_flow_names(mac_client_impl_t *, const char *); static void mac_virtual_link_update(mac_impl_t *); +static 
int mac_client_datapath_setup(mac_client_impl_t *, uint16_t, + uint8_t *, mac_resource_props_t *, boolean_t, mac_unicast_impl_t *); +static void mac_client_datapath_teardown(mac_client_handle_t, + mac_unicast_impl_t *, flow_entry_t *); /* ARGSUSED */ static int @@ -560,6 +565,14 @@ mac_client_link_state(mac_client_impl_t *mcip) } /* + * These statistics are consumed by dladm show-link -s <vnic>, + * dladm show-vnic -s and netstat. With the introduction of dlstat, + * dladm show-link -s and dladm show-vnic -s witll be EOL'ed while + * netstat will consume from kstats introduced for dlstat. This code + * will be removed at that time. + */ + +/* * Return the statistics of a MAC client. These statistics are different * then the statistics of the underlying MAC which are returned by * mac_stat_get(). @@ -567,9 +580,17 @@ mac_client_link_state(mac_client_impl_t *mcip) uint64_t mac_client_stat_get(mac_client_handle_t mch, uint_t stat) { - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_impl_t *mip = mcip->mci_mip; - uint64_t val; + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + uint64_t val = 0; + + mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs); + mac_tx_stat = &mac_srs->srs_tx.st_stat; switch (stat) { case MAC_STAT_LINK_STATE: @@ -588,37 +609,52 @@ mac_client_stat_get(mac_client_handle_t mch, uint_t stat) val = mac_client_ifspeed(mcip); break; case MAC_STAT_MULTIRCV: - val = mcip->mci_stat_multircv; + val = mcip->mci_misc_stat.mms_multircv; break; case MAC_STAT_BRDCSTRCV: - val = mcip->mci_stat_brdcstrcv; + val = mcip->mci_misc_stat.mms_brdcstrcv; break; case MAC_STAT_MULTIXMT: - val = mcip->mci_stat_multixmt; + val = mcip->mci_misc_stat.mms_multixmt; break; case MAC_STAT_BRDCSTXMT: - val = mcip->mci_stat_brdcstxmt; + val = mcip->mci_misc_stat.mms_brdcstxmt; break; case 
MAC_STAT_OBYTES: - val = mcip->mci_stat_obytes; + val = mac_tx_stat->mts_obytes; break; case MAC_STAT_OPACKETS: - val = mcip->mci_stat_opackets; + val = mac_tx_stat->mts_opackets; break; case MAC_STAT_OERRORS: - val = mcip->mci_stat_oerrors; + val = mac_tx_stat->mts_oerrors; break; case MAC_STAT_IPACKETS: - val = mcip->mci_stat_ipackets; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + } break; case MAC_STAT_RBYTES: - val = mcip->mci_stat_ibytes; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + } break; case MAC_STAT_IERRORS: - val = mcip->mci_stat_ierrors; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_ierrors; + } break; default: - val = mac_stat_default(mip, stat); + val = mac_driver_stat_default(mip, stat); break; } @@ -676,12 +712,30 @@ mac_stat_get(mac_handle_t mh, uint_t stat) * The driver doesn't support this statistic. Get the * statistic's default value. */ - val = mac_stat_default(mip, stat); + val = mac_driver_stat_default(mip, stat); } return (val); } /* + * Query hardware rx ring corresponding to the pseudo ring. + */ +uint64_t +mac_pseudo_rx_ring_stat_get(mac_ring_handle_t handle, uint_t stat) +{ + return (mac_rx_ring_stat_get(handle, stat)); +} + +/* + * Query hardware tx ring corresponding to the pseudo ring. + */ +uint64_t +mac_pseudo_tx_ring_stat_get(mac_ring_handle_t handle, uint_t stat) +{ + return (mac_tx_ring_stat_get(handle, stat)); +} + +/* * Utility function which returns the VID associated with a flow entry. 
*/ uint16_t @@ -752,6 +806,12 @@ mac_unicast_update_client_flow(mac_client_impl_t *mcip) mac_flow_set_desc(flent, &flow_desc); /* + * The v6 local addr (used by mac protection) needs to be + * regenerated because our mac address has changed. + */ + mac_protect_update_v6_local_addr(mcip); + + /* * A MAC client could have one MAC address but multiple * VLANs. In that case update the flow entries corresponding * to all VLANs of the MAC client. @@ -1184,20 +1244,14 @@ int mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, uint16_t flags) { - mac_impl_t *mip = (mac_impl_t *)mh; - mac_client_impl_t *mcip; - int err = 0; - boolean_t share_desired = - ((flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0); - boolean_t no_hwrings = ((flags & MAC_OPEN_FLAGS_NO_HWRINGS) != 0); - boolean_t req_hwrings = ((flags & MAC_OPEN_FLAGS_REQ_HWRINGS) != 0); - flow_entry_t *flent = NULL; + mac_impl_t *mip = (mac_impl_t *)mh; + mac_client_impl_t *mcip; + int err = 0; + boolean_t share_desired; + flow_entry_t *flent = NULL; + share_desired = (flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0; *mchp = NULL; - if (share_desired && no_hwrings) { - /* can't have shares but no hardware rings */ - return (EINVAL); - } i_mac_perim_enter(mip); @@ -1249,6 +1303,9 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, if ((flags & MAC_OPEN_FLAGS_IS_AGGR_PORT) != 0) mcip->mci_state_flags |= MCIS_IS_AGGR_PORT; + if (mip->mi_state_flags & MIS_IS_AGGR) + mcip->mci_state_flags |= MCIS_IS_AGGR; + if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) { datalink_id_t linkid; @@ -1283,19 +1340,18 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY) mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY; + if (flags & MAC_OPEN_FLAGS_NO_UNICAST_ADDR) + mcip->mci_state_flags |= MCIS_NO_UNICAST_ADDR; + + mac_protect_init(mcip); + /* the subflow table will be created dynamically */ mcip->mci_subflow_tab = NULL; - 
mcip->mci_stat_multircv = 0; - mcip->mci_stat_brdcstrcv = 0; - mcip->mci_stat_multixmt = 0; - mcip->mci_stat_brdcstxmt = 0; - - mcip->mci_stat_obytes = 0; - mcip->mci_stat_opackets = 0; - mcip->mci_stat_oerrors = 0; - mcip->mci_stat_ibytes = 0; - mcip->mci_stat_ipackets = 0; - mcip->mci_stat_ierrors = 0; + + mcip->mci_misc_stat.mms_multircv = 0; + mcip->mci_misc_stat.mms_brdcstrcv = 0; + mcip->mci_misc_stat.mms_multixmt = 0; + mcip->mci_misc_stat.mms_brdcstxmt = 0; /* Create an initial flow */ @@ -1321,20 +1377,25 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, */ mac_client_add(mcip); - if (no_hwrings) - mcip->mci_state_flags |= MCIS_NO_HWRINGS; - if (req_hwrings) - mcip->mci_state_flags |= MCIS_REQ_HWRINGS; mcip->mci_share = NULL; - if (share_desired) { - ASSERT(!no_hwrings); + if (share_desired) i_mac_share_alloc(mcip); - } DTRACE_PROBE2(mac__client__open__allocated, mac_impl_t *, mcip->mci_mip, mac_client_impl_t *, mcip); *mchp = (mac_client_handle_t)mcip; + /* + * We will do mimimal datapath setup to allow a MAC client to + * transmit or receive non-unicast packets without waiting + * for mac_unicast_add. 
+ */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) { + if ((err = mac_client_datapath_setup(mcip, VLAN_ID_NONE, + NULL, NULL, B_TRUE, NULL)) != 0) { + goto done; + } + } i_mac_perim_exit(mip); return (0); @@ -1373,6 +1434,13 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) return; } + /* If we have only setup up minimal datapth setup, tear it down */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) { + mac_client_datapath_teardown((mac_client_handle_t)mcip, NULL, + mcip->mci_flent); + mcip->mci_state_flags &= ~MCIS_NO_UNICAST_ADDR; + } + /* * Remove the flent associated with the MAC client */ @@ -1389,7 +1457,7 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) ASSERT(mcip->mci_tx_notify_cb_list == NULL); i_mac_share_free(mcip); - + mac_protect_fini(mcip); mac_client_remove(mcip); i_mac_perim_exit(mip); @@ -1495,6 +1563,335 @@ mac_update_subflow_priority(mac_client_impl_t *mcip) } /* + * Modify the TX or RX ring properties. We could either just move around + * rings, i.e add/remove rings given to a client. Or this might cause the + * client to move from hardware based to software or the other way around. + * If we want to reset this property, then we clear the mask, additionally + * if the client was given a non-default group we remove all rings except + * for 1 and give it back to the default group. 
+ */ +int +mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, + mac_resource_props_t *tmrp) +{ + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + uint8_t *mac_addr; + int err = 0; + mac_group_t *defgrp; + mac_group_t *group; + mac_group_t *ngrp; + mac_resource_props_t *cmrp = MCIP_RESOURCE_PROPS(mcip); + uint_t ringcnt; + boolean_t unspec; + /* + * mcip is the client whose ring groups are changed. mrp carries the + * request: MRP_RX_RINGS and MRP_TX_RINGS select the side, + * MRP_RINGS_RESET asks for a reset, MRP_RXRINGS_UNSPEC or + * MRP_TXRINGS_UNSPEC asks for a group without a ring count, and + * mrp_nrxrings or mrp_ntxrings give the count otherwise. tmrp is only + * read; its mask and counts are compared against mrp to detect a + * no-op and to decide whether rings or a group had been explicitly + * reserved before. cmrp, the property block of the client itself, is + * written only on the reset paths. + * + * Returns 0 on success or when nothing needs to change, EINVAL if + * mcip->mci_share is not NULL, ENOTSUP for a non-zero Tx ring count + * on a Tx group type that is not dynamic, ENOSPC if a group cannot be + * reserved or switched to, or the error from mac_group_ring_modify(). + */ + if (mcip->mci_share != NULL) + return (EINVAL); + + if (mrp->mrp_mask & MRP_RX_RINGS) { + unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; + group = flent->fe_rx_ring_group; + defgrp = MAC_DEFAULT_RX_GROUP(mip); + mac_addr = flent->fe_flow_desc.fd_dst_mac; + + /* + * No resulting change. If we are resetting on a client on + * which there was no rx rings property. For dynamic group + * if we are setting the same number of rings already set. + * For static group if we are requesting a group again. + * + * NOTE(review): these returns, and the return that ends the + * reset path below, leave the function, so the MRP_TX_RINGS + * section is not reached even when mrp has both + * MRP_RX_RINGS and MRP_TX_RINGS set. Confirm that callers + * never request both sides in one call. + */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (!(tmrp->mrp_mask & MRP_RX_RINGS)) + return (0); + } else { + if (unspec) { + if (tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + return (0); + } else if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + if ((tmrp->mrp_mask & MRP_RX_RINGS) && + !(tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) && + mrp->mrp_nrxrings == tmrp->mrp_nrxrings) { + return (0); + } + } + } + /* Resetting the prop */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + /* + * We will just keep one ring and give others back if + * we are not the primary. For the primary we give + * all the rings in the default group except the + * default ring. If it is a static group, then + * we don't do anything, but clear the MRP_RX_RINGS + * flag. + */ + if (group != defgrp) { + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + /* + * This group has reserved rings + * that need to be released now, + * so does the group. 
+ */ + MAC_RX_RING_RELEASED(mip, + group->mrg_cur_count); + MAC_RX_GRP_RELEASED(mip); + if ((flent->fe_type & + FLOW_PRIMARY_MAC) != 0) { + if (mip->mi_nactiveclients == + 1) { + (void) + mac_rx_switch_group( + mcip, group, + defgrp); + return (0); + } else { + cmrp->mrp_nrxrings = + group-> + mrg_cur_count + + defgrp-> + mrg_cur_count - 1; + } + } else { + cmrp->mrp_nrxrings = 1; + } + (void) mac_group_ring_modify(mcip, + group, defgrp); + } else { + /* + * If this is a static group, we + * need to release the group. The + * client will remain in the same + * group till some other client + * needs this group. + */ + MAC_RX_GRP_RELEASED(mip); + } + /* Let us check if we can give this client an exclusive group */ + } else if (group == defgrp) { + ngrp = mac_reserve_rx_group(mcip, mac_addr, + B_TRUE); + /* Couldn't give it a group, that's fine */ + if (ngrp == NULL) + return (0); + /* + * Switch to H/W. + * NOTE(review): on failure this path calls + * mac_stop_group(), while the non-reset path + * further down calls mac_release_rx_group(). + * Confirm the difference is intended. + */ + if (mac_rx_switch_group(mcip, defgrp, ngrp) != + 0) { + mac_stop_group(ngrp); + return (0); + } + } + /* + * If the client is in the default group, we will + * just clear the MRP_RX_RINGS and leave it as + * is rather than look for an exclusive group + * for it. + * + * NOTE(review): the else-if branch just above does + * call mac_reserve_rx_group() for a client in the + * default group, so this comment and the code + * disagree. Confirm which one is intended. 
+ */ + return (0); + } + /* + * Not a reset. One of three transitions applies. A client in + * the default group asking for rings (a count above zero, or + * unspecified) is moved to a newly reserved group. A client + * outside the default group asking for zero rings is moved + * back to the default group. Otherwise, for a dynamic group + * type, the rings of the current group are adjusted with + * mac_group_ring_modify(). MAC_RX_GRP_RESERVED, + * MAC_RX_RING_RESERVED and their RELEASED counterparts are + * invoked on mip to match each transition. + */ + if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) { + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); + if (ngrp == NULL) + return (ENOSPC); + + /* Switch to H/W */ + if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { + mac_release_rx_group(mcip, ngrp); + return (ENOSPC); + } + MAC_RX_GRP_RESERVED(mip); + if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) + MAC_RX_RING_RESERVED(mip, ngrp->mrg_cur_count); + } else if (group != defgrp && !unspec && + mrp->mrp_nrxrings == 0) { + /* Switch to S/W */ + ringcnt = group->mrg_cur_count; + if (mac_rx_switch_group(mcip, group, defgrp) != 0) + return (ENOSPC); + if (tmrp->mrp_mask & MRP_RX_RINGS) { + MAC_RX_GRP_RELEASED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RELEASED(mip, ringcnt); + } + } + } else if (group != defgrp && mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + err = mac_group_ring_modify(mcip, group, defgrp); + if (err != 0) + return (err); + /* + * Update the accounting. If this group + * already had explicitly reserved rings, + * we need to update the rings based on + * the new ring count. If this group + * had not explicitly reserved rings, + * then we just reserve the rings asked for + * and reserve the group. + * ringcnt holds the ring count sampled before + * mac_group_ring_modify() was called. + */ + if (tmrp->mrp_mask & MRP_RX_RINGS) { + if (ringcnt > group->mrg_cur_count) { + MAC_RX_RING_RELEASED(mip, + ringcnt - group->mrg_cur_count); + } else { + MAC_RX_RING_RESERVED(mip, + group->mrg_cur_count - ringcnt); + } + } else { + MAC_RX_RING_RESERVED(mip, group->mrg_cur_count); + MAC_RX_GRP_RESERVED(mip); + } + } + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; + group = flent->fe_tx_ring_group; + defgrp = MAC_DEFAULT_TX_GROUP(mip); + + /* + * For static groups we only allow rings=0 or resetting the + * rings property. 
+ */ + if (mrp->mrp_ntxrings > 0 && + mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + return (ENOTSUP); + } + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (!(tmrp->mrp_mask & MRP_TX_RINGS)) + return (0); + } else { + if (unspec) { + if (tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + return (0); + } else if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + if ((tmrp->mrp_mask & MRP_TX_RINGS) && + !(tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) && + mrp->mrp_ntxrings == tmrp->mrp_ntxrings) { + return (0); + } + } + } + /* Resetting the prop */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (group != defgrp) { + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + if ((flent->fe_type & + FLOW_PRIMARY_MAC) != 0) { + mac_tx_client_quiesce( + (mac_client_handle_t) + mcip); + mac_tx_switch_group(mcip, + group, defgrp); + mac_tx_client_restart( + (mac_client_handle_t) + mcip); + MAC_TX_GRP_RELEASED(mip); + MAC_TX_RING_RELEASED(mip, + ringcnt); + return (0); + } + cmrp->mrp_ntxrings = 1; + (void) mac_group_ring_modify(mcip, + group, defgrp); + /* + * This group has reserved rings + * that need to be released now. + */ + MAC_TX_RING_RELEASED(mip, ringcnt); + } + /* + * If this is a static group, we + * need to release the group. The + * client will remain in the same + * group till some other client + * needs this group. + * NOTE: this release is also reached when + * the dynamic, non-primary branch above + * falls through. + */ + MAC_TX_GRP_RELEASED(mip); + } else if (group == defgrp && + (flent->fe_type & FLOW_PRIMARY_MAC) == 0) { + ngrp = mac_reserve_tx_group(mcip, B_TRUE); + if (ngrp == NULL) + return (0); + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, defgrp, ngrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + } + /* + * If the client is in the default group, we will + * just clear the MRP_TX_RINGS and leave it as + * is rather than look for an exclusive group + * for it. + * + * NOTE(review): the else-if branch just above does + * call mac_reserve_tx_group() for a client in the + * default group that is not FLOW_PRIMARY_MAC, so + * this comment only matches the code for the + * primary. Confirm which one is intended. 
+ */ + return (0); + } + + /* + * Switch to H/W. mac_tx_client_quiesce() and + * mac_tx_client_restart() bracket every Tx group switch + * in this function. + */ + if (group == defgrp && ((mrp->mrp_ntxrings > 0) || unspec)) { + ngrp = mac_reserve_tx_group(mcip, B_TRUE); + if (ngrp == NULL) + return (ENOSPC); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, defgrp, ngrp); + mac_tx_client_restart((mac_client_handle_t)mcip); + MAC_TX_GRP_RESERVED(mip); + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) + MAC_TX_RING_RESERVED(mip, ngrp->mrg_cur_count); + /* Switch to S/W */ + } else if (group != defgrp && !unspec && + mrp->mrp_ntxrings == 0) { + /* Switch to S/W */ + ringcnt = group->mrg_cur_count; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, group, defgrp); + mac_tx_client_restart((mac_client_handle_t)mcip); + if (tmrp->mrp_mask & MRP_TX_RINGS) { + MAC_TX_GRP_RELEASED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RELEASED(mip, ringcnt); + } + } + } else if (group != defgrp && mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + err = mac_group_ring_modify(mcip, group, defgrp); + if (err != 0) + return (err); + /* + * Update the accounting. If this group + * already had explicitly reserved rings, + * we need to update the rings based on + * the new ring count. If this group + * had not explicitly reserved rings, + * then we just reserve the rings asked for + * and reserve the group. + * ringcnt holds the ring count sampled before + * mac_group_ring_modify() was called. + */ + if (tmrp->mrp_mask & MRP_TX_RINGS) { + if (ringcnt > group->mrg_cur_count) { + MAC_TX_RING_RELEASED(mip, + ringcnt - group->mrg_cur_count); + } else { + MAC_TX_RING_RESERVED(mip, + group->mrg_cur_count - ringcnt); + } + } else { + MAC_TX_RING_RESERVED(mip, group->mrg_cur_count); + MAC_TX_GRP_RESERVED(mip); + } + } + } + return (0); +} + +/* + * When the MAC client is being brought up (i.e. 
we do a unicast_add) we need * to initialize the cpu and resource control structure in the * mac_client_impl_t from the mac_impl_t (i.e if there are any cached @@ -1506,16 +1903,73 @@ mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = (mac_impl_t *)mcip->mci_mip; int err = 0; + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *omrp, *nmrp = MCIP_RESOURCE_PROPS(mcip); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - err = mac_validate_props(mrp); + err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ? + mcip->mci_upper_mip : mip, mrp); if (err != 0) return (err); + /* + * Copy over the existing properties since mac_update_resources + * will modify the client's mrp. Currently, the saved property + * is used to determine the difference between existing and + * modified rings property. + */ + omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP); + bcopy(nmrp, omrp, sizeof (*omrp)); mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE); if (MCIP_DATAPATH_SETUP(mcip)) { /* + * We support rings only for primary client when there are + * multiple clients sharing the same MAC address (e.g. VLAN). 
+ */ + if (mrp->mrp_mask & MRP_RX_RINGS || + mrp->mrp_mask & MRP_TX_RINGS) { + + if ((err = mac_client_set_rings_prop(mcip, mrp, + omrp)) != 0) { + if (omrp->mrp_mask & MRP_RX_RINGS) { + nmrp->mrp_mask |= MRP_RX_RINGS; + nmrp->mrp_nrxrings = omrp->mrp_nrxrings; + } else { + nmrp->mrp_mask &= ~MRP_RX_RINGS; + nmrp->mrp_nrxrings = 0; + } + if (omrp->mrp_mask & MRP_TX_RINGS) { + nmrp->mrp_mask |= MRP_TX_RINGS; + nmrp->mrp_ntxrings = omrp->mrp_ntxrings; + } else { + nmrp->mrp_mask &= ~MRP_TX_RINGS; + nmrp->mrp_ntxrings = 0; + } + if (omrp->mrp_mask & MRP_RXRINGS_UNSPEC) + omrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else + omrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (omrp->mrp_mask & MRP_TXRINGS_UNSPEC) + omrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else + omrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + kmem_free(omrp, sizeof (*omrp)); + return (err); + } + + /* + * If we modified the rings property of the primary + * we need to update the property fields of its + * VLANs as they inherit the primary's properites. + */ + if (mac_is_primary_client(mcip)) { + mac_set_prim_vlan_rings(mip, + MCIP_RESOURCE_PROPS(mcip)); + } + } + /* * We have to set this prior to calling mac_flow_modify. 
*/ if (mrp->mrp_mask & MRP_PRIORITY) { @@ -1528,11 +1982,11 @@ mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp) } } - mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp); + mac_flow_modify(mip->mi_flow_tab, flent, mrp); if (mrp->mrp_mask & MRP_PRIORITY) mac_update_subflow_priority(mcip); - return (0); } + kmem_free(omrp, sizeof (*omrp)); return (0); } @@ -1562,8 +2016,12 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, */ bzero(&flow_desc, sizeof (flow_desc)); - flow_desc.fd_mac_len = mip->mi_type->mt_addr_length; - bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len); + ASSERT(mac_addr != NULL || + (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR)); + if (mac_addr != NULL) { + flow_desc.fd_mac_len = mip->mi_type->mt_addr_length; + bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len); + } flow_desc.fd_mask = FLOW_LINK_DST; if (vid != 0) { flow_desc.fd_vid = vid; @@ -1612,6 +2070,7 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, flent_flags, flent)) != 0) return (err); + mac_misc_stat_create(*flent); FLOW_MARK(*flent, FE_INCIPIENT); (*flent)->fe_mcip = mcip; @@ -1700,6 +2159,9 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, boolean_t nactiveclients_added = B_FALSE; flow_entry_t *flent; int err = 0; + boolean_t no_unicast; + + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; if ((err = mac_start((mac_handle_t)mip)) != 0) goto bail; @@ -1725,10 +2187,11 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, /* We are configuring the unicast flow now */ if (!MCIP_DATAPATH_SETUP(mcip)) { - MAC_CLIENT_SET_PRIORITY_RANGE(mcip, - (mrp->mrp_mask & MRP_PRIORITY) ? mrp->mrp_priority : - MPL_LINK_DEFAULT); - + if (mrp != NULL) { + MAC_CLIENT_SET_PRIORITY_RANGE(mcip, + (mrp->mrp_mask & MRP_PRIORITY) ? 
mrp->mrp_priority : + MPL_LINK_DEFAULT); + } if ((err = mac_unicast_flow_create(mcip, mac_addr, vid, isprimary, B_TRUE, &flent, mrp)) != 0) goto bail; @@ -1743,6 +2206,8 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, if ((err = mac_datapath_setup(mcip, flent, SRST_LINK)) != 0) goto bail; + if (no_unicast) + goto done_setup; /* * The unicast MAC address must have been added successfully. */ @@ -1756,6 +2221,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, } else { mac_address_t *map = mcip->mci_unicast; + ASSERT(!no_unicast); /* * A unicast flow already exists for that MAC client, * this flow must be the same mac address but with @@ -1794,7 +2260,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, mcip->mci_unicast_list = muip; rw_exit(&mcip->mci_rw_lock); - +done_setup: /* * First add the flent to the flow list of this mcip. Then set * the mip's mi_single_active_client if needed. The Rx path assumes @@ -1802,7 +2268,6 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, * flent. */ mac_client_add_to_flow_list(mcip, flent); - if (nactiveclients_added) mac_update_single_active_client(mip); /* @@ -1889,7 +2354,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, boolean_t fastpath_disabled = B_FALSE; boolean_t is_primary = (flags & MAC_UNICAST_PRIMARY); boolean_t is_unicast_hw = (flags & MAC_UNICAST_HW); - mac_resource_props_t mrp; + mac_resource_props_t *mrp; boolean_t passive_client = B_FALSE; mac_unicast_impl_t *muip; boolean_t is_vnic_primary = @@ -1899,6 +2364,13 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0))); /* + * Can't unicast add if the client asked only for minimal datapath + * setup. + */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) + return (ENOTSUP); + + /* * Check for an attempted use of the current Port VLAN ID, if enabled. * No client may use it. 
*/ @@ -2020,7 +2492,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, mip->mi_state_flags |= MIS_EXCLUSIVE; } - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC | MCIS_IS_AGGR_PORT))) { /* @@ -2029,11 +2501,40 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, * port, its property should be set in the mcip when the * VNIC/aggr was created. */ - mac_get_resources((mac_handle_t)mip, &mrp); - (void) mac_client_set_resources(mch, &mrp); + mac_get_resources((mac_handle_t)mip, mrp); + (void) mac_client_set_resources(mch, mrp); } else if (mcip->mci_state_flags & MCIS_IS_VNIC) { - bcopy(MCIP_RESOURCE_PROPS(mcip), &mrp, - sizeof (mac_resource_props_t)); + /* + * This is a primary VLAN client, we don't support + * specifying rings property for this as it inherits the + * rings property from its MAC. + */ + if (is_vnic_primary) { + mac_resource_props_t *vmrp; + + vmrp = MCIP_RESOURCE_PROPS(mcip); + if (vmrp->mrp_mask & MRP_RX_RINGS || + vmrp->mrp_mask & MRP_TX_RINGS) { + if (fastpath_disabled) + mac_fastpath_enable((mac_handle_t)mip); + kmem_free(mrp, sizeof (*mrp)); + return (ENOTSUP); + } + /* + * Additionally we also need to inherit any + * rings property from the MAC. 
+ */ + mac_get_resources((mac_handle_t)mip, mrp); + if (mrp->mrp_mask & MRP_RX_RINGS) { + vmrp->mrp_mask |= MRP_RX_RINGS; + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + vmrp->mrp_mask |= MRP_TX_RINGS; + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + } + } + bcopy(MCIP_RESOURCE_PROPS(mcip), mrp, sizeof (*mrp)); } muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP); @@ -2151,6 +2652,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, ASSERT((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0); mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY; + kmem_free(mrp, sizeof (*mrp)); /* * Stash the unicast address handle, we will use it when @@ -2161,10 +2663,12 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, return (0); } - err = mac_client_datapath_setup(mcip, vid, mac_addr, &mrp, + err = mac_client_datapath_setup(mcip, vid, mac_addr, mrp, is_primary || is_vnic_primary, muip); if (err != 0) goto bail_out; + + kmem_free(mrp, sizeof (*mrp)); *mah = (mac_unicast_handle_t)muip; return (0); @@ -2178,6 +2682,7 @@ bail_out: mip->mi_driver); } } + kmem_free(mrp, sizeof (*mrp)); kmem_free(muip, sizeof (mac_unicast_impl_t)); return (err); } @@ -2227,25 +2732,33 @@ mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, return (err); } -void +static void mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, flow_entry_t *flent) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = mcip->mci_mip; + boolean_t no_unicast; /* - * We would have initialized subflows etc. only if we brought up - * the primary client and set the unicast unicast address etc. - * Deactivate the flows. The flow entry will be removed from the - * active flow tables, and the associated SRS, softrings etc will - * be deleted. 
But the flow entry itself won't be destroyed, instead - * it will continue to be archived off the the global flow hash - * list, for a possible future activation when say IP is plumbed - * again. + * If we have not added a unicast address for this MAC client, just + * teardown the datapath. */ - mac_link_release_flows(mch); + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; + if (!no_unicast) { + /* + * We would have initialized subflows etc. only if we brought + * up the primary client and set the unicast unicast address + * etc. Deactivate the flows. The flow entry will be removed + * from the active flow tables, and the associated SRS, + * softrings etc will be deleted. But the flow entry itself + * won't be destroyed, instead it will continue to be archived + * off the the global flow hash list, for a possible future + * activation when say IP is plumbed again. + */ + mac_link_release_flows(mch); + } mip->mi_nactiveclients--; mac_update_single_active_client(mip); @@ -2287,6 +2800,7 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, flent->fe_tx_srs == NULL && flent->fe_rx_srs_cnt == 0); flent->fe_flags = FE_MC_NO_DATAPATH; flow_stat_destroy(flent); + mac_misc_stat_delete(flent); /* Initialize the receiver function to a safe routine */ flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; @@ -2297,8 +2811,9 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, mutex_exit(&flent->fe_lock); if (mip->mi_type->mt_brdcst_addr != NULL) { + ASSERT(muip != NULL || no_unicast); mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, - muip->mui_vid); + muip != NULL ? 
muip->mui_vid : VLAN_ID_NONE); } if (mip->mi_nactiveclients == 1) { @@ -2324,8 +2839,12 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK; - kmem_free(muip, sizeof (mac_unicast_impl_t)); + if (muip != NULL) + kmem_free(muip, sizeof (mac_unicast_impl_t)); + mac_protect_cancel_timer(mcip); + mac_protect_flush_dhcp(mcip); + bzero(&mcip->mci_misc_stat, sizeof (mcip->mci_misc_stat)); /* * Disable fastpath if this is a VNIC or a VLAN. */ @@ -2345,7 +2864,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mac_unicast_impl_t *pre; mac_impl_t *mip = mcip->mci_mip; flow_entry_t *flent; - boolean_t isprimary = B_FALSE; + uint16_t mui_vid; i_mac_perim_enter(mip); if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) { @@ -2436,11 +2955,6 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) rw_exit(&mcip->mci_rw_lock); } - if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && - muip->mui_vid == 0) { - mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; - isprimary = B_TRUE; - } if (!mac_client_single_rcvr(mcip)) { /* * This MAC client is shared by more than one unicast @@ -2490,34 +3004,39 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) return (0); } + mui_vid = muip->mui_vid; mac_client_datapath_teardown(mch, muip, flent); + if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && mui_vid == 0) { + mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; + } else { + i_mac_perim_exit(mip); + return (0); + } + /* * If we are removing the primary, check if we have a passive primary * client that we need to activate now. 
*/ - if (!isprimary) { - i_mac_perim_exit(mip); - return (0); - } mcip = mac_get_passive_primary_client(mip); if (mcip != NULL) { - mac_resource_props_t mrp; + mac_resource_props_t *mrp; mac_unicast_impl_t *muip; mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY; - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + /* * Apply the property cached in the mac_impl_t to the * primary mac client. */ - mac_get_resources((mac_handle_t)mip, &mrp); - (void) mac_client_set_resources(mch, &mrp); + mac_get_resources((mac_handle_t)mip, mrp); + (void) mac_client_set_resources(mch, mrp); ASSERT(mcip->mci_p_unicast_list != NULL); muip = mcip->mci_p_unicast_list; mcip->mci_p_unicast_list = NULL; if (mac_client_datapath_setup(mcip, VLAN_ID_NONE, - mip->mi_addr, &mrp, B_TRUE, muip) == 0) { + mip->mi_addr, mrp, B_TRUE, muip) == 0) { if (mcip->mci_rx_p_fn != NULL) { mac_rx_set(mch, mcip->mci_rx_p_fn, mcip->mci_rx_p_arg); @@ -2527,6 +3046,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) } else { kmem_free(muip, sizeof (mac_unicast_impl_t)); } + kmem_free(mrp, sizeof (*mrp)); } i_mac_perim_exit(mip); return (0); @@ -2775,36 +3295,6 @@ mac_promisc_remove(mac_promisc_handle_t mph) } /* - * Bump the count of the number of active Tx threads. This is maintained as - * a per CPU counter. On (CMT kind of) machines with large number of CPUs, - * a single mci_tx_lock may become contended. However a count of the total - * number of Tx threads per client is needed in order to quiesce the Tx side - * prior to reassigning a Tx ring dynamically to another client. The thread - * that needs to quiesce the Tx traffic grabs all the percpu locks and checks - * the sum of the individual percpu refcnts. Each Tx data thread only grabs - * its own percpu lock and increments its own refcnt. 
- */ -void * -mac_tx_hold(mac_client_handle_t mch) -{ - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_tx_percpu_t *mytx; - int error; - - MAC_TX_TRY_HOLD(mcip, mytx, error); - return (error == 0 ? (void *)mytx : NULL); -} - -void -mac_tx_rele(mac_client_handle_t mch, void *mytx_handle) -{ - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_tx_percpu_t *mytx = mytx_handle; - - MAC_TX_RELE(mcip, mytx) -} - -/* * Send function invoked by MAC clients. */ mac_tx_cookie_t @@ -2872,8 +3362,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, srs_tx = &srs->srs_tx; if (srs_tx->st_mode == SRS_TX_DEFAULT && (srs->srs_state & SRS_ENQUEUED) == 0 && - mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL && - mp_chain->b_next == NULL) { + mip->mi_nactiveclients == 1 && mp_chain->b_next == NULL) { uint64_t obytes; /* @@ -2891,7 +3380,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, MAC_VID_CHECK(mcip, mp_chain, err); if (err != 0) { freemsg(mp_chain); - mcip->mci_stat_oerrors++; + mcip->mci_misc_stat.mms_txerrors++; goto done; } } @@ -2899,7 +3388,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, mp_chain = mac_add_vlan_tag(mp_chain, 0, mac_client_vid(mch)); if (mp_chain == NULL) { - mcip->mci_stat_oerrors++; + mcip->mci_misc_stat.mms_txerrors++; goto done; } } @@ -2908,17 +3397,11 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, obytes = (mp_chain->b_cont == NULL ? 
MBLKL(mp_chain) : msgdsize(mp_chain)); - MAC_TX(mip, srs_tx->st_arg2, mp_chain, - ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0)); - + MAC_TX(mip, srs_tx->st_arg2, mp_chain, mcip); if (mp_chain == NULL) { cookie = NULL; - mcip->mci_stat_obytes += obytes; - mcip->mci_stat_opackets += 1; - if ((srs->srs_type & SRST_FLOW) != 0) { - FLOW_STAT_UPDATE(flent, obytes, obytes); - FLOW_STAT_UPDATE(flent, opackets, 1); - } + SRS_TX_STAT_UPDATE(srs, opackets, 1); + SRS_TX_STAT_UPDATE(srs, obytes, obytes); } else { mutex_enter(&srs->srs_lock); cookie = mac_tx_srs_no_desc(srs, mp_chain, @@ -2978,7 +3461,14 @@ mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie) } mutex_enter(&mac_srs->srs_lock); - if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT) { + /* + * Only in the case of TX_FANOUT and TX_AGGR, the underlying + * softring (s_ring_state) will have the HIWAT set. This is + * the multiple Tx ring flow control case. For all other + * case, SRS (srs_state) will store the condition. + */ + if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR) { if (cookie != NULL) { sringp = (mac_soft_ring_t *)cookie; mutex_enter(&sringp->s_ring_lock); @@ -2986,8 +3476,8 @@ mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie) blocked = B_TRUE; mutex_exit(&sringp->s_ring_lock); } else { - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_state & S_RING_TX_HIWAT) { blocked = B_TRUE; @@ -3228,9 +3718,10 @@ mac_cpu_set(mac_client_handle_t mch, mac_resource_props_t *mrp) ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - if ((err = mac_validate_props(mrp)) != 0) + if ((err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ? 
+ mcip->mci_upper_mip : mip, mrp)) != 0) { return (err); - + } if (MCIP_DATAPATH_SETUP(mcip)) mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp); @@ -3256,14 +3747,20 @@ mac_client_set_resources(mac_client_handle_t mch, mac_resource_props_t *mrp) goto done; } - if (mrp->mrp_mask & MRP_CPUS) { + if (mrp->mrp_mask & (MRP_CPUS|MRP_POOL)) { err = mac_cpu_set(mch, mrp); if (err != 0) goto done; } - if (mrp->mrp_mask & MRP_PROTECT) + if (mrp->mrp_mask & MRP_PROTECT) { err = mac_protect_set(mch, mrp); + if (err != 0) + goto done; + } + + if ((mrp->mrp_mask & MRP_RX_RINGS) || (mrp->mrp_mask & MRP_TX_RINGS)) + err = mac_resource_ctl_set(mch, mrp); done: i_mac_perim_exit(mip); @@ -3283,6 +3780,20 @@ mac_client_get_resources(mac_client_handle_t mch, mac_resource_props_t *mrp) } /* + * Return the effective properties currently associated with the specified + * MAC client. The whole mac_resource_props_t found through + * MCIP_EFFECTIVE_PROPS(mcip) is copied with bcopy() into the + * caller-supplied mrp; the function itself returns nothing. + */ +void +mac_client_get_effective_resources(mac_client_handle_t mch, + mac_resource_props_t *mrp) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_resource_props_t *mcip_mrp = MCIP_EFFECTIVE_PROPS(mcip); + + bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t)); +} + +/* * Pass a copy of the specified packet to the promiscuous callbacks * of the specified MAC. 
* @@ -3708,6 +4219,16 @@ mac_get_lower_mac_handle(mac_handle_t mh) return (((vnic_t *)mip->mi_driver)->vn_lower_mh); } +boolean_t +mac_is_vnic_primary(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + /* + * Only valid on a VNIC handle, which the ASSERT below checks with + * mac_is_vnic(). The result is non-zero when the vn_addr_type of the + * vnic_t stored in mi_driver is VNIC_MAC_ADDR_TYPE_PRIMARY. + */ + ASSERT(mac_is_vnic(mh)); + return (((vnic_t *)mip->mi_driver)->vn_addr_type == + VNIC_MAC_ADDR_TYPE_PRIMARY); +} + void mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp, boolean_t is_user_flow) @@ -3728,17 +4249,66 @@ mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp, } } if (nmrp->mrp_mask & MRP_MAXBW) { - cmrp->mrp_maxbw = nmrp->mrp_maxbw; - if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) + if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { cmrp->mrp_mask &= ~MRP_MAXBW; - else + cmrp->mrp_maxbw = 0; + } else { cmrp->mrp_mask |= MRP_MAXBW; + cmrp->mrp_maxbw = nmrp->mrp_maxbw; + } } if (nmrp->mrp_mask & MRP_CPUS) MAC_COPY_CPUS(nmrp, cmrp); + if (nmrp->mrp_mask & MRP_POOL) { + if (strlen(nmrp->mrp_pool) == 0) { + cmrp->mrp_mask &= ~MRP_POOL; + bzero(cmrp->mrp_pool, sizeof (cmrp->mrp_pool)); + } else { + cmrp->mrp_mask |= MRP_POOL; + (void) strncpy(cmrp->mrp_pool, nmrp->mrp_pool, + sizeof (cmrp->mrp_pool)); + } + + } + if (nmrp->mrp_mask & MRP_PROTECT) mac_protect_update(nmrp, cmrp); + + /* + * Update the rings specified. 
+ */ + if (nmrp->mrp_mask & MRP_RX_RINGS) { + if (nmrp->mrp_mask & MRP_RINGS_RESET) { + cmrp->mrp_mask &= ~MRP_RX_RINGS; + if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + cmrp->mrp_nrxrings = 0; + } else { + cmrp->mrp_mask |= MRP_RX_RINGS; + cmrp->mrp_nrxrings = nmrp->mrp_nrxrings; + } + } + if (nmrp->mrp_mask & MRP_TX_RINGS) { + if (nmrp->mrp_mask & MRP_RINGS_RESET) { + cmrp->mrp_mask &= ~MRP_TX_RINGS; + if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + cmrp->mrp_ntxrings = 0; + } else { + cmrp->mrp_mask |= MRP_TX_RINGS; + cmrp->mrp_ntxrings = nmrp->mrp_ntxrings; + } + } + if (nmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (nmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; } } @@ -3757,26 +4327,29 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_client_impl_t *mcip; int err = 0; uint32_t resmask, newresmask; - mac_resource_props_t tmrp, umrp; + mac_resource_props_t *tmrp, *umrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - err = mac_validate_props(mrp); + err = mac_validate_props(mip, mrp); if (err != 0) return (err); - bcopy(&mip->mi_resource_props, &umrp, sizeof (mac_resource_props_t)); - resmask = umrp.mrp_mask; - mac_update_resources(mrp, &umrp, B_FALSE); - newresmask = umrp.mrp_mask; + umrp = kmem_zalloc(sizeof (*umrp), KM_SLEEP); + bcopy(&mip->mi_resource_props, umrp, sizeof (*umrp)); + resmask = umrp->mrp_mask; + mac_update_resources(mrp, umrp, B_FALSE); + newresmask = umrp->mrp_mask; if (resmask == 0 && newresmask != 0) { /* - * Bandwidth, priority or cpu link properties configured, + * Bandwidth, priority, cpu or pool link properties configured, * must disable fastpath. 
*/ - if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) + if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) { + kmem_free(umrp, sizeof (*umrp)); return (err); + } } /* @@ -3784,19 +4357,93 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) * we use a copy of bind_cpu and finally cache bind_cpu in mip. * This allows us to cache only user edits in mip. */ - bcopy(mrp, &tmrp, sizeof (mac_resource_props_t)); + tmrp = kmem_zalloc(sizeof (*tmrp), KM_SLEEP); + bcopy(mrp, tmrp, sizeof (*tmrp)); mcip = mac_primary_client_handle(mip); if (mcip != NULL && (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0) { - err = - mac_client_set_resources((mac_client_handle_t)mcip, &tmrp); + err = mac_client_set_resources((mac_client_handle_t)mcip, tmrp); + } else if ((mrp->mrp_mask & MRP_RX_RINGS || + mrp->mrp_mask & MRP_TX_RINGS)) { + mac_client_impl_t *vmcip; + + /* + * If the primary is not up, we need to check if there + * are any VLANs on this primary. If there are then + * we need to set this property on the VLANs since + * VLANs follow the primary they are based on. Just + * look for the first VLAN and change its properties, + * all the other VLANs should be in the same group. + */ + for (vmcip = mip->mi_clients_list; vmcip != NULL; + vmcip = vmcip->mci_client_next) { + if ((vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) && + mac_client_vid((mac_client_handle_t)vmcip) != + VLAN_ID_NONE) { + break; + } + } + if (vmcip != NULL) { + mac_resource_props_t *omrp; + mac_resource_props_t *vmrp; + + omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP); + bcopy(MCIP_RESOURCE_PROPS(vmcip), omrp, sizeof (*omrp)); + /* + * We dont' call mac_update_resources since we + * want to take only the ring properties and + * not all the properties that may have changed. 
+ */ + vmrp = MCIP_RESOURCE_PROPS(vmcip); + if (mrp->mrp_mask & MRP_RX_RINGS) { + if (mrp->mrp_mask & MRP_RINGS_RESET) { + vmrp->mrp_mask &= ~MRP_RX_RINGS; + if (vmrp->mrp_mask & + MRP_RXRINGS_UNSPEC) { + vmrp->mrp_mask &= + ~MRP_RXRINGS_UNSPEC; + } + vmrp->mrp_nrxrings = 0; + } else { + vmrp->mrp_mask |= MRP_RX_RINGS; + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + } + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + if (mrp->mrp_mask & MRP_RINGS_RESET) { + vmrp->mrp_mask &= ~MRP_TX_RINGS; + if (vmrp->mrp_mask & + MRP_TXRINGS_UNSPEC) { + vmrp->mrp_mask &= + ~MRP_TXRINGS_UNSPEC; + } + vmrp->mrp_ntxrings = 0; + } else { + vmrp->mrp_mask |= MRP_TX_RINGS; + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + } + } + if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + + if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + + if ((err = mac_client_set_rings_prop(vmcip, mrp, + omrp)) != 0) { + bcopy(omrp, MCIP_RESOURCE_PROPS(vmcip), + sizeof (*omrp)); + } else { + mac_set_prim_vlan_rings(mip, vmrp); + } + kmem_free(omrp, sizeof (*omrp)); + } } /* Only update the values if mac_client_set_resources succeeded */ if (err == 0) { - bcopy(&umrp, &mip->mi_resource_props, - sizeof (mac_resource_props_t)); + bcopy(umrp, &mip->mi_resource_props, sizeof (*umrp)); /* - * If bankwidth, priority or cpu link properties cleared, + * If bandwidth, priority or cpu link properties cleared, * renable fastpath. 
*/ if (resmask != 0 && newresmask == 0) @@ -3804,6 +4451,8 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) } else if (resmask == 0 && newresmask != 0) { mac_fastpath_enable((mac_handle_t)mip); } + kmem_free(tmrp, sizeof (*tmrp)); + kmem_free(umrp, sizeof (*umrp)); return (err); } @@ -3827,17 +4476,33 @@ mac_get_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_impl_t *mip = (mac_impl_t *)mh; mac_client_impl_t *mcip; - if (mip->mi_state_flags & MIS_IS_VNIC) { - mcip = mac_primary_client_handle(mip); - if (mcip != NULL) { - mac_client_get_resources((mac_client_handle_t)mcip, - mrp); - return; - } + mcip = mac_primary_client_handle(mip); + if (mcip != NULL) { + mac_client_get_resources((mac_client_handle_t)mcip, mrp); + return; } bcopy(&mip->mi_resource_props, mrp, sizeof (mac_resource_props_t)); } +/* + * Get the effective properties from the primary client of the + * specified MAC instance. + */ +void +mac_get_effective_resources(mac_handle_t mh, mac_resource_props_t *mrp) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_client_impl_t *mcip; + + mcip = mac_primary_client_handle(mip); + if (mcip != NULL) { + mac_client_get_effective_resources((mac_client_handle_t)mcip, + mrp); + return; + } + bzero(mrp, sizeof (mac_resource_props_t)); +} + int mac_set_pvid(mac_handle_t mh, uint16_t pvid) { @@ -3904,8 +4569,10 @@ mac_rename_primary(mac_handle_t mh, const char *new_name) * the associated flow kstat. 
*/ if (mip->mi_state_flags & MIS_IS_VNIC) { + mac_client_impl_t *mcip = mac_vnic_lower(mip); ASSERT(new_name != NULL); - mac_rename_flow_names(mac_vnic_lower(mip), new_name); + mac_rename_flow_names(mcip, new_name); + mac_stat_rename(mcip); goto done; } /* @@ -3954,6 +4621,10 @@ mac_rename_primary(mac_handle_t mh, const char *new_name) } } + /* Recreate kstats associated with aggr pseudo rings */ + if (mip->mi_state_flags & MIS_IS_AGGR) + mac_pseudo_ring_stat_rename(mip); + done: i_mac_perim_exit(mip); return (0); @@ -4187,8 +4858,14 @@ mac_client_single_rcvr(mac_client_impl_t *mcip) } int -mac_validate_props(mac_resource_props_t *mrp) +mac_validate_props(mac_impl_t *mip, mac_resource_props_t *mrp) { + boolean_t reset; + uint32_t rings_needed; + uint32_t rings_avail; + mac_group_type_t gtype; + mac_resource_props_t *mip_mrp; + if (mrp == NULL) return (0); @@ -4246,6 +4923,100 @@ mac_validate_props(mac_resource_props_t *mrp) if (err != 0) return (err); } + + if (!(mrp->mrp_mask & MRP_RX_RINGS) && + !(mrp->mrp_mask & MRP_TX_RINGS)) { + return (0); + } + + /* + * mip will be null when we come from mac_flow_create or + * mac_link_flow_modify. In the latter case it is a user flow, + * for which we don't support rings. In the former we would + * have validated the props beforehand (i_mac_unicast_add -> + * mac_client_set_resources -> validate for the primary and + * vnic_dev_create -> mac_client_set_resources -> validate for + * a vnic). 
+ */ + if (mip == NULL) + return (0); + + /* + * We don't support setting rings property for a VNIC that is using a + * primary address (VLAN) + */ + if ((mip->mi_state_flags & MIS_IS_VNIC) && + mac_is_vnic_primary((mac_handle_t)mip)) { + return (ENOTSUP); + } + + mip_mrp = &mip->mi_resource_props; + /* + * The rings property should be validated against the NICs + * resources + */ + if (mip->mi_state_flags & MIS_IS_VNIC) + mip = (mac_impl_t *)mac_get_lower_mac_handle((mac_handle_t)mip); + + reset = mrp->mrp_mask & MRP_RINGS_RESET; + /* + * If groups are not supported, return error. + */ + if (((mrp->mrp_mask & MRP_RX_RINGS) && mip->mi_rx_groups == NULL) || + ((mrp->mrp_mask & MRP_TX_RINGS) && mip->mi_tx_groups == NULL)) { + return (EINVAL); + } + /* + * If we are just resetting, there is no validation needed. + */ + if (reset) + return (0); + + if (mrp->mrp_mask & MRP_RX_RINGS) { + rings_needed = mrp->mrp_nrxrings; + /* + * We just want to check if the number of additional + * rings requested is available. + */ + if (mip_mrp->mrp_mask & MRP_RX_RINGS) { + if (mrp->mrp_nrxrings > mip_mrp->mrp_nrxrings) + /* Just check for the additional rings */ + rings_needed -= mip_mrp->mrp_nrxrings; + else + /* We are not asking for additional rings */ + rings_needed = 0; + } + rings_avail = mip->mi_rxrings_avail; + gtype = mip->mi_rx_group_type; + } else { + rings_needed = mrp->mrp_ntxrings; + /* Similarly for the TX rings */ + if (mip_mrp->mrp_mask & MRP_TX_RINGS) { + if (mrp->mrp_ntxrings > mip_mrp->mrp_ntxrings) + /* Just check for the additional rings */ + rings_needed -= mip_mrp->mrp_ntxrings; + else + /* We are not asking for additional rings */ + rings_needed = 0; + } + rings_avail = mip->mi_txrings_avail; + gtype = mip->mi_tx_group_type; + } + + /* Error if the group is dynamic .. */ + if (gtype == MAC_GROUP_TYPE_DYNAMIC) { + /* + * .. and rings specified are more than available. 
+ */ + if (rings_needed > rings_avail) + return (EINVAL); + } else { + /* + * OR group is static and we have specified some rings. + */ + if (rings_needed > 0) + return (EINVAL); + } return (0); } @@ -4266,11 +5037,18 @@ mac_virtual_link_update(mac_impl_t *mip) * mac handle in the client. */ void -mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh) +mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh, + mac_resource_props_t *mrp) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = (mac_impl_t *)mh; - mcip->mci_upper_mip = (mac_impl_t *)mh; + mcip->mci_upper_mip = mip; + /* If there are any properties, copy them over too */ + if (mrp != NULL) { + bcopy(mrp, &mip->mi_resource_props, + sizeof (mac_resource_props_t)); + } } /* @@ -4326,15 +5104,7 @@ mac_unmark_exclusive(mac_handle_t mh) } /* - * Set the MTU for the specified MAC. Note that this mechanism depends on - * the driver calling mac_maxsdu_update() to update the link MTU if it was - * successful in setting its MTU. - * - * Note that there is potential for improvement here. A better model might be - * to not require drivers to call mac_maxsdu_update(), but rather have this - * function update mi_sdu_max and send notifications if the driver setprop - * callback succeeds. This would remove the burden and complexity from - * drivers. + * Set the MTU for the specified MAC. */ int mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) @@ -4352,9 +5122,18 @@ mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) old_mtu = mip->mi_sdu_max; + if (new_mtu == 0 || new_mtu < mip->mi_sdu_min) { + rv = EINVAL; + goto bail; + } + if (old_mtu != new_mtu) { rv = mip->mi_callbacks->mc_setprop(mip->mi_driver, "mtu", MAC_PROP_MTU, sizeof (uint_t), &new_mtu); + if (rv != 0) + goto bail; + rv = mac_maxsdu_update(mh, new_mtu); + ASSERT(rv == 0); } bail: @@ -4365,13 +5144,18 @@ bail: return (rv); } +/* + * Return the RX h/w information for the group indexed by grp_index. 
+ */ void -mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, - uint_t *n_rings, uint_t *type, uint_t *n_clnts, char *clnts_name) +mac_get_hwrxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, + uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts, + char *clnts_name) { mac_impl_t *mip = (mac_impl_t *)mh; mac_grp_client_t *mcip; uint_t i = 0, index = 0; + mac_ring_t *ring; /* Revisit when we implement fully dynamic group allocation */ ASSERT(grp_index >= 0 && grp_index < mip->mi_rx_group_count); @@ -4380,6 +5164,19 @@ mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, *grp_num = mip->mi_rx_groups[grp_index].mrg_index; *type = mip->mi_rx_groups[grp_index].mrg_type; *n_rings = mip->mi_rx_groups[grp_index].mrg_cur_count; + ring = mip->mi_rx_groups[grp_index].mrg_rings; + for (index = 0; index < mip->mi_rx_groups[grp_index].mrg_cur_count; + index++) { + rings[index] = ring->mr_index; + ring = ring->mr_next; + } + /* Assuming the 1st is the default group */ + index = 0; + if (grp_index == 0) { + (void) strlcpy(clnts_name, "<default,mcast>,", + MAXCLIENTNAMELEN); + index += strlen("<default,mcast>,"); + } for (mcip = mip->mi_rx_groups[grp_index].mrg_clients; mcip != NULL; mcip = mcip->mgc_next) { int name_len = strlen(mcip->mgc_client->mci_name); @@ -4410,10 +5207,194 @@ mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, rw_exit(&mip->mi_rw_lock); } +/* + * Return the TX h/w information for the group indexed by grp_index. 
+ */ +void +mac_get_hwtxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, + uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts, + char *clnts_name) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_grp_client_t *mcip; + uint_t i = 0, index = 0; + mac_ring_t *ring; + + /* Revisit when we implement fully dynamic group allocation */ + ASSERT(grp_index >= 0 && grp_index <= mip->mi_tx_group_count); + + rw_enter(&mip->mi_rw_lock, RW_READER); + *grp_num = mip->mi_tx_groups[grp_index].mrg_index > 0 ? + mip->mi_tx_groups[grp_index].mrg_index : grp_index; + *type = mip->mi_tx_groups[grp_index].mrg_type; + *n_rings = mip->mi_tx_groups[grp_index].mrg_cur_count; + ring = mip->mi_tx_groups[grp_index].mrg_rings; + for (index = 0; index < mip->mi_tx_groups[grp_index].mrg_cur_count; + index++) { + rings[index] = ring->mr_index; + ring = ring->mr_next; + } + index = 0; + /* Default group has an index of -1 */ + if (mip->mi_tx_groups[grp_index].mrg_index < 0) { + (void) strlcpy(clnts_name, "<default>,", + MAXCLIENTNAMELEN); + index += strlen("<default>,"); + } + for (mcip = mip->mi_tx_groups[grp_index].mrg_clients; mcip != NULL; + mcip = mcip->mgc_next) { + int name_len = strlen(mcip->mgc_client->mci_name); + + /* + * MAXCLIENTNAMELEN is the buffer size reserved for client + * names. + * XXXX Formatting the client name string needs to be moved + * to userland when fixing the size of dhi_clnts in + * dld_hwgrpinfo_t. We should use n_clients * client_name for + * dhi_clnts instead of MAXCLIENTNAMELEN. + */ + if (index + name_len >= MAXCLIENTNAMELEN) { + index = MAXCLIENTNAMELEN; + break; + } + bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]), + name_len); + index += name_len; + clnts_name[index++] = ','; + i++; + } + + /* Get rid of the last , */ + if (index > 0) + clnts_name[index - 1] = '\0'; + *n_clnts = i; + rw_exit(&mip->mi_rw_lock); +} + +/* + * Return the group count for RX or TX. 
+ */ uint_t -mac_hwgrp_num(mac_handle_t mh) +mac_hwgrp_num(mac_handle_t mh, int type) { mac_impl_t *mip = (mac_impl_t *)mh; - return (mip->mi_rx_group_count); + /* + * Return the Rx and Tx group count; for the Tx we need to + * include the default too. + */ + return (type == MAC_RING_TYPE_RX ? mip->mi_rx_group_count : + mip->mi_tx_groups != NULL ? mip->mi_tx_group_count + 1 : 0); +} + +/* + * The total number of free TX rings for this MAC. + */ +uint_t +mac_txavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txrings_avail); +} + +/* + * The total number of free RX rings for this MAC. + */ +uint_t +mac_rxavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxrings_avail); +} + +/* + * The total number of reserved RX rings on this MAC. + */ +uint_t +mac_rxrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxrings_rsvd); +} + +/* + * The total number of reserved TX rings on this MAC. + */ +uint_t +mac_txrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txrings_rsvd); +} + +/* + * Total number of free RX groups on this MAC. + */ +uint_t +mac_rxhwlnksavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxhwclnt_avail); +} + +/* + * Total number of RX groups reserved on this MAC. + */ +uint_t +mac_rxhwlnksrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxhwclnt_used); +} + +/* + * Total number of free TX groups on this MAC. + */ +uint_t +mac_txhwlnksavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txhwclnt_avail); +} + +/* + * Total number of TX groups reserved on this MAC. + */ +uint_t +mac_txhwlnksrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txhwclnt_used); +} + +/* + * Initialize the rings property for a mac client. 
A non-0 value for + * rxrings or txrings specifies the number of rings required; a value + * of MAC_RXRINGS_NONE/MAC_TXRINGS_NONE specifies that it doesn't need + * any RX/TX rings and a value of MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE + * means the system can decide whether it can give any rings or not. + */ +void +mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + + if (rxrings != MAC_RXRINGS_DONTCARE) { + mrp->mrp_mask |= MRP_RX_RINGS; + mrp->mrp_nrxrings = rxrings; + } + + if (txrings != MAC_TXRINGS_DONTCARE) { + mrp->mrp_mask |= MRP_TX_RINGS; + mrp->mrp_ntxrings = txrings; + } } diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index 379e488ee2..6f1661d5f2 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -19,12 +19,15 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/types.h> #include <sys/callb.h> +#include <sys/cpupart.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> #include <sys/sdt.h> #include <sys/strsubr.h> #include <sys/strsun.h> @@ -40,6 +43,7 @@ #include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> #include <sys/mac_flow_impl.h> +#include <sys/mac_stat.h> static void mac_srs_soft_rings_signal(mac_soft_ring_set_t *, uint_t); static void mac_srs_update_fanout_list(mac_soft_ring_set_t *); @@ -56,14 +60,10 @@ static void mac_srs_remove_glist(mac_soft_ring_set_t *); static void mac_srs_fanout_list_free(mac_soft_ring_set_t *); static void mac_soft_ring_remove(mac_soft_ring_set_t *, mac_soft_ring_t *); -static int mac_compute_soft_ring_count(flow_entry_t *, int); +static int mac_compute_soft_ring_count(flow_entry_t *, int, int); static void mac_walk_srs_and_bind(int); static void mac_walk_srs_and_unbind(int); -extern mac_group_t *mac_reserve_rx_group(mac_client_impl_t *, uint8_t *, - mac_rx_group_reserve_type_t); -extern void mac_release_rx_group(mac_client_impl_t *, mac_group_t *); - extern boolean_t mac_latency_optimize; static kmem_cache_t *mac_srs_cache; @@ -92,14 +92,6 @@ int mac_soft_ring_max_q_cnt = 1024; int mac_soft_ring_min_q_cnt = 256; int mac_soft_ring_poll_thres = 16; -/* - * Default value of number of TX rings to be assigned to a MAC client. - * If less than 'mac_tx_ring_count' worth of Tx rings is available, then - * as many as is available will be assigned to the newly created MAC client. - * If no TX rings are available, then MAC client(s) will be assigned the - * default Tx ring. Default Tx ring can be shared among multiple MAC clients. - */ -uint32_t mac_tx_ring_count = 32; boolean_t mac_tx_serialize = B_FALSE; /* @@ -157,9 +149,11 @@ static krwlock_t mac_srs_g_lock; boolean_t mac_srs_thread_bind = B_TRUE; /* - * CPU to fallback to, used by mac_next_bind_cpu(). + * Whether Rx/Tx interrupts should be re-targeted. Disabled by default. + * dladm command would override this. 
*/ -processorid_t srs_bind_cpu = 0; +boolean_t mac_tx_intr_retarget = B_FALSE; +boolean_t mac_rx_intr_retarget = B_FALSE; /* * If cpu bindings are specified by user, then Tx SRS and its soft @@ -170,24 +164,39 @@ processorid_t srs_bind_cpu = 0; */ #define BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp) { \ processorid_t cpuid; \ - int i, j; \ + int i; \ mac_soft_ring_t *softring; \ + mac_cpus_t *srs_cpu; \ \ - cpuid = mrp->mrp_cpu[mrp->mrp_ncpus - 1]; \ - mac_srs_worker_bind(mac_tx_srs, cpuid); \ - if (TX_MULTI_RING_MODE(mac_tx_srs)) { \ - j = mrp->mrp_ncpus - 1; \ - for (i = 0; \ - i < mac_tx_srs->srs_oth_ring_count; i++, j--) { \ - if (j < 0) \ - j = mrp->mrp_ncpus - 1; \ - cpuid = mrp->mrp_cpu[j]; \ - softring = mac_tx_srs->srs_oth_soft_rings[i]; \ - (void) mac_soft_ring_bind(softring, cpuid); \ + srs_cpu = &mac_tx_srs->srs_cpu; \ + cpuid = srs_cpu->mc_tx_fanout_cpus[0]; \ + mac_srs_worker_bind(mac_tx_srs, cpuid); \ + if (MAC_TX_SOFT_RINGS(mac_tx_srs)) { \ + for (i = 0; i < mac_tx_srs->srs_tx_ring_count; i++) { \ + cpuid = srs_cpu->mc_tx_fanout_cpus[i]; \ + softring = mac_tx_srs->srs_tx_soft_rings[i]; \ + if (cpuid != -1) { \ + (void) mac_soft_ring_bind(softring, \ + cpuid); \ + } \ } \ } \ } +/* + * Re-targeting is allowed only for exclusive group or for primary. 
+ */ +#define RETARGETABLE_CLIENT(group, mcip) \ + ((((group) != NULL) && \ + ((group)->mrg_state == MAC_GROUP_STATE_RESERVED)) || \ + mac_is_primary_client(mcip)) + +#define MAC_RING_RETARGETABLE(ring) \ + (((ring) != NULL) && \ + ((ring)->mr_info.mri_intr.mi_ddi_handle != NULL) && \ + !((ring)->mr_info.mri_intr.mi_ddi_shared)) + + /* INIT and FINI ROUTINES */ void @@ -218,7 +227,7 @@ mac_soft_ring_finish(void) } static void -mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs, boolean_t release_tx_ring) +mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs) { mac_soft_ring_t *softring, *next, *head; @@ -240,7 +249,7 @@ mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs, boolean_t release_tx_ring) for (softring = head; softring != NULL; softring = next) { next = softring->s_ring_next; - mac_soft_ring_free(softring, release_tx_ring); + mac_soft_ring_free(softring); } } @@ -518,21 +527,30 @@ mac_srs_poll_state_change(mac_soft_ring_set_t *mac_srs, /* * Return the next CPU to be used to bind a MAC kernel thread. + * If a cpupart is specified, the cpu chosen must be from that + * cpu partition. 
*/ static processorid_t -mac_next_bind_cpu(void) +mac_next_bind_cpu(cpupart_t *cpupart) { - static processorid_t srs_curr_cpu = -1; - cpu_t *cp; + static cpu_t *cp = NULL; + cpu_t *cp_start; ASSERT(MUTEX_HELD(&cpu_lock)); - srs_curr_cpu++; - cp = cpu_get(srs_curr_cpu); - if (cp == NULL || !cpu_is_online(cp)) - srs_curr_cpu = srs_bind_cpu; + if (cp == NULL) + cp = cpu_list; + + cp = cp->cpu_next_onln; + cp_start = cp; + + do { + if ((cpupart == NULL) || (cp->cpu_part == cpupart)) + return (cp->cpu_id); - return (srs_curr_cpu); + } while ((cp = cp->cpu_next_onln) != cp_start); + + return (NULL); } /* ARGSUSED */ @@ -588,7 +606,7 @@ mac_srs_cpu_setup(cpu_setup_t what, int id, void *arg) */ boolean_t mac_use_bw_heuristic = B_TRUE; static int -mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt) +mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus) { uint64_t cpu_speed, bw = 0; int srings = 0; @@ -675,12 +693,85 @@ mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt) srings = 0; } /* Do some more massaging */ - srings = min(srings, ncpus); + srings = min(srings, maxcpus); srings = min(srings, MAX_SR_FANOUT); return (srings); } /* + * mac_tx_cpu_init: + * set up CPUs for Tx interrupt re-targeting and Tx worker + * thread binding + */ +static void +mac_tx_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp, + cpupart_t *cpupart) +{ + mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs; + mac_srs_tx_t *srs_tx = &tx_srs->srs_tx; + mac_cpus_t *srs_cpu = &tx_srs->srs_cpu; + mac_soft_ring_t *sringp; + mac_ring_t *ring; + processorid_t worker_cpuid; + boolean_t retargetable_client = B_FALSE; + int i, j; + + if (RETARGETABLE_CLIENT((mac_group_t *)flent->fe_tx_ring_group, + flent->fe_mcip)) { + retargetable_client = B_TRUE; + } + + if (MAC_TX_SOFT_RINGS(tx_srs)) { + if (mrp != NULL) + j = mrp->mrp_ncpus - 1; + for (i = 0; i < tx_srs->srs_tx_ring_count; i++) { + if (mrp != NULL) { + if (j < 0) + j = mrp->mrp_ncpus - 1; + worker_cpuid 
= mrp->mrp_cpu[j]; + } else { + /* + * Bind interrupt to the next CPU available + * and leave the worker unbound. + */ + worker_cpuid = -1; + } + sringp = tx_srs->srs_tx_soft_rings[i]; + ring = (mac_ring_t *)sringp->s_ring_tx_arg2; + srs_cpu->mc_tx_fanout_cpus[i] = worker_cpuid; + if (MAC_RING_RETARGETABLE(ring) && + retargetable_client) { + mutex_enter(&cpu_lock); + srs_cpu->mc_tx_intr_cpu[i] = + (mrp != NULL) ? mrp->mrp_cpu[j] : + (mac_tx_intr_retarget ? + mac_next_bind_cpu(cpupart) : -1); + mutex_exit(&cpu_lock); + } else { + srs_cpu->mc_tx_intr_cpu[i] = -1; + } + if (mrp != NULL) + j--; + } + } else { + /* Tx mac_ring_handle_t is stored in st_arg2 */ + srs_cpu->mc_tx_fanout_cpus[0] = + (mrp != NULL) ? mrp->mrp_cpu[mrp->mrp_ncpus - 1] : -1; + ring = (mac_ring_t *)srs_tx->st_arg2; + if (MAC_RING_RETARGETABLE(ring) && retargetable_client) { + mutex_enter(&cpu_lock); + srs_cpu->mc_tx_intr_cpu[0] = (mrp != NULL) ? + mrp->mrp_cpu[mrp->mrp_ncpus - 1] : + (mac_tx_intr_retarget ? + mac_next_bind_cpu(cpupart) : -1); + mutex_exit(&cpu_lock); + } else { + srs_cpu->mc_tx_intr_cpu[0] = -1; + } + } +} + +/* * Assignment of user specified CPUs to a link. * * Minimum CPUs required to get an optimal assignmet: @@ -719,6 +810,7 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) int rx_srs_cnt, reqd_rx_cpu_cnt; int fanout_cpu_cnt, reqd_tx_cpu_cnt; int reqd_poll_worker_cnt, fanout_cnt_per_srs; + mac_resource_props_t *emrp = &flent->fe_effective_props; ASSERT(mrp->mrp_fanout_mode == MCM_CPUS); /* @@ -731,12 +823,11 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) no_of_cpus = mrp->mrp_ncpus; - if (mrp->mrp_intr_cpu != -1) { + if (mrp->mrp_rx_intr_cpu != -1) { /* * interrupt has been re-targetted. Poll * thread needs to be bound to interrupt - * CPU. Presently only fixed interrupts - * are re-targetted, MSI-x aren't. + * CPU. * * Find where in the list is the intr * CPU and swap it with the first one. 
@@ -744,11 +835,11 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * list for poll. */ for (i = 0; i < no_of_cpus; i++) { - if (mrp->mrp_cpu[i] == mrp->mrp_intr_cpu) + if (mrp->mrp_cpu[i] == mrp->mrp_rx_intr_cpu) break; } mrp->mrp_cpu[i] = mrp->mrp_cpu[0]; - mrp->mrp_cpu[0] = mrp->mrp_intr_cpu; + mrp->mrp_cpu[0] = mrp->mrp_rx_intr_cpu; } /* @@ -768,8 +859,8 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) /* How many CPUs are needed for Tx side? */ tx_srs = flent->fe_tx_srs; - reqd_tx_cpu_cnt = TX_MULTI_RING_MODE(tx_srs) ? - tx_srs->srs_oth_ring_count : 1; + reqd_tx_cpu_cnt = MAC_TX_SOFT_RINGS(tx_srs) ? + tx_srs->srs_tx_ring_count : 1; /* CPUs needed for Rx SRSes poll and worker threads */ reqd_poll_worker_cnt = mac_latency_optimize ? @@ -806,14 +897,14 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = fanout_cnt_per_srs; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize) - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = (mac_latency_optimize ? 
+ srs_cpu->mc_rx_pollid : mrp->mrp_cpu[cpu_cnt++]); for (i = 0; i < fanout_cnt_per_srs; i++) - srs_cpu->mc_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; /* Do the assignment for h/w Rx SRSes */ if (flent->fe_rx_srs_cnt > 1) { @@ -831,23 +922,22 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = fanout_cnt_per_srs; + srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs; /* The first CPU in the list is the intr CPU */ - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize) { - srs_cpu->mc_workerid = - mrp->mrp_cpu[cpu_cnt++]; - } + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + (mac_latency_optimize ? + srs_cpu->mc_rx_pollid : + mrp->mrp_cpu[cpu_cnt++]); for (i = 0; i < fanout_cnt_per_srs; i++) { - srs_cpu->mc_fanout_cpus[i] = + srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; } ASSERT(cpu_cnt <= no_of_cpus); } } - return; + goto tx_cpu_init; } /* @@ -885,13 +975,15 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize && worker_assign) - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + 
((!mac_latency_optimize && worker_assign) ? + mrp->mrp_cpu[cpu_cnt++] : srs_cpu->mc_rx_pollid); + + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; /* Do CPU bindings for SRSes having h/w Rx rings */ if (flent->fe_rx_srs_cnt > 1) { @@ -909,22 +1001,21 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_pollid = + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize && worker_assign) { - srs_cpu->mc_workerid = - mrp->mrp_cpu[++cpu_cnt]; - } - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_fanout_cpus[0] = + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + ((!mac_latency_optimize && worker_assign) ? + mrp->mrp_cpu[++cpu_cnt] : + srs_cpu->mc_rx_pollid); + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; cpu_cnt++; ASSERT(cpu_cnt <= no_of_cpus); } } - return; + goto tx_cpu_init; } /* @@ -942,14 +1033,28 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; if (++cpu_cnt >= no_of_cpus) cpu_cnt = 0; } + +tx_cpu_init: + mac_tx_cpu_init(flent, mrp, NULL); + + /* + * Copy the user specified CPUs to the effective CPUs + */ + for (i = 0; 
i < mrp->mrp_ncpus; i++) { + emrp->mrp_cpu[i] = mrp->mrp_cpu[i]; + } + emrp->mrp_ncpus = mrp->mrp_ncpus; + emrp->mrp_mask = mrp->mrp_mask; + bzero(emrp->mrp_pool, MAXPATHLEN); } /* @@ -960,64 +1065,95 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * with a flent. */ static void -mac_flow_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) +mac_flow_cpu_init(flow_entry_t *flent, cpupart_t *cpupart) { mac_soft_ring_set_t *rx_srs; processorid_t cpuid; - int j, srs_cnt, soft_ring_cnt = 0; + int i, j, k, srs_cnt, nscpus, maxcpus, soft_ring_cnt = 0; mac_cpus_t *srs_cpu; + mac_resource_props_t *emrp = &flent->fe_effective_props; + uint32_t cpus[MRP_NCPUS]; - if (mrp->mrp_mask & MRP_CPUS_USERSPEC) { - mac_flow_user_cpu_init(flent, mrp); - } else { + /* + * The maximum number of CPUs available can either be + * the number of CPUs in the pool or the number of CPUs + * in the system. + */ + maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus; + + /* + * Compute the number of soft rings needed on top for each Rx + * SRS. "rx_srs_cnt-1" indicates the number of Rx SRS + * associated with h/w Rx rings. Soft ring count needed for + * each h/w Rx SRS is computed and the same is applied to + * software classified Rx SRS. The first Rx SRS in fe_rx_srs[] + * is the software classified Rx SRS. + */ + soft_ring_cnt = mac_compute_soft_ring_count(flent, + flent->fe_rx_srs_cnt - 1, maxcpus); + if (soft_ring_cnt == 0) { /* - * Compute the number of soft rings needed on top for each Rx - * SRS. "rx_srs_cnt-1" indicates the number of Rx SRS - * associated with h/w Rx rings. Soft ring count needed for - * each h/w Rx SRS is computed and the same is applied to - * software classified Rx SRS. The first Rx SRS in fe_rx_srs[] - * is the software classified Rx SRS. + * Even when soft_ring_cnt is 0, we still need + * to create a soft ring for TCP, UDP and + * OTHER. So set it to 1. 
*/ - soft_ring_cnt = mac_compute_soft_ring_count(flent, - flent->fe_rx_srs_cnt - 1); - if (soft_ring_cnt == 0) { - /* - * Even when soft_ring_cnt is 0, we still need - * to create a soft ring for TCP, UDP and - * OTHER. So set it to 1. - */ - soft_ring_cnt = 1; - } - for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { - rx_srs = flent->fe_rx_srs[srs_cnt]; - srs_cpu = &rx_srs->srs_cpu; - if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT) { - if (soft_ring_cnt == srs_cpu->mc_fanout_cnt) - continue; - rx_srs->srs_fanout_state = SRS_FANOUT_REINIT; - } - srs_cpu->mc_ncpus = soft_ring_cnt; - srs_cpu->mc_fanout_cnt = soft_ring_cnt; - mutex_enter(&cpu_lock); - for (j = 0; j < soft_ring_cnt; j++) { - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_cpus[j] = cpuid; - srs_cpu->mc_fanout_cpus[j] = cpuid; - } - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_pollid = cpuid; - /* increment ncpus to account for polling cpu */ + soft_ring_cnt = 1; + } + for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { + rx_srs = flent->fe_rx_srs[srs_cnt]; + srs_cpu = &rx_srs->srs_cpu; + if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT) + rx_srs->srs_fanout_state = SRS_FANOUT_REINIT; + srs_cpu->mc_ncpus = soft_ring_cnt; + srs_cpu->mc_rx_fanout_cnt = soft_ring_cnt; + mutex_enter(&cpu_lock); + for (j = 0; j < soft_ring_cnt; j++) { + cpuid = mac_next_bind_cpu(cpupart); + srs_cpu->mc_cpus[j] = cpuid; + srs_cpu->mc_rx_fanout_cpus[j] = cpuid; + } + cpuid = mac_next_bind_cpu(cpupart); + srs_cpu->mc_rx_pollid = cpuid; + srs_cpu->mc_rx_intr_cpu = (mac_rx_intr_retarget ? 
+ srs_cpu->mc_rx_pollid : -1); + /* increment ncpus to account for polling cpu */ + srs_cpu->mc_ncpus++; + srs_cpu->mc_cpus[j++] = cpuid; + if (!mac_latency_optimize) { + cpuid = mac_next_bind_cpu(cpupart); srs_cpu->mc_ncpus++; srs_cpu->mc_cpus[j++] = cpuid; - if (!mac_latency_optimize) { - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_ncpus++; - srs_cpu->mc_cpus[j++] = cpuid; - } - srs_cpu->mc_workerid = cpuid; - mutex_exit(&cpu_lock); } + srs_cpu->mc_rx_workerid = cpuid; + mutex_exit(&cpu_lock); } + + nscpus = 0; + for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { + rx_srs = flent->fe_rx_srs[srs_cnt]; + srs_cpu = &rx_srs->srs_cpu; + for (j = 0; j < srs_cpu->mc_ncpus; j++) { + cpus[nscpus++] = srs_cpu->mc_cpus[j]; + } + } + + + /* + * Copy cpu list to fe_effective_props + * without duplicates. + */ + k = 0; + for (i = 0; i < nscpus; i++) { + for (j = 0; j < k; j++) { + if (emrp->mrp_cpu[j] == cpus[i]) + break; + } + if (j == k) + emrp->mrp_cpu[k++] = cpus[i]; + } + emrp->mrp_ncpus = k; + + mac_tx_cpu_init(flent, NULL, cpupart); } /* @@ -1025,15 +1161,46 @@ mac_flow_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * (setup SRS and set/update FANOUT, B/W and PRIORITY) */ +/* + * mac_srs_fanout_list_alloc: + * + * The underlying device can expose up to MAX_RINGS_PER_GROUP worth of + * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of + * array space is needed to store Tx soft rings. Thus we allocate so + * much array space for srs_tx_soft_rings. + * + * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth + * of space to st_soft_rings. This array is used for quick access to + * the soft ring associated with a pseudo Tx ring based on the pseudo + * ring's index (mr_index). 
+ */ static void mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs) { - mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); - mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); - mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); + mac_client_impl_t *mcip = mac_srs->srs_mcip; + + if (mac_srs->srs_type & SRST_TX) { + mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * + MAX_RINGS_PER_GROUP, KM_SLEEP); + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + tx->st_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * + MAX_RINGS_PER_GROUP, KM_SLEEP); + } + } else { + mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + } } static void @@ -1095,6 +1262,121 @@ mac_srs_poll_bind(mac_soft_ring_set_t *mac_srs, processorid_t cpuid) } /* + * Re-target interrupt to the passed CPU. If re-target is successful, + * set mc_rx_intr_cpu to the re-targeted CPU. Otherwise set it to -1. 
+ */ +void +mac_rx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs, processorid_t cpuid) +{ + cpu_t *cp; + mac_ring_t *ring = mac_srs->srs_ring; + mac_intr_t *mintr = &ring->mr_info.mri_intr; + flow_entry_t *flent = mac_srs->srs_flent; + boolean_t primary = mac_is_primary_client(mac_srs->srs_mcip); + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Don't re-target the interrupt for these cases: + * 1) ring is NULL + * 2) the interrupt is shared (mi_ddi_shared) + * 3) ddi_handle is NULL and !primary + * 4) primary, ddi_handle is NULL but fe_rx_srs_cnt > 2 + * Case 3 & 4 are because of mac_client_intr_cpu() routine. + * This routine will re-target fixed interrupt for primary + * mac client if the client has only one ring. In that + * case, mc_rx_intr_cpu will already have the correct value. + */ + if (ring == NULL || mintr->mi_ddi_shared || cpuid == -1 || + (mintr->mi_ddi_handle == NULL && !primary) || (primary && + mintr->mi_ddi_handle == NULL && flent->fe_rx_srs_cnt > 2)) { + mac_srs->srs_cpu.mc_rx_intr_cpu = -1; + return; + } + + if (mintr->mi_ddi_handle == NULL) + return; + + cp = cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp)) + return; + + /* Drop the cpu_lock as ddi_intr_set_affinity() holds it */ + mutex_exit(&cpu_lock); + if (ddi_intr_set_affinity(mintr->mi_ddi_handle, cpuid) == DDI_SUCCESS) + mac_srs->srs_cpu.mc_rx_intr_cpu = cpuid; + else + mac_srs->srs_cpu.mc_rx_intr_cpu = -1; + mutex_enter(&cpu_lock); +} + +/* + * Re-target Tx interrupts + */ +void +mac_tx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs) +{ + cpu_t *cp; + mac_ring_t *ring; + mac_intr_t *mintr; + mac_soft_ring_t *sringp; + mac_srs_tx_t *srs_tx; + mac_cpus_t *srs_cpu; + processorid_t cpuid; + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + srs_cpu = &mac_srs->srs_cpu; + if (MAC_TX_SOFT_RINGS(mac_srs)) { + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; + ring = (mac_ring_t *)sringp->s_ring_tx_arg2; + cpuid = srs_cpu->mc_tx_intr_cpu[i]; + cp = 
cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp) || + !MAC_RING_RETARGETABLE(ring)) { + srs_cpu->mc_tx_retargeted_cpu[i] = -1; + continue; + } + mintr = &ring->mr_info.mri_intr; + /* + * Drop the cpu_lock as ddi_intr_set_affinity() + * holds it + */ + mutex_exit(&cpu_lock); + if (ddi_intr_set_affinity(mintr->mi_ddi_handle, + cpuid) == DDI_SUCCESS) { + srs_cpu->mc_tx_retargeted_cpu[i] = cpuid; + } else { + srs_cpu->mc_tx_retargeted_cpu[i] = -1; + } + mutex_enter(&cpu_lock); + } + } else { + cpuid = srs_cpu->mc_tx_intr_cpu[0]; + cp = cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp)) { + srs_cpu->mc_tx_retargeted_cpu[0] = -1; + return; + } + srs_tx = &mac_srs->srs_tx; + ring = (mac_ring_t *)srs_tx->st_arg2; + if (MAC_RING_RETARGETABLE(ring)) { + mintr = &ring->mr_info.mri_intr; + mutex_exit(&cpu_lock); + if ((ddi_intr_set_affinity(mintr->mi_ddi_handle, + cpuid) == DDI_SUCCESS)) { + srs_cpu->mc_tx_retargeted_cpu[0] = cpuid; + } else { + srs_cpu->mc_tx_retargeted_cpu[0] = -1; + } + mutex_enter(&cpu_lock); + } + } +} + +/* * When a CPU comes back online, bind the MAC kernel threads which * were previously bound to that CPU, and had to be unbound because * the CPU was going away. @@ -1231,17 +1513,16 @@ done: static void mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) { - uint32_t tx_mode; + uint32_t tx_mode, ring_info = 0; mac_srs_tx_t *srs_tx = &srs->srs_tx; mac_client_impl_t *mcip = srs->srs_mcip; - mac_impl_t *mip = mcip->mci_mip; /* * We need to quiesce/restart the client here because mac_tx() and * srs->srs_tx->st_func do not hold srs->srs_lock while accessing * st_mode and related fields, which are modified by the code below. 
*/ - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce((mac_client_handle_t)mcip); mutex_enter(&srs->srs_lock); mutex_enter(&srs->srs_bw->mac_bw_lock); @@ -1250,14 +1531,18 @@ mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { /* Reset bandwidth limit */ if (tx_mode == SRS_TX_BW) { + if (srs_tx->st_arg2 != NULL) + ring_info = mac_hwring_getinfo(srs_tx->st_arg2); if (mac_tx_serialize || - (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) { + (ring_info & MAC_RING_TX_SERIALIZE)) { srs_tx->st_mode = SRS_TX_SERIALIZE; } else { srs_tx->st_mode = SRS_TX_DEFAULT; } } else if (tx_mode == SRS_TX_BW_FANOUT) { srs_tx->st_mode = SRS_TX_FANOUT; + } else if (tx_mode == SRS_TX_BW_AGGR) { + srs_tx->st_mode = SRS_TX_AGGR; } srs->srs_type &= ~SRST_BW_CONTROL; } else { @@ -1270,13 +1555,15 @@ mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) srs->srs_bw->mac_bw_drop_threshold = srs->srs_bw->mac_bw_limit << 1; srs->srs_type |= SRST_BW_CONTROL; - if (tx_mode != SRS_TX_BW && - tx_mode != SRS_TX_BW_FANOUT) { + if (tx_mode != SRS_TX_BW && tx_mode != SRS_TX_BW_FANOUT && + tx_mode != SRS_TX_BW_AGGR) { if (tx_mode == SRS_TX_SERIALIZE || tx_mode == SRS_TX_DEFAULT) { srs_tx->st_mode = SRS_TX_BW; } else if (tx_mode == SRS_TX_FANOUT) { srs_tx->st_mode = SRS_TX_BW_FANOUT; + } else if (tx_mode == SRS_TX_AGGR) { + srs_tx->st_mode = SRS_TX_BW_AGGR; } else { ASSERT(0); } @@ -1287,7 +1574,7 @@ done: mutex_exit(&srs->srs_bw->mac_bw_lock); mutex_exit(&srs->srs_lock); - mac_tx_client_restart(mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } /* @@ -1392,9 +1679,7 @@ mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable) static void mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs) { - int tcp_count = 0; - int udp_count = 0; - int oth_count = 0; + int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0; mac_soft_ring_t *softring; softring = 
mac_srs->srs_soft_ring_head; @@ -1403,33 +1688,35 @@ mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs) mac_srs->srs_tcp_ring_count = 0; mac_srs->srs_udp_ring_count = 0; mac_srs->srs_oth_ring_count = 0; + mac_srs->srs_tx_ring_count = 0; return; } - softring = mac_srs->srs_soft_ring_head; - tcp_count = udp_count = oth_count = 0; - while (softring != NULL) { - if (softring->s_ring_type & ST_RING_TCP) + if (softring->s_ring_type & ST_RING_TCP) { mac_srs->srs_tcp_soft_rings[tcp_count++] = softring; - else if (softring->s_ring_type & ST_RING_UDP) + } else if (softring->s_ring_type & ST_RING_UDP) { mac_srs->srs_udp_soft_rings[udp_count++] = softring; - else + } else if (softring->s_ring_type & ST_RING_OTH) { mac_srs->srs_oth_soft_rings[oth_count++] = softring; + } else { + ASSERT(softring->s_ring_type & ST_RING_TX); + mac_srs->srs_tx_soft_rings[tx_count++] = softring; + } softring = softring->s_ring_next; } ASSERT(mac_srs->srs_soft_ring_count == - (tcp_count + udp_count + oth_count)); - + (tcp_count + udp_count + oth_count + tx_count)); mac_srs->srs_tcp_ring_count = tcp_count; mac_srs->srs_udp_ring_count = udp_count; mac_srs->srs_oth_ring_count = oth_count; + mac_srs->srs_tx_ring_count = tx_count; } void -mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, - pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, +mac_srs_create_proto_softrings(int id, uint16_t type, pri_t pri, + mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2, boolean_t set_bypass) { @@ -1446,7 +1733,7 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, mrf.mrf_flow_priority = pri; softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_TCP), pri, mcip, mac_srs, + (type|ST_RING_TCP), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; @@ -1481,7 +1768,7 @@ mac_srs_create_proto_softrings(int 
id, void *flent, uint16_t type, * bypass the DLS layer. */ softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_UDP), pri, mcip, mac_srs, + (type|ST_RING_UDP), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; @@ -1493,7 +1780,7 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, /* Create the Oth softrings which has to go through the DLS */ softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_OTH), pri, mcip, mac_srs, + (type|ST_RING_OTH), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; } @@ -1507,19 +1794,16 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, * same CPU as that of the soft ring's. */ static void -mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2, mac_soft_ring_set_t *mac_rx_srs, - mac_soft_ring_set_t *mac_tx_srs) +mac_srs_fanout_modify(mac_client_impl_t *mcip, mac_direct_rx_t rx_func, + void *x_arg1, mac_resource_handle_t x_arg2, + mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs) { mac_soft_ring_t *softring; uint32_t soft_ring_flag = 0; processorid_t cpuid = -1; - boolean_t user_specified; int i, srings_present, new_fanout_cnt; mac_cpus_t *srs_cpu; - user_specified = mrp->mrp_mask & MRP_CPUS_USERSPEC; /* fanout state is REINIT. 
Set it back to INIT */ ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_REINIT); mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT; @@ -1528,7 +1812,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, srings_present = mac_rx_srs->srs_tcp_ring_count; /* new request */ srs_cpu = &mac_rx_srs->srs_cpu; - new_fanout_cnt = srs_cpu->mc_fanout_cnt; + new_fanout_cnt = srs_cpu->mc_rx_fanout_cnt; mutex_enter(&mac_rx_srs->srs_lock); if (mac_rx_srs->srs_type & SRST_BW_CONTROL) @@ -1547,8 +1831,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, * Create the protocol softrings and set the * DLS bypass where possible. */ - mac_srs_create_proto_softrings(i, - (void *)flent, soft_ring_flag, + mac_srs_create_proto_softrings(i, soft_ring_flag, mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, rx_func, x_arg1, x_arg2, B_TRUE); } @@ -1583,7 +1866,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(new_fanout_cnt == mac_rx_srs->srs_tcp_ring_count); mutex_enter(&cpu_lock); for (i = 0; i < mac_rx_srs->srs_tcp_ring_count; i++) { - cpuid = srs_cpu->mc_fanout_cpus[i]; + cpuid = srs_cpu->mc_rx_fanout_cpus[i]; (void) mac_soft_ring_bind(mac_rx_srs->srs_udp_soft_rings[i], cpuid); (void) mac_soft_ring_bind(mac_rx_srs->srs_oth_soft_rings[i], @@ -1597,15 +1880,16 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, } } - mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_pollid); - mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_workerid); - + mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid); + mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu); /* * Bind Tx srs and soft ring threads too. Let's bind tx * srs to the last cpu in mrp list. 
*/ - if (mac_tx_srs != NULL && user_specified) { + if (mac_tx_srs != NULL) { BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); + mac_tx_srs_retarget_intr(mac_tx_srs); } mutex_exit(&cpu_lock); } @@ -1614,16 +1898,15 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, * Bind SRS threads and soft rings to CPUs/create fanout list. */ void -mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2, mac_soft_ring_set_t *mac_rx_srs, - mac_soft_ring_set_t *mac_tx_srs) +mac_srs_fanout_init(mac_client_impl_t *mcip, mac_resource_props_t *mrp, + mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2, + mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs, + cpupart_t *cpupart) { int i; - processorid_t cpuid, worker_cpuid, poll_cpuid; + processorid_t cpuid; uint32_t soft_ring_flag = 0; int soft_ring_cnt; - boolean_t user_specified = B_FALSE; mac_cpus_t *srs_cpu = &mac_rx_srs->srs_cpu; /* @@ -1641,31 +1924,27 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_UNINIT); mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT; - user_specified = mrp->mrp_mask & MRP_CPUS_USERSPEC; /* * Ring count can be 0 if no fanout is required and no cpu * were specified. 
Leave the SRS worker and poll thread * unbound */ ASSERT(mrp != NULL); - soft_ring_cnt = srs_cpu->mc_fanout_cnt; + soft_ring_cnt = srs_cpu->mc_rx_fanout_cnt; /* Step 1: bind cpu contains cpu list where threads need to bind */ if (soft_ring_cnt > 0) { mutex_enter(&cpu_lock); for (i = 0; i < soft_ring_cnt; i++) { - cpuid = srs_cpu->mc_fanout_cpus[i]; + cpuid = srs_cpu->mc_rx_fanout_cpus[i]; /* Create the protocol softrings */ - mac_srs_create_proto_softrings(i, (void *)flent, - soft_ring_flag, mac_rx_srs->srs_pri, - mcip, mac_rx_srs, cpuid, rx_func, - x_arg1, x_arg2, B_FALSE); + mac_srs_create_proto_softrings(i, soft_ring_flag, + mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, + rx_func, x_arg1, x_arg2, B_FALSE); } - worker_cpuid = srs_cpu->mc_workerid; - poll_cpuid = srs_cpu->mc_pollid; - mac_srs_worker_bind(mac_rx_srs, worker_cpuid); - mac_srs_poll_bind(mac_rx_srs, poll_cpuid); - + mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid); + mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu); /* * Bind Tx srs and soft ring threads too. * Let's bind tx srs to the last cpu in @@ -1676,9 +1955,8 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, goto alldone; } - if (user_specified) { - BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); - } + BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); + mac_tx_srs_retarget_intr(mac_tx_srs); mutex_exit(&cpu_lock); } else { mutex_enter(&cpu_lock); @@ -1686,8 +1964,8 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, * For a subflow, mrp_workerid and mrp_pollid * is not set. 
*/ - mac_srs_worker_bind(mac_rx_srs, mrp->mrp_workerid); - mac_srs_poll_bind(mac_rx_srs, mrp->mrp_pollid); + mac_srs_worker_bind(mac_rx_srs, mrp->mrp_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, mrp->mrp_rx_pollid); mutex_exit(&cpu_lock); goto no_softrings; } @@ -1702,12 +1980,11 @@ alldone: no_softrings: if (mac_rx_srs->srs_type & SRST_FANOUT_PROTO) { mutex_enter(&cpu_lock); - cpuid = mac_next_bind_cpu(); + cpuid = mac_next_bind_cpu(cpupart); /* Create the protocol softrings */ - mac_srs_create_proto_softrings(0, (void *)flent, - soft_ring_flag, mac_rx_srs->srs_pri, - mcip, mac_rx_srs, cpuid, rx_func, - x_arg1, x_arg2, B_FALSE); + mac_srs_create_proto_softrings(0, soft_ring_flag, + mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, + rx_func, x_arg1, x_arg2, B_FALSE); mutex_exit(&cpu_lock); } else { /* @@ -1729,7 +2006,7 @@ no_softrings: void mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2) + mac_resource_handle_t x_arg2, cpupart_t *cpupart) { mac_soft_ring_set_t *mac_rx_srs, *mac_tx_srs; int i, rx_srs_cnt; @@ -1739,7 +2016,7 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * This is an aggregation port. Fanout will be setup * over the aggregation itself. 
*/ - if (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) return; mac_rx_srs = flent->fe_rx_srs[0]; @@ -1754,12 +2031,18 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* No fanout for subflows */ if (flent->fe_type & FLOW_USER) { - mac_srs_fanout_init(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_init(mcip, mrp, rx_func, + x_arg1, x_arg2, mac_rx_srs, mac_tx_srs, + cpupart); return; } - mac_flow_cpu_init(flent, mrp); + if (mrp->mrp_mask & MRP_CPUS_USERSPEC) + mac_flow_user_cpu_init(flent, mrp); + else + mac_flow_cpu_init(flent, cpupart); + + mrp->mrp_rx_fanout_cnt = mac_rx_srs->srs_cpu.mc_rx_fanout_cnt; /* * Set up fanout for both SW (0th SRS) and HW classified @@ -1771,15 +2054,16 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_tx_srs = NULL; switch (mac_rx_srs->srs_fanout_state) { case SRS_FANOUT_UNINIT: - mac_srs_fanout_init(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_init(mcip, mrp, rx_func, + x_arg1, x_arg2, mac_rx_srs, mac_tx_srs, + cpupart); break; case SRS_FANOUT_INIT: break; case SRS_FANOUT_REINIT: mac_rx_srs_quiesce(mac_rx_srs, SRS_QUIESCE); - mac_srs_fanout_modify(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_modify(mcip, rx_func, x_arg1, + x_arg2, mac_rx_srs, mac_tx_srs); mac_rx_srs_restart(mac_rx_srs); break; default: @@ -1791,7 +2075,7 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * mac_create_soft_ring_set: + * mac_srs_create: * * Create a mac_soft_ring_set_t (SRS). If soft_ring_fanout_type is * SRST_TX, an SRS for Tx side is created. 
Otherwise an SRS for Rx side @@ -1867,6 +2151,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, mac_srs->srs_type = (srs_type | SRST_NO_SOFT_RINGS); mac_srs->srs_worker_cpuid = mac_srs->srs_worker_cpuid_save = -1; mac_srs->srs_poll_cpuid = mac_srs->srs_poll_cpuid_save = -1; + mac_srs->srs_mcip = mcip; mac_srs_fanout_list_alloc(mac_srs); /* @@ -1881,7 +2166,6 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, } else { mac_srs->srs_pri = mcip->mci_max_pri; } - mac_srs->srs_mcip = mcip; /* * We need to insert the SRS in the global list before * binding the SRS and SR threads. Otherwise there is a @@ -1959,7 +2243,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, mac_tx_srs_max_q_cnt : mac_tx_srs_hiwat; srs_tx->st_arg1 = x_arg1; srs_tx->st_arg2 = x_arg2; - return (mac_srs); + goto done; } if ((srs_type & SRST_FLOW) != 0 || @@ -1973,11 +2257,13 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, srs_rx->sr_arg2 = x_arg2; if (ring != NULL) { + uint_t ring_info; + /* Is the mac_srs created over the RX default group? */ if (ring->mr_gh == (mac_group_handle_t) - (&mcip->mci_mip->mi_rx_groups[0])) + MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) { mac_srs->srs_type |= SRST_DEFAULT_GRP; - + } mac_srs->srs_ring = ring; ring->mr_srs = mac_srs; ring->mr_classify_type = MAC_HW_CLASSIFIER; @@ -1997,9 +2283,12 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, * so that we get a chance to switch into a polling * mode under backlog. */ - if (mcip->mci_mip->mi_v12n_level & MAC_VIRT_SERIALIZE) + ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring); + if (ring_info & MAC_RING_RX_ENQUEUE) mac_srs->srs_state |= SRS_SOFTRING_QUEUE; } +done: + mac_srs_stat_create(mac_srs); return (mac_srs); } @@ -2043,7 +2332,7 @@ mac_find_fanout(flow_entry_t *flent, uint32_t link_type) /* * Change a group from h/w to s/w classification. 
*/ -static void +void mac_rx_switch_grp_to_sw(mac_group_t *group) { mac_ring_t *ring; @@ -2063,11 +2352,11 @@ mac_rx_switch_grp_to_sw(mac_group_t *group) if (ring->mr_state != MR_INUSE) (void) mac_start_ring(ring); + /* * We need to perform SW classification * for packets landing in these rings */ - ring->mr_state = MR_INUSE; ring->mr_flag = 0; ring->mr_classify_type = MAC_SW_CLASSIFIER; } @@ -2079,14 +2368,38 @@ mac_rx_switch_grp_to_sw(mac_group_t *group) */ void mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_group_t *group, uint32_t link_type) + uint32_t link_type) +{ + cpupart_t *cpupart; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + boolean_t use_default = B_FALSE; + + mac_rx_srs_group_setup(mcip, flent, link_type); + mac_tx_srs_group_setup(mcip, flent, link_type); + + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); +} + +/* + * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there + * is a group associated with this MAC client, set up SRSs for individual + * h/w rings. 
+ */ +void +mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, + uint32_t link_type) { mac_impl_t *mip = mcip->mci_mip; mac_soft_ring_set_t *mac_srs; - mac_soft_ring_set_t *tx_srs = NULL; mac_ring_t *ring; uint32_t fanout_type; - boolean_t created_srs = B_FALSE; + mac_group_t *rx_group = flent->fe_rx_ring_group; fanout_type = mac_find_fanout(flent, link_type); @@ -2096,64 +2409,23 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* Setup the Rx SRS */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, NULL); - mutex_enter(&flent->fe_lock); flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc; flent->fe_cb_arg1 = (void *)mip; flent->fe_cb_arg2 = (void *)mac_srs; mutex_exit(&flent->fe_lock); - - /* Setup the Tx SRS as well */ - ASSERT(flent->fe_tx_srs == NULL); - tx_srs = mac_srs_create(mcip, flent, SRST_TX | link_type, - NULL, mcip, NULL, NULL); - - if (mcip->mci_share != NULL) { - mac_srs_tx_t *tx = &tx_srs->srs_tx; - ASSERT((mcip->mci_state_flags & MCIS_NO_HWRINGS) == 0); - /* - * A share requires a dedicated TX group. - * mac_reserve_tx_group() does the work needed to - * allocate a new group and populate that group - * with rings according to the driver requirements - * and limitations. - */ - tx->st_group = - mac_reserve_tx_group(mip, mcip->mci_share); - ASSERT(tx->st_group != NULL); - tx->st_group->mrg_tx_client = mcip; - } - mac_tx_srs_setup(mcip, flent, link_type); - created_srs = B_TRUE; } - if (group == NULL) { - if (created_srs) { - mac_fanout_setup(mcip, flent, - MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, - mcip, NULL); - } + if (rx_group == NULL) return; - } - /* * fanout for default SRS is done when default SRS are created * above. As each ring is added to the group, we setup the * SRS and fanout to it. */ - switch (group->mrg_state) { + switch (rx_group->mrg_state) { case MAC_GROUP_STATE_RESERVED: - /* - * The group is exclusively ours. 
Create a SRS - * for each ring in the group and allow the - * individual SRS to dynamically poll their - * Rx ring. Do this only if the client is not - * a VLAN MAC client since for VLAN we do - * s/w classification for the VID check. - */ - if (i_mac_flow_vid(mcip->mci_flent) != VLAN_ID_NONE) - break; - for (ring = group->mrg_rings; ring != NULL; + for (ring = rx_group->mrg_rings; ring != NULL; ring = ring->mr_next) { switch (ring->mr_state) { case MR_INUSE: @@ -2163,14 +2435,28 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, if (ring->mr_state != MR_INUSE) (void) mac_start_ring(ring); - ring->mr_state = MR_INUSE; - + /* + * Since the group is exclusively ours create + * an SRS for this ring to allow the + * individual SRS to dynamically poll the + * ring. Do this only if the client is not + * a VLAN MAC client, since for VLAN we do + * s/w classification for the VID check, and + * if it has a unicast address. + */ + if ((mcip->mci_state_flags & + MCIS_NO_UNICAST_ADDR) || + i_mac_flow_vid(mcip->mci_flent) != + VLAN_ID_NONE) { + break; + } mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, ring); break; default: - cmn_err(CE_PANIC, "srs_setup: mcip = %p " + cmn_err(CE_PANIC, + "srs_setup: mcip = %p " "trying to add UNKNOWN ring = %p\n", (void *)mcip, (void *)ring); break; @@ -2181,43 +2467,102 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* * Set all rings of this group to software classified. * - * If the group is current RESERVED, the existing mac client - * (the only client on this group) is using this group - * exclusively. In that case we need to disable polling on - * the rings of the group (if it was enabled), and free the - * SRS associated with the rings. + * If the group is current RESERVED, the existing mac + * client (the only client on this group) is using + * this group exclusively. 
In that case we need to + * disable polling on the rings of the group (if it + * was enabled), and free the SRS associated with the + * rings. */ - mac_rx_switch_grp_to_sw(group); + mac_rx_switch_grp_to_sw(rx_group); break; default: ASSERT(B_FALSE); break; } - mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), - mac_rx_deliver, mcip, NULL); } +/* + * Set up the TX SRS. + */ void -mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, +mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) { + int cnt; + int ringcnt; + mac_ring_t *ring; + mac_group_t *grp; + + /* + * If we are opened exclusively (like aggr does for aggr_ports), + * don't set up Tx SRS and Tx soft rings as they won't be used. + * The same thing has to be done for Rx side also. See bug: + * 6880080 + */ + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) { + /* + * If we have rings, start them here. + */ + if (flent->fe_tx_ring_group == NULL) + return; + grp = (mac_group_t *)flent->fe_tx_ring_group; + ringcnt = grp->mrg_cur_count; + ring = grp->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state != MR_INUSE) { + (void) mac_start_ring(ring); + } + ring = ring->mr_next; + } + return; + } + if (flent->fe_tx_srs == NULL) { + (void) mac_srs_create(mcip, flent, SRST_TX | link_type, + NULL, mcip, NULL, NULL); + } + mac_tx_srs_setup(mcip, flent); +} + +/* + * Remove all the RX SRSs. If we want to remove only the SRSs associated + * with h/w rings, leave the S/W SRS alone. This is used when we want to + * move the MAC client from one group to another, so we need to teardown + * on the h/w SRSs. 
+ */ +void +mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) +{ mac_soft_ring_set_t *mac_srs; - mac_soft_ring_set_t *tx_srs; - mac_srs_tx_t *tx; int i; + int count = flent->fe_rx_srs_cnt; - for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + for (i = 0; i < count; i++) { + if (i == 0 && hwonly) + continue; mac_srs = flent->fe_rx_srs[i]; mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED); - /* - * Deal with all fanout tear down etc. - */ mac_srs_free(mac_srs); flent->fe_rx_srs[i] = NULL; + flent->fe_rx_srs_cnt--; } - flent->fe_rx_srs_cnt = 0; + ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1); + ASSERT(hwonly || flent->fe_rx_srs_cnt == 0); +} + +/* + * Remove the TX SRS. + */ +void +mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, + uint32_t link_type) +{ + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + + if ((tx_srs = flent->fe_tx_srs) == NULL) + return; - tx_srs = flent->fe_tx_srs; tx = &tx_srs->srs_tx; switch (link_type) { case SRST_FLOW: @@ -2228,25 +2573,16 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_tx_srs_quiesce(tx_srs, SRS_CONDEMNED); break; case SRST_LINK: - mac_tx_client_quiesce(mcip, SRS_CONDEMNED); - /* - * Release the TX resources. First the TX group, if any - * was assigned to the MAC client, which will cause the - * TX rings to be moved back to the pool. Then free the - * rings themselves. 
- */ - if (tx->st_group != NULL) { - mac_release_tx_group(tx_srs->srs_mcip->mci_mip, - tx->st_group); - tx->st_group = NULL; - } - if (tx->st_ring_count != 0) { - kmem_free(tx->st_rings, - sizeof (mac_ring_handle_t) * tx->st_ring_count); - } + mac_tx_client_condemn((mac_client_handle_t)mcip); if (tx->st_arg2 != NULL) { ASSERT(tx_srs->srs_type & SRST_TX); - mac_release_tx_ring(tx->st_arg2); + /* + * The ring itself will be stopped when + * we release the group or in the + * mac_datapath_teardown (for the default + * group) + */ + tx->st_arg2 = NULL; } break; default: @@ -2258,7 +2594,9 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * This is the group state machine. The state of an Rx group is given by + * This is the group state machine. + * + * The state of an Rx group is given by * the following table. The default group and its rings are started in * mac_start itself and the default group stays in SHARED state until * mac_stop at which time the group and rings are stopped and and it @@ -2276,15 +2614,27 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * * Non-default 0 N.A. REGISTERED * Non-default 1 N.A. RESERVED - * Non-default > 1 N.A. SHARED * * Default 0 N.A. SHARED * Default 1 1 RESERVED * Default 1 > 1 SHARED * Default > 1 N.A. SHARED + * + * For a TX group, the following is the state table. 
+ * + * Group type # of clients Group State + * in the group + * + * Non-default 0 REGISTERED + * Non-default 1 RESERVED + * + * Default 0 REGISTERED + * Default 1 RESERVED + * Default > 1 SHARED */ mac_group_state_t -mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) +mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip, + mac_group_t *defgrp, boolean_t rx_group) { mac_impl_t *mip = (mac_impl_t *)grp->mrg_mh; @@ -2292,11 +2642,11 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) /* Non-default group */ - if (grp != mip->mi_rx_groups) { - if (MAC_RX_GROUP_NO_CLIENT(grp)) + if (grp != defgrp) { + if (MAC_GROUP_NO_CLIENT(grp)) return (MAC_GROUP_STATE_REGISTERED); - *group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT(grp); + *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp); if (*group_only_mcip != NULL) return (MAC_GROUP_STATE_RESERVED); @@ -2305,10 +2655,19 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) /* Default group */ - if (MAC_RX_GROUP_NO_CLIENT(grp) || mip->mi_nactiveclients != 1) + if (MAC_GROUP_NO_CLIENT(grp)) { + if (rx_group) + return (MAC_GROUP_STATE_SHARED); + else + return (MAC_GROUP_STATE_REGISTERED); + } + *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp); + if (*group_only_mcip == NULL) + return (MAC_GROUP_STATE_SHARED); + + if (rx_group && mip->mi_nactiveclients != 1) return (MAC_GROUP_STATE_SHARED); - *group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT(grp); ASSERT(*group_only_mcip != NULL); return (MAC_GROUP_STATE_RESERVED); } @@ -2456,13 +2815,12 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * * For NICs which have only 1 Rx ring (we treat NICs with no Rx rings * as NIC with a single default ring), we assign the only ring to - * primary Link as MAC_RX_HW_DEFAULT_RING. 
The primary Link SRS can do - * polling on it as long as it is the only link in use and we compare - * the MAC address for unicast packets before accepting an incoming - * packet (there is no need for S/W classification in this case). We - * disable polling on the only ring the moment 2nd link gets created - * (the polling remains enabled even though there are broadcast and - * multicast flows created). + * primary Link. The primary Link SRS can do polling on it as long as + * it is the only link in use and we compare the MAC address for unicast + * packets before accepting an incoming packet (there is no need for S/W + * classification in this case). We disable polling on the only ring the + * moment 2nd link gets created (the polling remains enabled even though + * there are broadcast and * multicast flows created). * * If the NIC has more than 1 Rx ring, we assign the default ring (the * 1st ring) to deal with broadcast, multicast and traffic for other @@ -2472,10 +2830,6 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * (and its SRS) can continue to poll the assigned Rx ring at all times * independantly. * - * Right now we just assign MAC_RX_HW_DEFAULT_RING to note that it is - * primary NIC and later we will check to see how many Rx rings we - * have and can we get a non default Rx ring for the primary MAC. - * * Note: In future, if no fanout is specified, we try to assign 2 Rx * rings for the primary Link with the primary MAC address + TCP going * to one ring and primary MAC address + UDP|SCTP going to other ring. @@ -2487,56 +2841,128 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * As an optimization, when a new NIC or VNIC is created, we can get * only one Rx ring and make it a TCP specific Rx ring and use the * H/W default Rx ring for the rest (this Rx ring is never polled). 
+ * + * For clients that don't have MAC address, but want to receive and + * transmit packets (e.g, bpf, gvrp etc.), we need to setup the datapath. + * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we + * always give the default group and use software classification (i.e. + * even if this is the only client in the default group, we will + * leave group as shared). */ int mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) { mac_impl_t *mip = mcip->mci_mip; - mac_group_t *group = NULL; - mac_group_t *default_group; + mac_group_t *rgroup = NULL; + mac_group_t *tgroup = NULL; + mac_group_t *default_rgroup; + mac_group_t *default_tgroup; int err; uint8_t *mac_addr; - mac_rx_group_reserve_type_t rtype = MAC_RX_RESERVE_NONDEFAULT; mac_group_state_t next_state; mac_client_impl_t *group_only_mcip; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + boolean_t rxhw; + boolean_t txhw; + boolean_t use_default = B_FALSE; + cpupart_t *cpupart; + boolean_t no_unicast; + boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC; + mac_client_impl_t *reloc_pmcip = NULL; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); switch (link_type) { case SRST_FLOW: - mac_srs_group_setup(mcip, flent, NULL, link_type); + mac_srs_group_setup(mcip, flent, link_type); return (0); case SRST_LINK: + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; mac_addr = flent->fe_flow_desc.fd_dst_mac; - /* Check if we need to reserve the default group */ - if (flent->fe_type & FLOW_PRIMARY_MAC) - rtype = MAC_RX_RESERVE_DEFAULT; + /* Default RX group */ + default_rgroup = MAC_DEFAULT_RX_GROUP(mip); - if ((mcip->mci_state_flags & MCIS_NO_HWRINGS) == 0) { - /* - * Check to see if we can get an exclusive group for - * this mac address or if there already exists a - * group that has this mac address (case of VLANs). - * If no groups are available, use the default group. 
- */ - group = mac_reserve_rx_group(mcip, mac_addr, rtype); + /* Default TX group */ + default_tgroup = MAC_DEFAULT_TX_GROUP(mip); + + if (no_unicast) { + rgroup = default_rgroup; + tgroup = default_tgroup; + goto grp_found; } + rxhw = (mrp->mrp_mask & MRP_RX_RINGS) && + (mrp->mrp_nrxrings > 0 || + (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)); + txhw = (mrp->mrp_mask & MRP_TX_RINGS) && + (mrp->mrp_ntxrings > 0 || + (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)); - if (group == NULL) { - if ((mcip->mci_state_flags & MCIS_REQ_HWRINGS) != 0) - return (ENOSPC); - group = &mip->mi_rx_groups[0]; + /* + * By default we have given the primary all the rings + * i.e. the default group. Let's see if the primary + * needs to be relocated so that the addition of this + * client doesn't impact the primary's performance, + * i.e. if the primary is in the default group and + * we add this client, the primary will lose polling. + * We do this only for NICs supporting dynamic ring + * grouping and only when this is the first client + * after the primary (i.e. nactiveclients is 2) + */ + if (!isprimary && mip->mi_nactiveclients == 2 && + (group_only_mcip = mac_primary_client_handle(mip)) != + NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + reloc_pmcip = mac_check_primary_relocation( + group_only_mcip, rxhw); + } + /* + * Check to see if we can get an exclusive group for + * this mac address or if there already exists a + * group that has this mac address (case of VLANs). + * If no groups are available, use the default group. + */ + rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE); + if (rgroup == NULL && rxhw) { + err = ENOSPC; + goto setup_failed; + } else if (rgroup == NULL) { + rgroup = default_rgroup; + } + /* + * Check to see if we can get an exclusive group for + * this mac client. If no groups are available, use + * the default group. 
+ */ + tgroup = mac_reserve_tx_group(mcip, B_FALSE); + if (tgroup == NULL && txhw) { + if (rgroup != NULL && rgroup != default_rgroup) + mac_release_rx_group(mcip, rgroup); + err = ENOSPC; + goto setup_failed; + } else if (tgroup == NULL) { + tgroup = default_tgroup; } /* * Some NICs don't support any Rx rings, so there may not * even be a default group. */ - if (group != NULL) { - flent->fe_rx_ring_group = group; + grp_found: + if (rgroup != NULL) { + if (rgroup != default_rgroup && + MAC_GROUP_NO_CLIENT(rgroup) && + (rxhw || mcip->mci_share != NULL)) { + MAC_RX_GRP_RESERVED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RESERVED(mip, + rgroup->mrg_cur_count); + } + } + flent->fe_rx_ring_group = rgroup; /* * Add the client to the group. This could cause * either this group to move to the shared state or @@ -2545,18 +2971,29 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * actions on the default group are postponed to * the end of this function. 
*/ - mac_rx_group_add_client(group, mcip); - next_state = mac_rx_group_next_state(group, - &group_only_mcip); - - ASSERT((next_state == MAC_GROUP_STATE_RESERVED && - mcip == group_only_mcip) || - (next_state == MAC_GROUP_STATE_SHARED && - group_only_mcip == NULL)); - - mac_set_rx_group_state(group, next_state); + mac_group_add_client(rgroup, mcip); + next_state = mac_group_next_state(rgroup, + &group_only_mcip, default_rgroup, B_TRUE); + mac_set_group_state(rgroup, next_state); } + if (tgroup != NULL) { + if (tgroup != default_tgroup && + MAC_GROUP_NO_CLIENT(tgroup) && + (txhw || mcip->mci_share != NULL)) { + MAC_TX_GRP_RESERVED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RESERVED(mip, + tgroup->mrg_cur_count); + } + } + flent->fe_tx_ring_group = tgroup; + mac_group_add_client(tgroup, mcip); + next_state = mac_group_next_state(tgroup, + &group_only_mcip, default_tgroup, B_FALSE); + tgroup->mrg_state = next_state; + } /* * Setup the Rx and Tx SRSes. If we got a pristine group * exclusively above, mac_srs_group_setup would simply create @@ -2564,18 +3001,23 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * reserved group, mac_srs_group_setup would also dismantle the * SRSes of the previously exclusive group */ - mac_srs_group_setup(mcip, flent, group, link_type); + mac_srs_group_setup(mcip, flent, link_type); + /* We are setting up minimal datapath only */ + if (no_unicast) + break; /* Program the S/W Classifer */ if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) goto setup_failed; /* Program the H/W Classifier */ - if ((err = mac_add_macaddr(mip, group, mac_addr, + if ((err = mac_add_macaddr(mip, rgroup, mac_addr, (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0) goto setup_failed; mcip->mci_unicast = mac_find_macaddr(mip, mac_addr); ASSERT(mcip->mci_unicast != NULL); + /* Initialize the v6 local addr used by link protection */ + mac_protect_update_v6_local_addr(mcip); break; default: @@ -2590,38 
+3032,53 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * incoming broadcast traffic to the other groups and dismantle the * SRSes over the default group. */ - if (group != NULL) { - if (group != mip->mi_rx_groups) { - default_group = mip->mi_rx_groups; - if (default_group->mrg_state == + if (rgroup != NULL) { + if (rgroup != default_rgroup) { + if (default_rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) { - group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT( - default_group); + group_only_mcip = MAC_GROUP_ONLY_CLIENT( + default_rgroup); ASSERT(group_only_mcip != NULL && mip->mi_nactiveclients > 1); - mac_set_rx_group_state(default_group, + mac_set_group_state(default_rgroup, MAC_GROUP_STATE_SHARED); - mac_srs_group_setup(group_only_mcip, + mac_rx_srs_group_setup(group_only_mcip, + group_only_mcip->mci_flent, SRST_LINK); + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(group_only_mcip, group_only_mcip->mci_flent, - default_group, SRST_LINK); + MCIP_RESOURCE_PROPS(group_only_mcip), + mac_rx_deliver, group_only_mcip, NULL, + cpupart); + mac_set_pool_effective(use_default, cpupart, + mrp, emrp); + pool_unlock(); } - ASSERT(default_group->mrg_state == + ASSERT(default_rgroup->mrg_state == MAC_GROUP_STATE_SHARED); } /* * If we get an exclusive group for a VLAN MAC client we * need to take the s/w path to make the additional check for * the vid. Disable polling and set it to s/w classification. + * Similarly for clients that don't have a unicast address. 
*/ - if (group->mrg_state == MAC_GROUP_STATE_RESERVED && - i_mac_flow_vid(mcip->mci_flent) != VLAN_ID_NONE) { - mac_rx_switch_grp_to_sw(group); + if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED && + (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) { + mac_rx_switch_grp_to_sw(rgroup); } } + mac_set_rings_effective(mcip); return (0); setup_failed: + /* Switch the primary back to default group */ + if (reloc_pmcip != NULL) { + (void) mac_rx_switch_group(reloc_pmcip, + reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup); + } mac_datapath_teardown(mcip, flent, link_type); return (err); } @@ -2637,12 +3094,14 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_group_t *default_group; boolean_t check_default_group = B_FALSE; mac_group_state_t next_state; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); switch (link_type) { case SRST_FLOW: - mac_srs_group_teardown(mcip, flent, SRST_FLOW); + mac_rx_srs_group_teardown(flent, B_FALSE); + mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW); return; case SRST_LINK: @@ -2666,7 +3125,9 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_flow_wait(flent, FLOW_DRIVER_UPCALL); /* Now quiesce and destroy all SRS and soft rings */ - mac_srs_group_teardown(mcip, flent, SRST_LINK); + mac_rx_srs_group_teardown(flent, B_FALSE); + mac_tx_srs_group_teardown(mcip, flent, SRST_LINK); + ASSERT((mcip->mci_flent == flent) && (flent->fe_next == NULL)); @@ -2677,16 +3138,17 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * were the last client, release the group. 
*/ group = flent->fe_rx_ring_group; + default_group = MAC_DEFAULT_RX_GROUP(mip); if (group != NULL) { - mac_rx_group_remove_client(group, mcip); - next_state = mac_rx_group_next_state(group, - &grp_only_mcip); + mac_group_remove_client(group, mcip); + next_state = mac_group_next_state(group, + &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { /* * Only one client left on this RX group. */ ASSERT(grp_only_mcip != NULL); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_RESERVED); group_only_flent = grp_only_mcip->mci_flent; @@ -2695,9 +3157,14 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * access on the group. Allow it to * dynamically poll the H/W rings etc. */ - mac_srs_group_setup(grp_only_mcip, - group_only_flent, group, SRST_LINK); + mac_rx_srs_group_setup(grp_only_mcip, + group_only_flent, SRST_LINK); + mac_fanout_setup(grp_only_mcip, + group_only_flent, + MCIP_RESOURCE_PROPS(grp_only_mcip), + mac_rx_deliver, grp_only_mcip, NULL, NULL); mac_rx_group_unmark(group, MR_INCIPIENT); + mac_set_rings_effective(grp_only_mcip); } else if (next_state == MAC_GROUP_STATE_REGISTERED) { /* * This is a non-default group being freed up. @@ -2705,19 +3172,95 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * to see if the primary client can get * exclusive access to the default group. 
*/ - ASSERT(group != mip->mi_rx_groups); + ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + if (mrp->mrp_mask & MRP_RX_RINGS) { + MAC_RX_GRP_RELEASED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RELEASED(mip, + group->mrg_cur_count); + } + } mac_release_rx_group(mcip, group); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); check_default_group = B_TRUE; } else { ASSERT(next_state == MAC_GROUP_STATE_SHARED); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_SHARED); mac_rx_group_unmark(group, MR_CONDEMNED); } flent->fe_rx_ring_group = NULL; } + /* + * Remove the client from the TX group. Additionally, if + * this a non-default group, then we also need to release + * the group. + */ + group = flent->fe_tx_ring_group; + default_group = MAC_DEFAULT_TX_GROUP(mip); + if (group != NULL) { + mac_group_remove_client(group, mcip); + next_state = mac_group_next_state(group, + &grp_only_mcip, default_group, B_FALSE); + if (next_state == MAC_GROUP_STATE_REGISTERED) { + if (group != default_group) { + if (mrp->mrp_mask & MRP_TX_RINGS) { + MAC_TX_GRP_RELEASED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RELEASED( + mip, group-> + mrg_cur_count); + } + } + mac_release_tx_group(mcip, group); + /* + * If the default group is reserved, + * then we need to set the effective + * rings as we would have given + * back some rings when the group + * was released + */ + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC && + default_group->mrg_state == + MAC_GROUP_STATE_RESERVED) { + grp_only_mcip = + MAC_GROUP_ONLY_CLIENT + (default_group); + mac_set_rings_effective( + grp_only_mcip); + } + } else { + mac_ring_t *ring; + int cnt; + int ringcnt; + + /* + * Stop all the rings except the + * default ring. 
+ */ + ringcnt = group->mrg_cur_count; + ring = group->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state == + MR_INUSE && ring != + (mac_ring_t *) + mip->mi_default_tx_ring) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } + ring = ring->mr_next; + } + } + } else if (next_state == MAC_GROUP_STATE_RESERVED) { + mac_set_rings_effective(grp_only_mcip); + } + flent->fe_tx_ring_group = NULL; + group->mrg_state = next_state; + } break; default: ASSERT(B_FALSE); @@ -2731,21 +3274,53 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * over the default group. */ if (check_default_group) { - default_group = mip->mi_rx_groups; + default_group = MAC_DEFAULT_RX_GROUP(mip); ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED); - next_state = mac_rx_group_next_state(default_group, - &grp_only_mcip); + next_state = mac_group_next_state(default_group, + &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { ASSERT(grp_only_mcip != NULL && mip->mi_nactiveclients == 1); - mac_set_rx_group_state(default_group, + mac_set_group_state(default_group, MAC_GROUP_STATE_RESERVED); - mac_srs_group_setup(grp_only_mcip, + mac_rx_srs_group_setup(grp_only_mcip, + grp_only_mcip->mci_flent, SRST_LINK); + mac_fanout_setup(grp_only_mcip, grp_only_mcip->mci_flent, - default_group, SRST_LINK); + MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver, + grp_only_mcip, NULL, NULL); mac_rx_group_unmark(default_group, MR_INCIPIENT); + mac_set_rings_effective(grp_only_mcip); } } + + /* + * If the primary is the only one left and the MAC supports + * dynamic grouping, we need to see if the primary needs to + * be moved to the default group so that it can use all the + * H/W rings. 
+ */ + if (!(flent->fe_type & FLOW_PRIMARY_MAC) && + mip->mi_nactiveclients == 1 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + default_group = MAC_DEFAULT_RX_GROUP(mip); + grp_only_mcip = mac_primary_client_handle(mip); + if (grp_only_mcip == NULL) + return; + group_only_flent = grp_only_mcip->mci_flent; + mrp = MCIP_RESOURCE_PROPS(grp_only_mcip); + /* + * If the primary has an explicit property set, leave it + * alone. + */ + if (mrp->mrp_mask & MRP_RX_RINGS) + return; + /* + * Switch the primary to the default group. + */ + (void) mac_rx_switch_group(grp_only_mcip, + group_only_flent->fe_rx_ring_group, default_group); + } } /* DATAPATH TEAR DOWN ROUTINES (SRS and FANOUT teardown) */ @@ -2753,18 +3328,36 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, static void mac_srs_fanout_list_free(mac_soft_ring_set_t *mac_srs) { - ASSERT(mac_srs->srs_tcp_soft_rings != NULL); - kmem_free(mac_srs->srs_tcp_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_tcp_soft_rings = NULL; - ASSERT(mac_srs->srs_udp_soft_rings != NULL); - kmem_free(mac_srs->srs_udp_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_udp_soft_rings = NULL; - ASSERT(mac_srs->srs_oth_soft_rings != NULL); - kmem_free(mac_srs->srs_oth_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_oth_soft_rings = NULL; + if (mac_srs->srs_type & SRST_TX) { + mac_srs_tx_t *tx; + + ASSERT(mac_srs->srs_tcp_soft_rings == NULL); + ASSERT(mac_srs->srs_udp_soft_rings == NULL); + ASSERT(mac_srs->srs_oth_soft_rings == NULL); + ASSERT(mac_srs->srs_tx_soft_rings != NULL); + kmem_free(mac_srs->srs_tx_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP); + mac_srs->srs_tx_soft_rings = NULL; + tx = &mac_srs->srs_tx; + if (tx->st_soft_rings != NULL) { + kmem_free(tx->st_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP); + } + } else { + ASSERT(mac_srs->srs_tx_soft_rings == NULL); + 
ASSERT(mac_srs->srs_tcp_soft_rings != NULL); + kmem_free(mac_srs->srs_tcp_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_tcp_soft_rings = NULL; + ASSERT(mac_srs->srs_udp_soft_rings != NULL); + kmem_free(mac_srs->srs_udp_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_udp_soft_rings = NULL; + ASSERT(mac_srs->srs_oth_soft_rings != NULL); + kmem_free(mac_srs->srs_oth_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_oth_soft_rings = NULL; + } } /* @@ -2815,10 +3408,11 @@ mac_srs_free(mac_soft_ring_set_t *mac_srs) mac_pkt_drop(NULL, NULL, mac_srs->srs_first, B_FALSE); mac_srs_ring_free(mac_srs); - mac_srs_soft_rings_free(mac_srs, B_TRUE); + mac_srs_soft_rings_free(mac_srs); mac_srs_fanout_list_free(mac_srs); mac_srs->srs_bw = NULL; + mac_srs_stat_delete(mac_srs); kmem_cache_free(mac_srs_cache, mac_srs); } @@ -3126,13 +3720,19 @@ mac_tx_srs_add_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) { mac_client_impl_t *mcip = mac_srs->srs_mcip; mac_soft_ring_t *soft_ring; - int count = mac_srs->srs_oth_ring_count; + int count = mac_srs->srs_tx_ring_count; + uint32_t soft_ring_type = ST_RING_TX; + uint_t ring_info; ASSERT(mac_srs->srs_state & SRS_QUIESCE); - soft_ring = mac_soft_ring_create(count, 0, NULL, - (ST_RING_OTH | ST_RING_TX), maxclsyspri, mcip, mac_srs, -1, + ring_info = mac_hwring_getinfo((mac_ring_handle_t)tx_ring); + if (mac_tx_serialize || (ring_info & MAC_RING_TX_SERIALIZE)) + soft_ring_type |= ST_RING_WORKER_ONLY; + soft_ring = mac_soft_ring_create(count, 0, + soft_ring_type, maxclsyspri, mcip, mac_srs, -1, NULL, mcip, (mac_resource_handle_t)tx_ring); - mac_srs->srs_oth_ring_count++; + mac_srs->srs_tx_ring_count++; + mac_srs_update_fanout_list(mac_srs); /* * put this soft ring in quiesce mode too so when we restart * all soft rings in the srs are in the same state. 
@@ -3177,7 +3777,7 @@ mac_soft_ring_remove(mac_soft_ring_set_t *mac_srs, mac_soft_ring_t *softring) mac_srs->srs_soft_ring_condemned_count--; mutex_exit(&mac_srs->srs_lock); - mac_soft_ring_free(softring, B_FALSE); + mac_soft_ring_free(softring); } void @@ -3185,70 +3785,59 @@ mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) { int i; mac_soft_ring_t *soft_ring, *remove_sring; + mac_client_impl_t *mcip = mac_srs->srs_mcip; mutex_enter(&mac_srs->srs_lock); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - soft_ring = mac_srs->srs_oth_soft_rings[i]; + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + soft_ring = mac_srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) break; } mutex_exit(&mac_srs->srs_lock); - ASSERT(i < mac_srs->srs_oth_ring_count); + ASSERT(i < mac_srs->srs_tx_ring_count); remove_sring = soft_ring; + /* + * In the case of aggr, the soft ring associated with a Tx ring + * is also stored in st_soft_rings[] array. That entry should + * be removed. + */ + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring); + tx->st_soft_rings[tx_ring->mr_index] = NULL; + } mac_soft_ring_remove(mac_srs, remove_sring); mac_srs_update_fanout_list(mac_srs); } /* * mac_tx_srs_setup(): - * * Used to setup Tx rings. If no free Tx ring is available, then default * Tx ring is used. 
*/ void -mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent, - uint32_t srs_type) +mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent) { - mac_impl_t *mip = mcip->mci_mip; - mac_soft_ring_set_t *tx_srs; - int i, tx_ring_count = 0, tx_rings_reserved = 0; - mac_ring_handle_t *tx_rings = NULL; - uint32_t soft_ring_type; - mac_group_t *grp = NULL; - mac_ring_t *ring; - mac_srs_tx_t *tx; - boolean_t serialize = B_FALSE; - - tx_srs = flent->fe_tx_srs; - tx = &tx_srs->srs_tx; - - if (tx->st_group != NULL) { - grp = tx->st_group; - tx_ring_count = grp->mrg_cur_count; - } else { - tx_ring_count = mac_tx_ring_count; - } - - if (tx_ring_count != 0) { - tx_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * - tx_ring_count, KM_SLEEP); - } - - /* - * Just use the default ring for now. We need to use - * the underlying link's ring set instead of the underlying - * NIC's. - */ - if (srs_type == SRST_FLOW || - (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) { - /* use default ring */ - tx_rings[0] = (void *)mip->mi_default_tx_ring; - tx_rings_reserved++; - goto rings_assigned; - } - - if (mcip->mci_share != NULL) - ring = grp->mrg_rings; + mac_impl_t *mip = mcip->mci_mip; + mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs; + int i; + int tx_ring_count = 0; + uint32_t soft_ring_type; + mac_group_t *grp = NULL; + mac_ring_t *ring; + mac_srs_tx_t *tx = &tx_srs->srs_tx; + boolean_t is_aggr; + uint_t ring_info = 0; + + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0; + grp = flent->fe_tx_ring_group; + if (grp == NULL) { + ring = (mac_ring_t *)mip->mi_default_tx_ring; + goto no_group; + } + tx_ring_count = grp->mrg_cur_count; + ring = grp->mrg_rings; /* * An attempt is made to reserve 'tx_ring_count' number * of Tx rings. If tx_ring_count is 0, default Tx ring @@ -3258,87 +3847,80 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * then each Tx ring will have a Tx-side soft ring. All * these soft rings will be hang off Tx SRS. 
*/ - for (i = 0; i < tx_ring_count; i++) { - if (mcip->mci_share != NULL) { - /* - * The ring was already chosen and associated - * with the TX group. Save it in the new - * array to keep as much of the code below common - * between the share and non-share cases. - */ - ASSERT(ring != NULL); - tx_rings[i] = (mac_ring_handle_t)ring; - ring = ring->mr_next; - } else { - tx_rings[i] = - (mac_ring_handle_t)mac_reserve_tx_ring(mip, NULL); - if (tx_rings[i] == NULL) { - /* - * We have run out of Tx rings. So - * give the default ring too. - */ - tx_rings[i] = (void *)mip->mi_default_tx_ring; - tx_rings_reserved++; + switch (grp->mrg_state) { + case MAC_GROUP_STATE_SHARED: + case MAC_GROUP_STATE_RESERVED: + if (tx_ring_count <= 1 && !is_aggr) { +no_group: + if (ring != NULL && + ring->mr_state != MR_INUSE) { + (void) mac_start_ring(ring); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)ring); + } + tx->st_arg2 = (void *)ring; + mac_tx_srs_stat_recreate(tx_srs, B_FALSE); + if (tx_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = SRS_TX_BW; + } else if (mac_tx_serialize || + (ring_info & MAC_RING_TX_SERIALIZE)) { + tx->st_mode = SRS_TX_SERIALIZE; + } else { + tx->st_mode = SRS_TX_DEFAULT; + } break; } - } - tx_rings_reserved++; - } - -rings_assigned: - if (mac_tx_serialize || (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) - serialize = B_TRUE; - /* - * Did we get the requested number of tx rings? - * There are 2 actions we can take depending upon the number - * of tx_rings we got. - * 1) If we got one, then get the tx_ring from the soft ring, - * save it in SRS and free up the soft ring. - * 2) If we got more than 1, then do the tx fanout among the - * rings we obtained. 
- */ - ASSERT(tx_rings_reserved != 0); - if (tx_rings_reserved == 1) { - tx->st_arg2 = (void *)tx_rings[0]; - /* For ring_count of 0 or 1, set the tx_mode and return */ - if (tx_srs->srs_type & SRST_BW_CONTROL) - tx->st_mode = SRS_TX_BW; - else if (serialize) - tx->st_mode = SRS_TX_SERIALIZE; - else - tx->st_mode = SRS_TX_DEFAULT; - } else { - /* - * We got multiple Tx rings for Tx fanout. - */ - soft_ring_type = ST_RING_OTH | ST_RING_TX; - if (tx_srs->srs_type & SRST_BW_CONTROL) { - tx->st_mode = SRS_TX_BW_FANOUT; - } else { - tx->st_mode = SRS_TX_FANOUT; - if (serialize) - soft_ring_type |= ST_RING_WORKER_ONLY; - } - for (i = 0; i < tx_rings_reserved; i++) { - (void) mac_soft_ring_create(i, 0, NULL, soft_ring_type, - maxclsyspri, mcip, tx_srs, -1, NULL, mcip, - (mac_resource_handle_t)tx_rings[i]); - } - mac_srs_update_fanout_list(tx_srs); + soft_ring_type = ST_RING_TX; + if (tx_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = is_aggr ? + SRS_TX_BW_AGGR : SRS_TX_BW_FANOUT; + } else { + tx->st_mode = is_aggr ? 
SRS_TX_AGGR : + SRS_TX_FANOUT; + } + for (i = 0; i < tx_ring_count; i++) { + ASSERT(ring != NULL); + switch (ring->mr_state) { + case MR_INUSE: + case MR_FREE: + ASSERT(ring->mr_srs == NULL); + + if (ring->mr_state != MR_INUSE) + (void) mac_start_ring(ring); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)ring); + if (mac_tx_serialize || (ring_info & + MAC_RING_TX_SERIALIZE)) { + soft_ring_type |= + ST_RING_WORKER_ONLY; + } + (void) mac_soft_ring_create(i, 0, + soft_ring_type, maxclsyspri, + mcip, tx_srs, -1, NULL, mcip, + (mac_resource_handle_t)ring); + break; + default: + cmn_err(CE_PANIC, + "srs_setup: mcip = %p " + "trying to add UNKNOWN ring = %p\n", + (void *)mcip, (void *)ring); + break; + } + ring = ring->mr_next; + } + mac_srs_update_fanout_list(tx_srs); + break; + default: + ASSERT(B_FALSE); + break; } tx->st_func = mac_tx_get_func(tx->st_mode); - - DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs, - int, tx->st_mode, int, tx_srs->srs_oth_ring_count); - - if (tx_ring_count != 0) { - tx->st_ring_count = tx_rings_reserved; - tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * - tx_rings_reserved, KM_SLEEP); - for (i = 0; i < tx->st_ring_count; i++) - tx->st_rings[i] = tx_rings[i]; - kmem_free(tx_rings, sizeof (mac_ring_handle_t) * tx_ring_count); + if (is_aggr) { + VERIFY(i_mac_capab_get((mac_handle_t)mip, + MAC_CAPAB_AGGR, &tx->st_capab_aggr)); } + DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs, + int, tx->st_mode, int, tx_srs->srs_tx_ring_count); } /* @@ -3346,10 +3928,14 @@ rings_assigned: * its current link speed. 
*/ void -mac_fanout_recompute_client(mac_client_impl_t *mcip) +mac_fanout_recompute_client(mac_client_impl_t *mcip, cpupart_t *cpupart) { uint64_t link_speed; mac_resource_props_t *mcip_mrp; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *rx_srs; + mac_cpus_t *srs_cpu; + int soft_ring_count, maxcpus; ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); @@ -3359,8 +3945,31 @@ mac_fanout_recompute_client(mac_client_impl_t *mcip) if ((link_speed != 0) && (link_speed != mcip->mci_flent->fe_nic_speed)) { mcip_mrp = MCIP_RESOURCE_PROPS(mcip); - mac_fanout_setup(mcip, mcip->mci_flent, - mcip_mrp, mac_rx_deliver, mcip, NULL); + /* + * Before calling mac_fanout_setup(), check to see if + * the SRSes already have the right number of soft + * rings. mac_fanout_setup() is a heavy duty operation + * where new cpu bindings are done for SRS and soft + * ring threads and interrupts re-targeted. + */ + maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus; + soft_ring_count = mac_compute_soft_ring_count(flent, + flent->fe_rx_srs_cnt - 1, maxcpus); + /* + * If soft_ring_count returned by + * mac_compute_soft_ring_count() is 0, bump it + * up by 1 because we always have atleast one + * TCP, UDP, and OTH soft ring associated with + * an SRS. + */ + soft_ring_count = (soft_ring_count == 0) ? 
+ 1 : soft_ring_count; + rx_srs = flent->fe_rx_srs[0]; + srs_cpu = &rx_srs->srs_cpu; + if (soft_ring_count != srs_cpu->mc_rx_fanout_cnt) { + mac_fanout_setup(mcip, flent, mcip_mrp, + mac_rx_deliver, mcip, NULL, cpupart); + } } } @@ -3376,6 +3985,9 @@ void mac_fanout_recompute(mac_impl_t *mip) { mac_client_impl_t *mcip; + cpupart_t *cpupart; + boolean_t use_default; + mac_resource_props_t *mrp, *emrp; i_mac_perim_enter(mip); if ((mip->mi_state_flags & MIS_IS_VNIC) != 0 || @@ -3389,7 +4001,14 @@ mac_fanout_recompute(mac_impl_t *mip) if ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0 || !MCIP_DATAPATH_SETUP(mcip)) continue; - mac_fanout_recompute_client(mcip); + mrp = MCIP_RESOURCE_PROPS(mcip); + emrp = MCIP_EFFECTIVE_PROPS(mcip); + use_default = B_FALSE; + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_recompute_client(mcip, cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); } i_mac_perim_exit(mip); } diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c index 16b5ec4396..aa4985fe4c 100644 --- a/usr/src/uts/common/io/mac/mac_flow.c +++ b/usr/src/uts/common/io/mac/mac_flow.c @@ -29,10 +29,14 @@ #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> +#include <sys/mac_stat.h> #include <sys/dls.h> #include <sys/dls_impl.h> #include <sys/mac_soft_ring.h> #include <sys/ethernet.h> +#include <sys/cpupart.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> #include <sys/vlan.h> #include <inet/ip.h> #include <inet/ip6.h> @@ -40,6 +44,16 @@ #include <netinet/udp.h> #include <netinet/sctp.h> +typedef struct flow_stats_s { + uint64_t fs_obytes; + uint64_t fs_opackets; + uint64_t fs_oerrors; + uint64_t fs_ibytes; + uint64_t fs_ipackets; + uint64_t fs_ierrors; +} flow_stats_t; + + /* global flow table, will be a per exclusive-zone table later */ static mod_hash_t *flow_hash; static krwlock_t flow_tab_lock; @@ -55,7 +69,7 @@ typedef struct { #define FS_OFF(f) 
(offsetof(flow_stats_t, f)) static flow_stats_info_t flow_stats_list[] = { - {"rbytes", FS_OFF(fs_rbytes)}, + {"rbytes", FS_OFF(fs_ibytes)}, {"ipackets", FS_OFF(fs_ipackets)}, {"ierrors", FS_OFF(fs_ierrors)}, {"obytes", FS_OFF(fs_obytes)}, @@ -83,19 +97,48 @@ flow_stat_init(kstat_named_t *knp) static int flow_stat_update(kstat_t *ksp, int rw) { - flow_entry_t *fep = ksp->ks_private; - flow_stats_t *fsp = &fep->fe_flowstats; - kstat_named_t *knp = ksp->ks_data; - uint64_t *statp; - int i; + flow_entry_t *fep = ksp->ks_private; + kstat_named_t *knp = ksp->ks_data; + uint64_t *statp; + int i; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + flow_stats_t flow_stats; + mac_soft_ring_set_t *mac_srs; if (rw != KSTAT_READ) return (EACCES); + bzero(&flow_stats, sizeof (flow_stats_t)); + + for (i = 0; i < fep->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i]; + if (mac_srs == NULL) /* Multicast flow */ + break; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; + + flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + + flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors; + } + + mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs; + if (mac_srs == NULL) /* Multicast flow */ + goto done; + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + flow_stats.fs_obytes = mac_tx_stat->mts_obytes; + flow_stats.fs_opackets = mac_tx_stat->mts_opackets; + flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors; + +done: for (i = 0; i < FS_SIZE; i++, knp++) { statp = (uint64_t *) - ((uchar_t *)fsp + flow_stats_list[i].fs_offset); - + ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset); knp->value.ui64 = *statp; } return (0); @@ -170,11 +213,11 @@ int mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, void *client_cookie, uint_t type, flow_entry_t **flentp) { - flow_entry_t *flent = 
*flentp; - int err = 0; + flow_entry_t *flent = *flentp; + int err = 0; if (mrp != NULL) { - err = mac_validate_props(mrp); + err = mac_validate_props(NULL, mrp); if (err != 0) return (err); } @@ -221,6 +264,8 @@ mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; else mrp->mrp_priority = MPL_LINK_DEFAULT; + bzero(mrp->mrp_pool, MAXPATHLEN); + bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t)); bcopy(mrp, &flent->fe_effective_props, sizeof (mac_resource_props_t)); } @@ -593,7 +638,7 @@ mac_flow_destroy(flow_entry_t *flent) } else { mac_flow_cleanup(flent); } - + mac_misc_stat_delete(flent); mutex_destroy(&flent->fe_lock); cv_destroy(&flent->fe_cv); flow_stat_destroy(flent); @@ -617,13 +662,15 @@ mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) int i; if ((mrp->mrp_mask & MRP_MAXBW) != 0 && - (fmrp->mrp_maxbw != mrp->mrp_maxbw)) { + (!(fmrp->mrp_mask & MRP_MAXBW) || + (fmrp->mrp_maxbw != mrp->mrp_maxbw))) { changed_mask |= MRP_MAXBW; - fmrp->mrp_maxbw = mrp->mrp_maxbw; if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { fmrp->mrp_mask &= ~MRP_MAXBW; + fmrp->mrp_maxbw = 0; } else { fmrp->mrp_mask |= MRP_MAXBW; + fmrp->mrp_maxbw = mrp->mrp_maxbw; } } @@ -658,6 +705,22 @@ mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) changed_mask |= MRP_CPUS; MAC_COPY_CPUS(mrp, fmrp); } + + /* + * Modify the rings property. 
+ */ + if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS) + mac_set_rings_effective(flent->fe_mcip); + + if ((mrp->mrp_mask & MRP_POOL) != 0) { + if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0) + changed_mask |= MRP_POOL; + if (strlen(mrp->mrp_pool) == 0) + fmrp->mrp_mask &= ~MRP_POOL; + else + fmrp->mrp_mask |= MRP_POOL; + (void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN); + } return (changed_mask); } @@ -667,6 +730,9 @@ mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) uint32_t changed_mask; mac_client_impl_t *mcip = flent->fe_mcip; mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + cpupart_t *cpupart = NULL; + boolean_t use_default = B_FALSE; ASSERT(flent != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); @@ -693,14 +759,24 @@ mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) !(changed_mask & MRP_CPUS) && !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { mac_fanout_setup(mcip, flent, mcip_mrp, - mac_rx_deliver, mcip, NULL); + mac_rx_deliver, mcip, NULL, NULL); } } if (mrp->mrp_mask & MRP_PRIORITY) mac_flow_update_priority(mcip, flent); if (changed_mask & MRP_CPUS) - mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL); + mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, + NULL); + + if (mrp->mrp_mask & MRP_POOL) { + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, + cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); + } } /* @@ -1368,7 +1444,7 @@ mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) datalink_id_t linkid; flow_tab_t *flow_tab; - err = mac_validate_props(mrp); + err = mac_validate_props(NULL, mrp); if (err != 0) return (err); @@ -1445,10 +1521,14 @@ static int mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) { flow_walk_state_t *statep = arg; - 
mac_flowinfo_t finfo; + mac_flowinfo_t *finfo; + int err; - mac_link_flowinfo_copy(&finfo, flent); - return (statep->ws_func(&finfo, statep->ws_arg)); + finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); + mac_link_flowinfo_copy(finfo, flent); + err = statep->ws_func(finfo, statep->ws_arg); + kmem_free(finfo, sizeof (*finfo)); + return (err); } /* @@ -1885,18 +1965,19 @@ flow_ip_accept(flow_tab_t *ft, flow_state_t *s) break; } case ETHERTYPE_IPV6: { - ip6_t *ip6h = (ip6_t *)l3_start; - uint16_t ip6_hdrlen; - uint8_t nexthdr; + ip6_t *ip6h = (ip6_t *)l3_start; + ip6_frag_t *frag = NULL; + uint16_t ip6_hdrlen; + uint8_t nexthdr; - if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen, - &nexthdr, NULL, NULL)) { + if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen, + &nexthdr, &frag)) { return (ENOBUFS); } l3info->l3_hdrsize = ip6_hdrlen; l3info->l3_protocol = nexthdr; l3info->l3_version = IPV6_VERSION; - l3info->l3_fragmented = B_FALSE; + l3info->l3_fragmented = (frag != NULL); break; } default: diff --git a/usr/src/uts/common/io/mac/mac_hio.c b/usr/src/uts/common/io/mac/mac_hio.c index 9810ac821c..703c42b4cc 100644 --- a/usr/src/uts/common/io/mac/mac_hio.c +++ b/usr/src/uts/common/io/mac/mac_hio.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> +#include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> @@ -129,7 +130,7 @@ mac_share_bind(mac_client_handle_t mch, uint64_t cookie, uint64_t *rcookie) * there are no in flight packets through a transmit ring * which is being bound to another domain. 
*/ - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce(mch); /* * For the receive path, no traffic will be sent up through @@ -148,7 +149,7 @@ mac_share_bind(mac_client_handle_t mch, uint64_t cookie, uint64_t *rcookie) /* * Resume transmit traffic for the MAC client. */ - mac_tx_client_restart(mcip); + mac_tx_client_restart(mch); i_mac_perim_exit(mip); @@ -182,7 +183,7 @@ mac_share_unbind(mac_client_handle_t mch) * been updated by mac_fanout_recompute(). Do the check here * now that the share has been unbound. */ - mac_fanout_recompute_client(mcip); + mac_fanout_recompute_client(mcip, NULL); i_mac_perim_exit(mip); } diff --git a/usr/src/uts/common/io/mac/mac_ndd.c b/usr/src/uts/common/io/mac/mac_ndd.c index 9d4fc4bc18..cf99ef64be 100644 --- a/usr/src/uts/common/io/mac/mac_ndd.c +++ b/usr/src/uts/common/io/mac/mac_ndd.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,6 +29,7 @@ #include <sys/types.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_client_priv.h> #include <inet/nd.h> #include <sys/mac_ether.h> #include <sys/policy.h> @@ -95,17 +96,16 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) { int size_out, i; mblk_t *tmp; - mac_priv_prop_t *mpriv; uint_t permflags; int status; uint64_t value; + char *prop_name; if (!mac_add_name(mp, "?", MAC_PROP_PERM_READ)) return (-1); /* first the known ndd mappings */ for (i = 0; i < mip->mi_type->mt_mappingcount; i++) { - permflags = MAC_PROP_PERM_RW; if ((mip->mi_type->mt_mapping[i].mp_flags & MAC_PROP_MAP_KSTAT) != 0) permflags = MAC_PROP_PERM_READ; @@ -113,8 +113,13 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) status = mip->mi_callbacks->mc_getprop(mip->mi_driver, mip->mi_type->mt_mapping[i].mp_name, mip->mi_type->mt_mapping[i].mp_prop_id, - 0, mip->mi_type->mt_mapping[i].mp_valsize, - &value, &permflags); + mip->mi_type->mt_mapping[i].mp_valsize, &value); + if (status != 0) + continue; + status = mac_prop_info((mac_handle_t)mip, + mip->mi_type->mt_mapping[i].mp_prop_id, + mip->mi_type->mt_mapping[i].mp_name, NULL, 0, + NULL, &permflags); if (status != 0) continue; } @@ -126,10 +131,14 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) /* now the driver's ndd variables */ for (i = 0; i < mip->mi_priv_prop_count; i++) { - mpriv = &mip->mi_priv_prop[i]; + prop_name = mip->mi_priv_prop[i]; + + if (mac_prop_info((mac_handle_t)mip, MAC_PROP_PRIVATE, + prop_name, NULL, 0, NULL, &permflags) != 0) + return (-1); /* skip over the "_" */ - if (!mac_add_name(mp, &mpriv->mpp_name[1], mpriv->mpp_flags)) + if (!mac_add_name(mp, &prop_name[1], permflags)) return (-1); } @@ -185,7 +194,6 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) uint16_t u16; uint32_t u32; uint64_t u64; - uint_t perm; if (mp->b_cont == NULL || avail < 2) return (EINVAL); @@ -258,9 +266,8 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) 
new_value = u32 = (long)u64; } else { status = mip->mi_callbacks->mc_getprop(mip->mi_driver, - name, mip->mi_type->mt_mapping[i].mp_prop_id, 0, - mip->mi_type->mt_mapping[i].mp_valsize, value, - &perm); + name, mip->mi_type->mt_mapping[i].mp_prop_id, + mip->mi_type->mt_mapping[i].mp_valsize, value); switch (mip->mi_type->mt_mapping[i].mp_valsize) { case 1: new_value = u8; @@ -294,7 +301,7 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) */ (void) snprintf(priv_name, sizeof (priv_name), "_%s", name); status = mip->mi_callbacks->mc_getprop(mip->mi_driver, priv_name, - MAC_PROP_PRIVATE, 0, avail - 2, mp1->b_rptr, &perm); + MAC_PROP_PRIVATE, avail - 2, mp1->b_rptr); if (status != 0) goto get_done; diff --git a/usr/src/uts/common/io/mac/mac_protect.c b/usr/src/uts/common/io/mac/mac_protect.c index 8bd527c8d5..c923bcdbe2 100644 --- a/usr/src/uts/common/io/mac/mac_protect.c +++ b/usr/src/uts/common/io/mac/mac_protect.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,68 +33,1668 @@ #include <sys/ethernet.h> #include <sys/vlan.h> #include <sys/dlpi.h> +#include <sys/avl.h> #include <inet/ip.h> #include <inet/ip6.h> #include <inet/arp.h> +#include <netinet/arp.h> +#include <netinet/udp.h> +#include <netinet/dhcp.h> +#include <netinet/dhcp6.h> /* - * Check if ipaddr is in the 'allowed-ips' list. + * Implementation overview for DHCP address detection + * + * The purpose of DHCP address detection is to relieve the user of having to + * manually configure static IP addresses when ip-nospoof protection is turned + * on. To achieve this, the mac layer needs to intercept DHCP packets to + * determine the assigned IP addresses. + * + * A DHCP handshake between client and server typically requires at least + * 4 messages: + * + * 1. 
DISCOVER - client attempts to locate DHCP servers via a + * broadcast message to its subnet. + * 2. OFFER - server responds to client with an IP address and + * other parameters. + * 3. REQUEST - client requests the offered address. + * 4. ACK - server verifies that the requested address matches + * the one it offered. + * + * DHCPv6 behaves pretty much the same way aside from different message names. + * + * Address information is embedded in either the OFFER or REQUEST message. + * We chose to intercept REQUEST because this is at the last part of the + * handshake and it indicates that the client intends to keep the address. + * Intercepting OFFERs is unreliable because the client may receive multiple + * offers from different servers, and we can't tell which address the client + * will keep. + * + * Each DHCP message has a transaction ID. We use this transaction ID to match + * REQUESTs with ACKs received from servers. + * + * For IPv4, the process to acquire a DHCP-assigned address is as follows: + * + * 1. Client sends REQUEST. A new dhcpv4_txn_t object is created and inserted + * in the mci_v4_pending_txn table (keyed by xid). This object represents + * a new transaction. It contains the xid, the client ID and requested IP + * address. + * + * 2. Server responds with an ACK. The xid from this ACK is used to look up the + * pending transaction from the mci_v4_pending_txn table. Once the object is + * found, it is removed from the pending table and inserted into the + * completed table (mci_v4_completed_txn, keyed by client ID) and the dynamic + * IP table (mci_v4_dyn_ip, keyed by IP address). + * + * 3. An outgoing packet that goes through the ip-nospoof path will be checked + * against the dynamic IP table. Packets that have the assigned DHCP address + * as the source IP address will pass the check and be admitted onto the + * network. 
+ * + * IPv4 notes: + * + * If the server never responds with an ACK, there is a timer that is set after + * the insertion of the transaction into the pending table. When the timer + * fires, it will check whether the transaction is old (by comparing current + * time and the txn's timestamp); if so, the transaction will be freed. Along + * with this, any transaction in the completed/dyn-ip tables matching the client + * ID of this stale transaction will also be freed. If the client fails to + * extend a lease, we want to stop the client from using any IP addresses that + * were granted previously. + * + * A RELEASE message from the client will not cause a transaction to be created. + * The client ID in the RELEASE message will be used for finding and removing + * transactions in the completed and dyn-ip tables. + * + * + * For IPv6, the process to acquire a DHCPv6-assigned address is as follows: + * + * 1. Client sends REQUEST. The DUID is extracted and stored into a dhcpv6_cid_t + * structure. A new transaction structure (dhcpv6_txn_t) is also created and + * it will point to the dhcpv6_cid_t. If an existing transaction with a + * matching xid is not found, this dhcpv6_txn_t will be inserted into the + * mci_v6_pending_txn table (keyed by xid). + * + * 2. Server responds with a REPLY. If a pending transaction is found, the + * addresses in the reply will be placed into the dhcpv6_cid_t pointed to by + * the transaction. The dhcpv6_cid_t will then be moved to the mci_v6_cid + * table (keyed by cid). The associated addresses will be added to the + * mci_v6_dyn_ip table (while still being pointed to by the dhcpv6_cid_t). + * + * 3. IPv6 ip-nospoof will now check mci_v6_dyn_ip for matching packets. + * Packets with a source address matching one of the DHCPv6-assigned + * addresses will be allowed through. + * + * IPv6 notes: + * + * The v6 code shares the same timer as v4 for scrubbing stale transactions. 
+ * Just like v4, as part of removing an expired transaction, a RELEASE will be + * be triggered on the cid associated with the expired transaction. + * + * The data structures used for v6 are slightly different because a v6 client + * may have multiple addresses associated with it. + */ + +/* + * These are just arbitrary limits meant for preventing abuse (e.g. a user + * flooding the network with bogus transactions). They are not meant to be + * user-modifiable so they are not exposed as linkprops. + */ +static ulong_t dhcp_max_pending_txn = 512; +static ulong_t dhcp_max_completed_txn = 512; +static time_t txn_cleanup_interval = 60; + +/* + * DHCPv4 transaction. It may be added to three different tables + * (keyed by different fields). + */ +typedef struct dhcpv4_txn { + uint32_t dt_xid; + time_t dt_timestamp; + uint8_t dt_cid[DHCP_MAX_OPT_SIZE]; + uint8_t dt_cid_len; + ipaddr_t dt_ipaddr; + avl_node_t dt_node; + avl_node_t dt_ipnode; + struct dhcpv4_txn *dt_next; +} dhcpv4_txn_t; + +/* + * DHCPv6 address. May be added to mci_v6_dyn_ip. + * It is always pointed to by its parent dhcpv6_cid_t structure. + */ +typedef struct dhcpv6_addr { + in6_addr_t da_addr; + avl_node_t da_node; + struct dhcpv6_addr *da_next; +} dhcpv6_addr_t; + +/* + * DHCPv6 client ID. May be added to mci_v6_cid. + * No dhcpv6_txn_t should be pointing to it after it is added to mci_v6_cid. + */ +typedef struct dhcpv6_cid { + uchar_t *dc_cid; + uint_t dc_cid_len; + dhcpv6_addr_t *dc_addr; + uint_t dc_addrcnt; + avl_node_t dc_node; +} dhcpv6_cid_t; + +/* + * DHCPv6 transaction. Unlike its v4 counterpart, this object gets freed up + * as soon as the transaction completes or expires. 
+ */ +typedef struct dhcpv6_txn { + uint32_t dt_xid; + time_t dt_timestamp; + dhcpv6_cid_t *dt_cid; + avl_node_t dt_node; + struct dhcpv6_txn *dt_next; +} dhcpv6_txn_t; + +static void start_txn_cleanup_timer(mac_client_impl_t *); + +#define BUMP_STAT(m, s) (m)->mci_misc_stat.mms_##s++ + +/* + * Comparison functions for the 3 AVL trees used: + * mci_v4_pending_txn, mci_v4_completed_txn, mci_v4_dyn_ip + */ +static int +compare_dhcpv4_xid(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_xid < txn2->dt_xid) + return (-1); + else if (txn1->dt_xid > txn2->dt_xid) + return (1); + else + return (0); +} + +static int +compare_dhcpv4_cid(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + int ret; + + if (txn1->dt_cid_len < txn2->dt_cid_len) + return (-1); + else if (txn1->dt_cid_len > txn2->dt_cid_len) + return (1); + + if (txn1->dt_cid_len == 0) + return (0); + + ret = memcmp(txn1->dt_cid, txn2->dt_cid, txn1->dt_cid_len); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +static int +compare_dhcpv4_ip(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_ipaddr < txn2->dt_ipaddr) + return (-1); + else if (txn1->dt_ipaddr > txn2->dt_ipaddr) + return (1); + else + return (0); +} + +/* + * Find the specified DHCPv4 option. + */ +static int +get_dhcpv4_option(struct dhcp *dh4, uchar_t *end, uint8_t type, + uchar_t **opt, uint8_t *opt_len) +{ + uchar_t *start = (uchar_t *)dh4->options; + uint8_t otype, olen; + + while (start < end) { + if (*start == CD_PAD) { + start++; + continue; + } + if (*start == CD_END) + break; + + otype = *start++; + olen = *start++; + if (otype == type && olen > 0) { + *opt = start; + *opt_len = olen; + return (0); + } + start += olen; + } + return (ENOENT); +} + +/* + * Locate the start of a DHCPv4 header. 
+ * The possible return values and associated meanings are: + * 0 - packet is DHCP and has a DHCP header. + * EINVAL - packet is not DHCP. the recommended action is to let it pass. + * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable. + * the recommended action is to drop it. + */ +static int +get_dhcpv4_info(ipha_t *ipha, uchar_t *end, struct dhcp **dh4) +{ + uint16_t offset_and_flags, client, server; + boolean_t first_frag = B_FALSE; + struct udphdr *udph; + uchar_t *dh; + + if (ipha->ipha_protocol != IPPROTO_UDP) + return (EINVAL); + + offset_and_flags = ntohs(ipha->ipha_fragment_offset_and_flags); + if ((offset_and_flags & (IPH_MF | IPH_OFFSET)) != 0) { + /* + * All non-initial fragments may pass because we cannot + * identify their type. It's safe to let them through + * because reassembly will fail if we decide to drop the + * initial fragment. + */ + if (((offset_and_flags << 3) & 0xffff) != 0) + return (EINVAL); + first_frag = B_TRUE; + } + /* drop packets without a udp header */ + udph = (struct udphdr *)((uchar_t *)ipha + IPH_HDR_LENGTH(ipha)); + if ((uchar_t *)&udph[1] > end) + return (ENOSPC); + + client = htons(IPPORT_BOOTPC); + server = htons(IPPORT_BOOTPS); + if (udph->uh_sport != client && udph->uh_sport != server && + udph->uh_dport != client && udph->uh_dport != server) + return (EINVAL); + + /* drop dhcp fragments */ + if (first_frag) + return (ENOSPC); + + dh = (uchar_t *)&udph[1]; + if (dh + BASE_PKT_SIZE > end) + return (EINVAL); + + *dh4 = (struct dhcp *)dh; + return (0); +} + +/* + * Wrappers for accesses to avl trees to improve readability. + * Their purposes are fairly self-explanatory. 
+ */ +static dhcpv4_txn_t * +find_dhcpv4_pending_txn(mac_client_impl_t *mcip, uint32_t xid) +{ + dhcpv4_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + tmp_txn.dt_xid = xid; + return (avl_find(&mcip->mci_v4_pending_txn, &tmp_txn, NULL)); +} + +static int +insert_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v4_pending_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v4_pending_txn) >= dhcp_max_pending_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v4_pending_txn, txn, where); + return (0); +} + +static void +remove_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v4_pending_txn, txn); +} + +static dhcpv4_txn_t * +find_dhcpv4_completed_txn(mac_client_impl_t *mcip, uint8_t *cid, + uint8_t cid_len) +{ + dhcpv4_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (cid_len > 0) + bcopy(cid, tmp_txn.dt_cid, cid_len); + tmp_txn.dt_cid_len = cid_len; + return (avl_find(&mcip->mci_v4_completed_txn, &tmp_txn, NULL)); +} + +/* + * After a pending txn is removed from the pending table, it is inserted + * into both the completed and dyn-ip tables. These two insertions are + * done together because a client ID must have 1:1 correspondence with + * an IP address and IP addresses must be unique in the dyn-ip table. 
+ */ +static int +insert_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v4_completed_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v4_completed_txn) >= + dhcp_max_completed_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + + avl_insert(&mcip->mci_v4_completed_txn, txn, where); + if (avl_find(&mcip->mci_v4_dyn_ip, txn, &where) != NULL) { + avl_remove(&mcip->mci_v4_completed_txn, txn); + return (EEXIST); + } + avl_insert(&mcip->mci_v4_dyn_ip, txn, where); + return (0); +} + +static void +remove_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + dhcpv4_txn_t *ctxn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if ((ctxn = avl_find(&mcip->mci_v4_dyn_ip, txn, NULL)) != NULL && + ctxn == txn) + avl_remove(&mcip->mci_v4_dyn_ip, txn); + + avl_remove(&mcip->mci_v4_completed_txn, txn); +} + +/* + * Check whether an IP address is in the dyn-ip table. */ static boolean_t -ipnospoof_check_ips(mac_protect_t *protect, ipaddr_t ipaddr) +check_dhcpv4_dyn_ip(mac_client_impl_t *mcip, ipaddr_t ipaddr) +{ + dhcpv4_txn_t tmp_txn, *txn; + + mutex_enter(&mcip->mci_protect_lock); + tmp_txn.dt_ipaddr = ipaddr; + txn = avl_find(&mcip->mci_v4_dyn_ip, &tmp_txn, NULL); + mutex_exit(&mcip->mci_protect_lock); + return (txn != NULL); +} + +/* + * Create/destroy a DHCPv4 transaction. + */ +static dhcpv4_txn_t * +create_dhcpv4_txn(uint32_t xid, uint8_t *cid, uint8_t cid_len, ipaddr_t ipaddr) +{ + dhcpv4_txn_t *txn; + + if ((txn = kmem_zalloc(sizeof (*txn), KM_NOSLEEP)) == NULL) + return (NULL); + + txn->dt_xid = xid; + txn->dt_timestamp = ddi_get_time(); + if (cid_len > 0) + bcopy(cid, &txn->dt_cid, cid_len); + txn->dt_cid_len = cid_len; + txn->dt_ipaddr = ipaddr; + return (txn); +} + +static void +free_dhcpv4_txn(dhcpv4_txn_t *txn) +{ + kmem_free(txn, sizeof (*txn)); +} + +/* + * Clean up all v4 tables. 
+ */ +static void +flush_dhcpv4(mac_client_impl_t *mcip) +{ + void *cookie = NULL; + dhcpv4_txn_t *txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + while ((txn = avl_destroy_nodes(&mcip->mci_v4_dyn_ip, + &cookie)) != NULL) { + /* + * No freeing needed here because the same txn exists + * in the mci_v4_completed_txn table as well. + */ + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v4_completed_txn, + &cookie)) != NULL) { + free_dhcpv4_txn(txn); + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v4_pending_txn, + &cookie)) != NULL) { + free_dhcpv4_txn(txn); + } +} + +/* + * Cleanup stale DHCPv4 transactions. + */ +static void +txn_cleanup_v4(mac_client_impl_t *mcip) { - uint_t i; + dhcpv4_txn_t *txn, *ctxn, *next, *txn_list = NULL; /* - * unspecified addresses are harmless and are used by ARP,DHCP..etc. + * Find stale pending transactions and place them on a list + * to be removed. */ - if (ipaddr == INADDR_ANY) - return (B_TRUE); + for (txn = avl_first(&mcip->mci_v4_pending_txn); txn != NULL; + txn = avl_walk(&mcip->mci_v4_pending_txn, txn, AVL_AFTER)) { + if (ddi_get_time() - txn->dt_timestamp > + txn_cleanup_interval) { + DTRACE_PROBE2(found__expired__txn, + mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); - for (i = 0; i < protect->mp_ipaddrcnt; i++) { - if (protect->mp_ipaddrs[i] == ipaddr) - return (B_TRUE); + txn->dt_next = txn_list; + txn_list = txn; + } } - return (B_FALSE); + + /* + * Remove and free stale pending transactions and completed + * transactions with the same client IDs as the stale transactions. 
+ */ + for (txn = txn_list; txn != NULL; txn = next) { + avl_remove(&mcip->mci_v4_pending_txn, txn); + + ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, + txn->dt_cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(removing__completed__txn, + mac_client_impl_t *, mcip, + dhcpv4_txn_t *, ctxn); + + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + next = txn->dt_next; + txn->dt_next = NULL; + + DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + } +} + +/* + * Core logic for intercepting outbound DHCPv4 packets. + */ +static void +intercept_dhcpv4_outbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end) +{ + struct dhcp *dh4; + uchar_t *opt; + dhcpv4_txn_t *txn, *ctxn; + ipaddr_t ipaddr; + uint8_t opt_len, mtype, cid[DHCP_MAX_OPT_SIZE], cid_len; + + if (get_dhcpv4_info(ipha, end, &dh4) != 0) + return; + + if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 || + opt_len != 1) { + DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + return; + } + mtype = *opt; + if (mtype != REQUEST && mtype != RELEASE) { + DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip, + struct dhcp *, dh4, uint8_t, mtype); + return; + } + + /* client ID is optional for IPv4 */ + if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &opt, &opt_len) == 0 && + opt_len >= 2) { + bcopy(opt, cid, opt_len); + cid_len = opt_len; + } else { + bzero(cid, DHCP_MAX_OPT_SIZE); + cid_len = 0; + } + + mutex_enter(&mcip->mci_protect_lock); + if (mtype == RELEASE) { + DTRACE_PROBE2(release, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + + /* flush any completed txn with this cid */ + ctxn = find_dhcpv4_completed_txn(mcip, cid, cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(release__successful, mac_client_impl_t *, + mcip, struct dhcp *, dh4); + + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + goto done; + } + + /* + * If a pending txn already exists, we'll 
update its timestamp so + * it won't get flushed by the timer. We don't need to create new + * txns for retransmissions. + */ + if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) != NULL) { + DTRACE_PROBE2(update, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + txn->dt_timestamp = ddi_get_time(); + goto done; + } + + if (get_dhcpv4_option(dh4, end, CD_REQUESTED_IP_ADDR, + &opt, &opt_len) != 0 || opt_len != sizeof (ipaddr)) { + DTRACE_PROBE2(ipaddr__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + goto done; + } + bcopy(opt, &ipaddr, sizeof (ipaddr)); + if ((txn = create_dhcpv4_txn(dh4->xid, cid, cid_len, ipaddr)) == NULL) + goto done; + + if (insert_dhcpv4_pending_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + start_txn_cleanup_timer(mcip); + + DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + +done: + mutex_exit(&mcip->mci_protect_lock); } /* - * Enforce ip-nospoof protection. Only IPv4 is supported for now. + * Core logic for intercepting inbound DHCPv4 packets. 
+ */ +static void +intercept_dhcpv4_inbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end) +{ + uchar_t *opt; + struct dhcp *dh4; + dhcpv4_txn_t *txn, *ctxn; + uint8_t opt_len, mtype; + + if (get_dhcpv4_info(ipha, end, &dh4) != 0) + return; + + if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 || + opt_len != 1) { + DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + return; + } + mtype = *opt; + if (mtype != ACK && mtype != NAK) { + DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip, + struct dhcp *, dh4, uint8_t, mtype); + return; + } + + mutex_enter(&mcip->mci_protect_lock); + if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) == NULL) { + DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + goto done; + } + remove_dhcpv4_pending_txn(mcip, txn); + + /* + * We're about to move a txn from the pending table to the completed/ + * dyn-ip tables. If there is an existing completed txn with the + * same cid as our txn, we need to remove and free it. + */ + ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, txn->dt_cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(replacing__old__txn, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, ctxn); + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + if (mtype == NAK) { + DTRACE_PROBE2(nak__received, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + if (insert_dhcpv4_completed_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + +done: + mutex_exit(&mcip->mci_protect_lock); +} + + +/* + * Comparison functions for the DHCPv6 AVL trees. 
*/ static int -ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, - mblk_t *mp, mac_header_info_t *mhip) +compare_dhcpv6_xid(const void *arg1, const void *arg2) { - uint32_t sap = mhip->mhi_bindsap; - uchar_t *start = mp->b_rptr + mhip->mhi_hdrsize; - int err = EINVAL; + const dhcpv6_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_xid < txn2->dt_xid) + return (-1); + else if (txn1->dt_xid > txn2->dt_xid) + return (1); + else + return (0); +} + +static int +compare_dhcpv6_ip(const void *arg1, const void *arg2) +{ + const dhcpv6_addr_t *ip1 = arg1, *ip2 = arg2; + int ret; + + ret = memcmp(&ip1->da_addr, &ip2->da_addr, sizeof (in6_addr_t)); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +static int +compare_dhcpv6_cid(const void *arg1, const void *arg2) +{ + const dhcpv6_cid_t *cid1 = arg1, *cid2 = arg2; + int ret; + + if (cid1->dc_cid_len < cid2->dc_cid_len) + return (-1); + else if (cid1->dc_cid_len > cid2->dc_cid_len) + return (1); + + if (cid1->dc_cid_len == 0) + return (0); + + ret = memcmp(cid1->dc_cid, cid2->dc_cid, cid1->dc_cid_len); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +/* + * Locate the start of a DHCPv6 header. + * The possible return values and associated meanings are: + * 0 - packet is DHCP and has a DHCP header. + * EINVAL - packet is not DHCP. the recommended action is to let it pass. + * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable. + * the recommended action is to drop it. 
+ */ +static int +get_dhcpv6_info(ip6_t *ip6h, uchar_t *end, dhcpv6_message_t **dh6) +{ + uint16_t hdrlen, client, server; + boolean_t first_frag = B_FALSE; + ip6_frag_t *frag = NULL; + uint8_t proto; + struct udphdr *udph; + uchar_t *dh; + + if (!mac_ip_hdr_length_v6(ip6h, end, &hdrlen, &proto, &frag)) + return (ENOSPC); + + if (proto != IPPROTO_UDP) + return (EINVAL); + + if (frag != NULL) { + /* + * All non-initial fragments may pass because we cannot + * identify their type. It's safe to let them through + * because reassembly will fail if we decide to drop the + * initial fragment. + */ + if ((ntohs(frag->ip6f_offlg) & ~7) != 0) + return (EINVAL); + first_frag = B_TRUE; + } + /* drop packets without a udp header */ + udph = (struct udphdr *)((uchar_t *)ip6h + hdrlen); + if ((uchar_t *)&udph[1] > end) + return (ENOSPC); + + client = htons(IPPORT_DHCPV6C); + server = htons(IPPORT_DHCPV6S); + if (udph->uh_sport != client && udph->uh_sport != server && + udph->uh_dport != client && udph->uh_dport != server) + return (EINVAL); + + /* drop dhcp fragments */ + if (first_frag) + return (ENOSPC); + + dh = (uchar_t *)&udph[1]; + if (dh + sizeof (dhcpv6_message_t) > end) + return (EINVAL); + + *dh6 = (dhcpv6_message_t *)dh; + return (0); +} + +/* + * Find the specified DHCPv6 option. 
+ */ +static dhcpv6_option_t * +get_dhcpv6_option(void *buf, size_t buflen, dhcpv6_option_t *oldopt, + uint16_t codenum, uint_t *retlenp) +{ + uchar_t *bp; + dhcpv6_option_t d6o; + uint_t olen; + + codenum = htons(codenum); + bp = buf; + while (buflen >= sizeof (dhcpv6_option_t)) { + bcopy(bp, &d6o, sizeof (d6o)); + olen = ntohs(d6o.d6o_len) + sizeof (d6o); + if (olen > buflen) + break; + if (d6o.d6o_code != codenum || d6o.d6o_len == 0 || + (oldopt != NULL && bp <= (uchar_t *)oldopt)) { + bp += olen; + buflen -= olen; + continue; + } + if (retlenp != NULL) + *retlenp = olen; + /* LINTED : alignment */ + return ((dhcpv6_option_t *)bp); + } + return (NULL); +} + +/* + * Get the status code from a reply message. + */ +static int +get_dhcpv6_status(dhcpv6_message_t *dh6, uchar_t *end, uint16_t *status) +{ + dhcpv6_option_t *d6o; + uint_t olen; + uint16_t s; + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_STATUS_CODE, &olen); + + /* Success is implied if status code is missing */ + if (d6o == NULL) { + *status = DHCPV6_STAT_SUCCESS; + return (0); + } + if ((uchar_t *)d6o + olen > end) + return (EINVAL); + + olen -= sizeof (*d6o); + if (olen < sizeof (s)) + return (EINVAL); + + bcopy(&d6o[1], &s, sizeof (s)); + *status = ntohs(s); + return (0); +} + +/* + * Get the addresses from a reply message. 
+ */ +static int +get_dhcpv6_addrs(dhcpv6_message_t *dh6, uchar_t *end, dhcpv6_cid_t *cid) +{ + dhcpv6_option_t *d6o; + dhcpv6_addr_t *next; + uint_t olen; + + d6o = NULL; + while ((d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], + d6o, DHCPV6_OPT_IA_NA, &olen)) != NULL) { + dhcpv6_option_t *d6so; + dhcpv6_iaaddr_t d6ia; + dhcpv6_addr_t **addrp; + uchar_t *obase; + uint_t solen; + + if (olen < sizeof (dhcpv6_ia_na_t) || + (uchar_t *)d6o + olen > end) + goto fail; + + obase = (uchar_t *)d6o + sizeof (dhcpv6_ia_na_t); + olen -= sizeof (dhcpv6_ia_na_t); + d6so = NULL; + while ((d6so = get_dhcpv6_option(obase, olen, d6so, + DHCPV6_OPT_IAADDR, &solen)) != NULL) { + if (solen < sizeof (dhcpv6_iaaddr_t) || + (uchar_t *)d6so + solen > end) + goto fail; + + bcopy(d6so, &d6ia, sizeof (d6ia)); + for (addrp = &cid->dc_addr; *addrp != NULL; + addrp = &(*addrp)->da_next) { + if (bcmp(&(*addrp)->da_addr, &d6ia.d6ia_addr, + sizeof (in6_addr_t)) == 0) + goto fail; + } + if ((*addrp = kmem_zalloc(sizeof (dhcpv6_addr_t), + KM_NOSLEEP)) == NULL) + goto fail; + + bcopy(&d6ia.d6ia_addr, &(*addrp)->da_addr, + sizeof (in6_addr_t)); + cid->dc_addrcnt++; + } + } + if (cid->dc_addrcnt == 0) + return (ENOENT); + + return (0); + +fail: + for (; cid->dc_addr != NULL; cid->dc_addr = next) { + next = cid->dc_addr->da_next; + kmem_free(cid->dc_addr, sizeof (dhcpv6_addr_t)); + cid->dc_addrcnt--; + } + ASSERT(cid->dc_addrcnt == 0); + return (EINVAL); +} + +/* + * Free a cid. + * Before this gets called the caller must ensure that all the + * addresses are removed from the mci_v6_dyn_ip table. + */ +static void +free_dhcpv6_cid(dhcpv6_cid_t *cid) +{ + dhcpv6_addr_t *addr, *next; + uint_t cnt = 0; + + kmem_free(cid->dc_cid, cid->dc_cid_len); + for (addr = cid->dc_addr; addr != NULL; addr = next) { + next = addr->da_next; + kmem_free(addr, sizeof (*addr)); + cnt++; + } + ASSERT(cnt == cid->dc_addrcnt); + kmem_free(cid, sizeof (*cid)); +} + +/* + * Extract the DUID from a message. 
The associated addresses will be + * extracted later from the reply message. + */ +static dhcpv6_cid_t * +create_dhcpv6_cid(dhcpv6_message_t *dh6, uchar_t *end) +{ + dhcpv6_option_t *d6o; + dhcpv6_cid_t *cid; + uchar_t *rawcid; + uint_t olen, rawcidlen; + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_CLIENTID, &olen); + if (d6o == NULL || (uchar_t *)d6o + olen > end) + return (NULL); + + rawcidlen = olen - sizeof (*d6o); + if ((rawcid = kmem_zalloc(rawcidlen, KM_NOSLEEP)) == NULL) + return (NULL); + bcopy(d6o + 1, rawcid, rawcidlen); + + if ((cid = kmem_zalloc(sizeof (*cid), KM_NOSLEEP)) == NULL) { + kmem_free(rawcid, rawcidlen); + return (NULL); + } + cid->dc_cid = rawcid; + cid->dc_cid_len = rawcidlen; + return (cid); +} + +/* + * Remove a cid from mci_v6_cid. The addresses owned by the cid + * are also removed from mci_v6_dyn_ip. + */ +static void +remove_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + dhcpv6_addr_t *addr, *tmp_addr; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v6_cid, cid); + for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) { + tmp_addr = avl_find(&mcip->mci_v6_dyn_ip, addr, NULL); + if (tmp_addr == addr) + avl_remove(&mcip->mci_v6_dyn_ip, addr); + } +} + +/* + * Find and remove a matching cid and associated addresses from + * their respective tables. + */ +static void +release_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + dhcpv6_cid_t *oldcid; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if ((oldcid = avl_find(&mcip->mci_v6_cid, cid, NULL)) == NULL) + return; + + /* + * Since cid belongs to a pending txn, it can't possibly be in + * mci_v6_cid. Anything that's found must be an existing cid. + */ + ASSERT(oldcid != cid); + remove_dhcpv6_cid(mcip, oldcid); + free_dhcpv6_cid(oldcid); +} + +/* + * Insert cid into mci_v6_cid. 
+ */ +static int +insert_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + avl_index_t where; + dhcpv6_addr_t *addr; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v6_cid, cid, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v6_cid) >= dhcp_max_completed_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v6_cid, cid, where); + for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) { + if (avl_find(&mcip->mci_v6_dyn_ip, addr, &where) != NULL) + goto fail; + + avl_insert(&mcip->mci_v6_dyn_ip, addr, where); + } + return (0); + +fail: + remove_dhcpv6_cid(mcip, cid); + return (EEXIST); +} + +/* + * Check whether an IP address is in the dyn-ip table. + */ +static boolean_t +check_dhcpv6_dyn_ip(mac_client_impl_t *mcip, in6_addr_t *addr) +{ + dhcpv6_addr_t tmp_addr, *a; + + mutex_enter(&mcip->mci_protect_lock); + bcopy(addr, &tmp_addr.da_addr, sizeof (in6_addr_t)); + a = avl_find(&mcip->mci_v6_dyn_ip, &tmp_addr, NULL); + mutex_exit(&mcip->mci_protect_lock); + return (a != NULL); +} + +static dhcpv6_txn_t * +find_dhcpv6_pending_txn(mac_client_impl_t *mcip, uint32_t xid) +{ + dhcpv6_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + tmp_txn.dt_xid = xid; + return (avl_find(&mcip->mci_v6_pending_txn, &tmp_txn, NULL)); +} + +static void +remove_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v6_pending_txn, txn); +} + +static dhcpv6_txn_t * +create_dhcpv6_txn(uint32_t xid, dhcpv6_cid_t *cid) +{ + dhcpv6_txn_t *txn; + + if ((txn = kmem_zalloc(sizeof (dhcpv6_txn_t), KM_NOSLEEP)) == NULL) + return (NULL); + + txn->dt_xid = xid; + txn->dt_cid = cid; + txn->dt_timestamp = ddi_get_time(); + return (txn); +} + +static void +free_dhcpv6_txn(dhcpv6_txn_t *txn) +{ + if (txn->dt_cid != NULL) + free_dhcpv6_cid(txn->dt_cid); + kmem_free(txn, sizeof (dhcpv6_txn_t)); +} + +static int 
+insert_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v6_pending_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v6_pending_txn) >= dhcp_max_pending_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v6_pending_txn, txn, where); + return (0); +} + +/* + * Clean up all v6 tables. + */ +static void +flush_dhcpv6(mac_client_impl_t *mcip) +{ + void *cookie = NULL; + dhcpv6_cid_t *cid; + dhcpv6_txn_t *txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + while (avl_destroy_nodes(&mcip->mci_v6_dyn_ip, &cookie) != NULL) { + } + cookie = NULL; + while ((cid = avl_destroy_nodes(&mcip->mci_v6_cid, &cookie)) != NULL) { + free_dhcpv6_cid(cid); + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v6_pending_txn, + &cookie)) != NULL) { + free_dhcpv6_txn(txn); + } +} + +/* + * Cleanup stale DHCPv6 transactions. + */ +static void +txn_cleanup_v6(mac_client_impl_t *mcip) +{ + dhcpv6_txn_t *txn, *next, *txn_list = NULL; /* - * This handles the case where the mac header is not in - * the same mblk as the IP header. + * Find stale pending transactions and place them on a list + * to be removed. */ - if (start == mp->b_wptr) { - mp = mp->b_cont; + for (txn = avl_first(&mcip->mci_v6_pending_txn); txn != NULL; + txn = avl_walk(&mcip->mci_v6_pending_txn, txn, AVL_AFTER)) { + if (ddi_get_time() - txn->dt_timestamp > + txn_cleanup_interval) { + DTRACE_PROBE2(found__expired__txn, + mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + txn->dt_next = txn_list; + txn_list = txn; + } + } + + /* + * Remove and free stale pending transactions. + * Release any existing cids matching the stale transactions. 
+ */ + for (txn = txn_list; txn != NULL; txn = next) { + avl_remove(&mcip->mci_v6_pending_txn, txn); + release_dhcpv6_cid(mcip, txn->dt_cid); + next = txn->dt_next; + txn->dt_next = NULL; + + DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + free_dhcpv6_txn(txn); + } + +} + +/* + * Core logic for intercepting outbound DHCPv6 packets. + */ +static void +intercept_dhcpv6_outbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_txn_t *txn; + dhcpv6_cid_t *cid = NULL; + uint32_t xid; + uint8_t mtype; + + if (get_dhcpv6_info(ip6h, end, &dh6) != 0) + return; + + mtype = dh6->d6m_msg_type; + if (mtype != DHCPV6_MSG_REQUEST && mtype != DHCPV6_MSG_RENEW && + mtype != DHCPV6_MSG_REBIND && mtype != DHCPV6_MSG_RELEASE) + return; + + if ((cid = create_dhcpv6_cid(dh6, end)) == NULL) + return; + + mutex_enter(&mcip->mci_protect_lock); + if (mtype == DHCPV6_MSG_RELEASE) { + release_dhcpv6_cid(mcip, cid); + goto done; + } + xid = DHCPV6_GET_TRANSID(dh6); + if ((txn = find_dhcpv6_pending_txn(mcip, xid)) != NULL) { + DTRACE_PROBE2(update, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + txn->dt_timestamp = ddi_get_time(); + goto done; + } + if ((txn = create_dhcpv6_txn(xid, cid)) == NULL) + goto done; + + cid = NULL; + if (insert_dhcpv6_pending_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + free_dhcpv6_txn(txn); + goto done; + } + start_txn_cleanup_timer(mcip); + + DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + +done: + if (cid != NULL) + free_dhcpv6_cid(cid); + + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Core logic for intercepting inbound DHCPv6 packets. 
+ */ +static void +intercept_dhcpv6_inbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_txn_t *txn; + uint32_t xid; + uint8_t mtype; + uint16_t status; + + if (get_dhcpv6_info(ip6h, end, &dh6) != 0) + return; + + mtype = dh6->d6m_msg_type; + if (mtype != DHCPV6_MSG_REPLY) + return; + + mutex_enter(&mcip->mci_protect_lock); + xid = DHCPV6_GET_TRANSID(dh6); + if ((txn = find_dhcpv6_pending_txn(mcip, xid)) == NULL) { + DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip, + dhcpv6_message_t *, dh6); + goto done; + } + remove_dhcpv6_pending_txn(mcip, txn); + release_dhcpv6_cid(mcip, txn->dt_cid); + + if (get_dhcpv6_status(dh6, end, &status) != 0 || + status != DHCPV6_STAT_SUCCESS) { + DTRACE_PROBE2(error__status, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + if (get_dhcpv6_addrs(dh6, end, txn->dt_cid) != 0) { + DTRACE_PROBE2(no__addrs, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + if (insert_dhcpv6_cid(mcip, txn->dt_cid) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + + txn->dt_cid = NULL; + +done: + if (txn != NULL) + free_dhcpv6_txn(txn); + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Timer for cleaning up stale transactions. + */ +static void +txn_cleanup_timer(void *arg) +{ + mac_client_impl_t *mcip = arg; + + mutex_enter(&mcip->mci_protect_lock); + if (mcip->mci_txn_cleanup_tid == 0) { + /* do nothing if timer got cancelled */ + mutex_exit(&mcip->mci_protect_lock); + return; + } + mcip->mci_txn_cleanup_tid = 0; + + txn_cleanup_v4(mcip); + txn_cleanup_v6(mcip); + + /* + * Restart timer if pending transactions still exist. 
+ */ + if (!avl_is_empty(&mcip->mci_v4_pending_txn) || + !avl_is_empty(&mcip->mci_v6_pending_txn)) { + DTRACE_PROBE1(restarting__timer, mac_client_impl_t *, mcip); + + mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip, + drv_usectohz(txn_cleanup_interval * 1000000)); + } + mutex_exit(&mcip->mci_protect_lock); +} + +static void +start_txn_cleanup_timer(mac_client_impl_t *mcip) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (mcip->mci_txn_cleanup_tid == 0) { + mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip, + drv_usectohz(txn_cleanup_interval * 1000000)); + } +} + +static void +cancel_txn_cleanup_timer(mac_client_impl_t *mcip) +{ + timeout_id_t tid; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + + /* + * This needs to be a while loop because the timer could get + * rearmed during untimeout(). + */ + while ((tid = mcip->mci_txn_cleanup_tid) != 0) { + mcip->mci_txn_cleanup_tid = 0; + mutex_exit(&mcip->mci_protect_lock); + (void) untimeout(tid); + mutex_enter(&mcip->mci_protect_lock); + } +} + +/* + * Get the start/end pointers of an L3 packet and also do pullup if needed. + * pulled-up packet needs to be freed by the caller. + */ +static int +get_l3_info(mblk_t *mp, size_t hdrsize, uchar_t **start, uchar_t **end, + mblk_t **nmp) +{ + uchar_t *s, *e; + mblk_t *newmp = NULL; + + /* + * Pullup if necessary but reject packets that do not have + * a proper mac header. + */ + s = mp->b_rptr + hdrsize; + e = mp->b_wptr; + + if (s > mp->b_wptr) + return (EINVAL); + + if (!OK_32PTR(s) || mp->b_cont != NULL) { /* - * IP header missing. Let the packet through. + * Temporarily adjust mp->b_rptr to ensure proper + * alignment of IP header in newmp. 
*/ - if (mp == NULL) - return (0); + DTRACE_PROBE1(pullup__needed, mblk_t *, mp); + + mp->b_rptr += hdrsize; + newmp = msgpullup(mp, -1); + mp->b_rptr -= hdrsize; + + if (newmp == NULL) + return (ENOMEM); + + s = newmp->b_rptr; + e = newmp->b_wptr; + } + + *start = s; + *end = e; + *nmp = newmp; + return (0); +} + +void +mac_protect_intercept_dhcp_one(mac_client_impl_t *mcip, mblk_t *mp) +{ + mac_impl_t *mip = mcip->mci_mip; + uchar_t *start, *end; + mblk_t *nmp = NULL; + mac_header_info_t mhi; + int err; + + err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi); + if (err != 0) { + DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip, + mblk_t *, mp); + return; + } + + err = get_l3_info(mp, mhi.mhi_hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return; + } + + switch (mhi.mhi_bindsap) { + case ETHERTYPE_IP: { + ipha_t *ipha = (ipha_t *)start; + + if (start + sizeof (ipha_t) > end) + return; + + intercept_dhcpv4_inbound(mcip, ipha, end); + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + return; + + intercept_dhcpv6_inbound(mcip, ip6h, end); + break; + } + } + freemsg(nmp); +} + +void +mac_protect_intercept_dhcp(mac_client_impl_t *mcip, mblk_t *mp) +{ + /* + * Skip checks if we are part of an aggr. + */ + if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0) + return; + + for (; mp != NULL; mp = mp->b_next) + mac_protect_intercept_dhcp_one(mcip, mp); +} + +void +mac_protect_flush_dhcp(mac_client_impl_t *mcip) +{ + mutex_enter(&mcip->mci_protect_lock); + flush_dhcpv4(mcip); + flush_dhcpv6(mcip); + mutex_exit(&mcip->mci_protect_lock); +} + +void +mac_protect_cancel_timer(mac_client_impl_t *mcip) +{ + mutex_enter(&mcip->mci_protect_lock); + cancel_txn_cleanup_timer(mcip); + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Check if addr is in the 'allowed-ips' list. 
+ */ + +/* ARGSUSED */ +static boolean_t +ipnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *protect, + ipaddr_t *addr) +{ + uint_t i; + + /* + * The unspecified address is allowed. + */ + if (*addr == INADDR_ANY) + return (B_TRUE); + + for (i = 0; i < protect->mp_ipaddrcnt; i++) { + mac_ipaddr_t *v4addr = &protect->mp_ipaddrs[i]; + + if (v4addr->ip_version == IPV4_VERSION && + V4_PART_OF_V6(v4addr->ip_addr) == *addr) + return (B_TRUE); + } + return (check_dhcpv4_dyn_ip(mcip, *addr)); +} + +static boolean_t +ipnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *protect, + in6_addr_t *addr) +{ + uint_t i; + + /* + * The unspecified address and the v6 link local address are allowed. + */ + if (IN6_IS_ADDR_UNSPECIFIED(addr) || + ((mcip->mci_protect_flags & MPT_FLAG_V6_LOCAL_ADDR_SET) != 0 && + IN6_ARE_ADDR_EQUAL(&mcip->mci_v6_local_addr, addr))) + return (B_TRUE); + + + for (i = 0; i < protect->mp_ipaddrcnt; i++) { + mac_ipaddr_t *v6addr = &protect->mp_ipaddrs[i]; + + if (v6addr->ip_version == IPV6_VERSION && + IN6_ARE_ADDR_EQUAL(&v6addr->ip_addr, addr)) + return (B_TRUE); + } + return (check_dhcpv6_dyn_ip(mcip, addr)); +} + +/* + * Checks various fields within an IPv6 NDP packet. + */ +static boolean_t +ipnospoof_check_ndp(mac_client_impl_t *mcip, mac_protect_t *protect, + ip6_t *ip6h, uchar_t *end) +{ + icmp6_t *icmp_nd = (icmp6_t *)&ip6h[1]; + int hdrlen, optlen, opttype, len; + uint_t addrlen, maclen; + uint8_t type; + nd_opt_hdr_t *opt; + struct nd_opt_lla *lla = NULL; + + /* + * NDP packets do not have extension headers so the ICMPv6 header + * must immediately follow the IPv6 header. 
+ */ + if (ip6h->ip6_nxt != IPPROTO_ICMPV6) + return (B_TRUE); + + /* ICMPv6 header missing */ + if ((uchar_t *)&icmp_nd[1] > end) + return (B_FALSE); + + len = end - (uchar_t *)icmp_nd; + type = icmp_nd->icmp6_type; + + switch (type) { + case ND_ROUTER_SOLICIT: + hdrlen = sizeof (nd_router_solicit_t); + break; + case ND_ROUTER_ADVERT: + hdrlen = sizeof (nd_router_advert_t); + break; + case ND_NEIGHBOR_SOLICIT: + hdrlen = sizeof (nd_neighbor_solicit_t); + break; + case ND_NEIGHBOR_ADVERT: + hdrlen = sizeof (nd_neighbor_advert_t); + break; + case ND_REDIRECT: + hdrlen = sizeof (nd_redirect_t); + break; + default: + return (B_TRUE); + } + + if (len < hdrlen) + return (B_FALSE); + + /* SLLA option checking is needed for RS/RA/NS */ + opttype = ND_OPT_SOURCE_LINKADDR; + + switch (type) { + case ND_NEIGHBOR_ADVERT: { + nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp_nd; + + if (!ipnospoof_check_v6(mcip, protect, &na->nd_na_target)) { + DTRACE_PROBE2(ndp__na__fail, + mac_client_impl_t *, mcip, ip6_t *, ip6h); + return (B_FALSE); + } + + /* TLLA option for NA */ + opttype = ND_OPT_TARGET_LINKADDR; + break; + } + case ND_REDIRECT: { + /* option checking not needed for RD */ + return (B_TRUE); + } + default: + break; + } - start = mp->b_rptr; + if (len == hdrlen) { + /* no options, we're done */ + return (B_TRUE); } + opt = (nd_opt_hdr_t *)((uchar_t *)icmp_nd + hdrlen); + optlen = len - hdrlen; + + /* find the option header we need */ + while (optlen > sizeof (nd_opt_hdr_t)) { + if (opt->nd_opt_type == opttype) { + lla = (struct nd_opt_lla *)opt; + break; + } + optlen -= 8 * opt->nd_opt_len; + opt = (nd_opt_hdr_t *) + ((uchar_t *)opt + 8 * opt->nd_opt_len); + } + if (lla == NULL) + return (B_TRUE); + + addrlen = lla->nd_opt_lla_len * 8 - sizeof (nd_opt_hdr_t); + maclen = mcip->mci_mip->mi_info.mi_addr_length; + + if (addrlen != maclen || + bcmp(mcip->mci_unicast->ma_addr, + lla->nd_opt_lla_hdw_addr, maclen) != 0) { + DTRACE_PROBE2(ndp__lla__fail, + mac_client_impl_t 
*, mcip, ip6_t *, ip6h); + return (B_FALSE); + } + + DTRACE_PROBE2(ndp__lla__ok, mac_client_impl_t *, mcip, ip6_t *, ip6h); + return (B_TRUE); +} + +/* + * Enforce ip-nospoof protection. + */ +static int +ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, + mblk_t *mp, mac_header_info_t *mhip) +{ + size_t hdrsize = mhip->mhi_hdrsize; + uint32_t sap = mhip->mhi_bindsap; + uchar_t *start, *end; + mblk_t *nmp = NULL; + int err; + + err = get_l3_info(mp, hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return (err); + } + err = EINVAL; switch (sap) { case ETHERTYPE_IP: { ipha_t *ipha = (ipha_t *)start; - if (start + sizeof (ipha_t) > mp->b_wptr || !OK_32PTR(start)) + if (start + sizeof (ipha_t) > end) goto fail; - if (!ipnospoof_check_ips(protect, ipha->ipha_src)) + if (!ipnospoof_check_v4(mcip, protect, &ipha->ipha_src)) goto fail; + intercept_dhcpv4_outbound(mcip, ipha, end); break; } case ETHERTYPE_ARP: { @@ -103,7 +1703,7 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, ipaddr_t spaddr; uchar_t *shaddr; - if (start + sizeof (arh_t) > mp->b_wptr) + if (start + sizeof (arh_t) > end) goto fail; maclen = mcip->mci_mip->mi_info.mi_addr_length; @@ -114,7 +1714,7 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, goto fail; arplen = sizeof (arh_t) + 2 * hlen + 2 * plen; - if (start + arplen > mp->b_wptr) + if (start + arplen > end) goto fail; shaddr = start + sizeof (arh_t); @@ -123,20 +1723,230 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, goto fail; bcopy(shaddr + hlen, &spaddr, sizeof (spaddr)); - if (!ipnospoof_check_ips(protect, spaddr)) + if (!ipnospoof_check_v4(mcip, protect, &spaddr)) goto fail; break; } - default: + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + goto fail; + + if (!ipnospoof_check_v6(mcip, protect, &ip6h->ip6_src)) + goto fail; + + if 
(!ipnospoof_check_ndp(mcip, protect, ip6h, end)) + goto fail; + + intercept_dhcpv6_outbound(mcip, ip6h, end); break; } + } + freemsg(nmp); return (0); fail: - /* increment ipnospoof stat here */ + freemsg(nmp); return (err); } +static boolean_t +dhcpnospoof_check_cid(mac_protect_t *p, uchar_t *cid, uint_t cidlen) +{ + int i; + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *dcid = &p->mp_cids[i]; + + if (dcid->dc_len == cidlen && + bcmp(dcid->dc_id, cid, cidlen) == 0) + return (B_TRUE); + } + return (B_FALSE); +} + +static boolean_t +dhcpnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *p, + ipha_t *ipha, uchar_t *end) +{ + struct dhcp *dh4; + uchar_t *cid; + uint_t maclen, cidlen = 0; + uint8_t optlen; + int err; + + if ((err = get_dhcpv4_info(ipha, end, &dh4)) != 0) + return (err == EINVAL); + + maclen = mcip->mci_mip->mi_info.mi_addr_length; + if (dh4->hlen == maclen && + bcmp(mcip->mci_unicast->ma_addr, dh4->chaddr, maclen) != 0) { + return (B_FALSE); + } + if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &cid, &optlen) == 0) + cidlen = optlen; + + if (cidlen == 0) + return (B_TRUE); + + if (*cid == ARPHRD_ETHER && cidlen - 1 == maclen && + bcmp(mcip->mci_unicast->ma_addr, cid + 1, maclen) == 0) + return (B_TRUE); + + return (dhcpnospoof_check_cid(p, cid, cidlen)); +} + +static boolean_t +dhcpnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *p, + ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_option_t *d6o; + uint8_t mtype; + uchar_t *cid, *lladdr = NULL; + uint_t cidlen, maclen, addrlen = 0; + uint16_t cidtype; + int err; + + if ((err = get_dhcpv6_info(ip6h, end, &dh6)) != 0) + return (err == EINVAL); + + /* + * We only check client-generated messages. 
+ */ + mtype = dh6->d6m_msg_type; + if (mtype == DHCPV6_MSG_ADVERTISE || mtype == DHCPV6_MSG_REPLY || + mtype == DHCPV6_MSG_RECONFIGURE) + return (B_TRUE); + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_CLIENTID, &cidlen); + if (d6o == NULL || (uchar_t *)d6o + cidlen > end) + return (B_TRUE); + + cid = (uchar_t *)&d6o[1]; + cidlen -= sizeof (*d6o); + if (cidlen < sizeof (cidtype)) + return (B_TRUE); + + bcopy(cid, &cidtype, sizeof (cidtype)); + cidtype = ntohs(cidtype); + if (cidtype == DHCPV6_DUID_LLT && cidlen >= sizeof (duid_llt_t)) { + lladdr = cid + sizeof (duid_llt_t); + addrlen = cidlen - sizeof (duid_llt_t); + } + if (cidtype == DHCPV6_DUID_LL && cidlen >= sizeof (duid_ll_t)) { + lladdr = cid + sizeof (duid_ll_t); + addrlen = cidlen - sizeof (duid_ll_t); + } + maclen = mcip->mci_mip->mi_info.mi_addr_length; + if (lladdr != NULL && addrlen == maclen && + bcmp(mcip->mci_unicast->ma_addr, lladdr, maclen) == 0) { + return (B_TRUE); + } + return (dhcpnospoof_check_cid(p, cid, cidlen)); +} + +/* + * Enforce dhcp-nospoof protection. 
+ */ +static int +dhcpnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, + mblk_t *mp, mac_header_info_t *mhip) +{ + size_t hdrsize = mhip->mhi_hdrsize; + uint32_t sap = mhip->mhi_bindsap; + uchar_t *start, *end; + mblk_t *nmp = NULL; + int err; + + err = get_l3_info(mp, hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return (err); + } + err = EINVAL; + + switch (sap) { + case ETHERTYPE_IP: { + ipha_t *ipha = (ipha_t *)start; + + if (start + sizeof (ipha_t) > end) + goto fail; + + if (!dhcpnospoof_check_v4(mcip, protect, ipha, end)) + goto fail; + + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + goto fail; + + if (!dhcpnospoof_check_v6(mcip, protect, ip6h, end)) + goto fail; + + break; + } + } + freemsg(nmp); + return (0); + +fail: + /* increment dhcpnospoof stat here */ + freemsg(nmp); + return (err); +} + +/* + * This needs to be called whenever the mac client's mac address changes. + */ +void +mac_protect_update_v6_local_addr(mac_client_impl_t *mcip) +{ + uint8_t *p, *macaddr = mcip->mci_unicast->ma_addr; + uint_t i, media = mcip->mci_mip->mi_info.mi_media; + in6_addr_t token, *v6addr = &mcip->mci_v6_local_addr; + in6_addr_t ll_template = {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; + + + bzero(&token, sizeof (token)); + p = (uint8_t *)&token.s6_addr32[2]; + + switch (media) { + case DL_ETHER: + bcopy(macaddr, p, 3); + p[0] ^= 0x2; + p[3] = 0xff; + p[4] = 0xfe; + bcopy(macaddr + 3, p + 5, 3); + break; + case DL_IB: + ASSERT(mcip->mci_mip->mi_info.mi_addr_length == 20); + bcopy(macaddr + 12, p, 8); + p[0] |= 2; + break; + default: + /* + * We do not need to generate the local address for link types + * that do not support link protection. Wifi pretends to be + * ethernet so it is covered by the DL_ETHER case (note the + * use of mi_media instead of mi_nativemedia). 
+ */ + return; + } + + for (i = 0; i < 4; i++) { + v6addr->s6_addr32[i] = token.s6_addr32[i] | + ll_template.s6_addr32[i]; + } + mcip->mci_protect_flags |= MPT_FLAG_V6_LOCAL_ADDR_SET; +} + /* * Enforce link protection on one packet. */ @@ -159,7 +1969,6 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) mblk_t *, mp); return (err); } - protect = &mrp->mrp_protect; types = protect->mp_types; @@ -167,12 +1976,12 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) if (mhi.mhi_saddr != NULL && bcmp(mcip->mci_unicast->ma_addr, mhi.mhi_saddr, mip->mi_info.mi_addr_length) != 0) { + BUMP_STAT(mcip, macspoofed); DTRACE_PROBE2(mac__nospoof__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); } } - if ((types & MPT_RESTRICTED) != 0) { uint32_t vid = VLAN_ID(mhi.mhi_tci); uint32_t sap = mhi.mhi_bindsap; @@ -182,6 +1991,7 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) * the vid is not spoofed. */ if (vid != 0 && !mac_client_check_flow_vid(mcip, vid)) { + BUMP_STAT(mcip, restricted); DTRACE_PROBE2(restricted__vid__invalid, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); @@ -189,20 +1999,28 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) if (sap != ETHERTYPE_IP && sap != ETHERTYPE_IPV6 && sap != ETHERTYPE_ARP) { + BUMP_STAT(mcip, restricted); DTRACE_PROBE2(restricted__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); } } - if ((types & MPT_IPNOSPOOF) != 0) { - if ((err = ipnospoof_check(mcip, protect, - mp, &mhi)) != 0) { + if ((err = ipnospoof_check(mcip, protect, mp, &mhi)) != 0) { + BUMP_STAT(mcip, ipspoofed); DTRACE_PROBE2(ip__nospoof__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (err); } } + if ((types & MPT_DHCPNOSPOOF) != 0) { + if ((err = dhcpnospoof_check(mcip, protect, mp, &mhi)) != 0) { + BUMP_STAT(mcip, dhcpspoofed); + DTRACE_PROBE2(dhcp__nospoof__fail, + mac_client_impl_t *, mcip, mblk_t *, mp); + return (err); + } + } return (0); } @@ -242,11 +2060,89 @@ 
mac_protect_check(mac_client_handle_t mch, mblk_t *mp) boolean_t mac_protect_enabled(mac_client_handle_t mch, uint32_t type) { - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + return (MAC_PROTECT_ENABLED((mac_client_impl_t *)mch, type)); +} - ASSERT(mrp != NULL); - return ((mrp->mrp_protect.mp_types & type) != 0); +static int +validate_ips(mac_protect_t *p) +{ + uint_t i, j; + + if (p->mp_ipaddrcnt == MPT_RESET) + return (0); + + if (p->mp_ipaddrcnt > MPT_MAXIPADDR) + return (EINVAL); + + for (i = 0; i < p->mp_ipaddrcnt; i++) { + mac_ipaddr_t *addr = &p->mp_ipaddrs[i]; + + /* + * The unspecified address is implicitly allowed + * so there's no need to add it to the list. + */ + if (addr->ip_version == IPV4_VERSION) { + if (V4_PART_OF_V6(addr->ip_addr) == INADDR_ANY) + return (EINVAL); + } else if (addr->ip_version == IPV6_VERSION) { + if (IN6_IS_ADDR_UNSPECIFIED(&addr->ip_addr)) + return (EINVAL); + } else { + /* invalid ip version */ + return (EINVAL); + } + + for (j = 0; j < p->mp_ipaddrcnt; j++) { + mac_ipaddr_t *addr1 = &p->mp_ipaddrs[j]; + + if (i == j || addr->ip_version != addr1->ip_version) + continue; + + /* found a duplicate */ + if ((addr->ip_version == IPV4_VERSION && + V4_PART_OF_V6(addr->ip_addr) == + V4_PART_OF_V6(addr1->ip_addr)) || + IN6_ARE_ADDR_EQUAL(&addr->ip_addr, + &addr1->ip_addr)) + return (EINVAL); + } + } + return (0); +} + +/* ARGSUSED */ +static int +validate_cids(mac_protect_t *p) +{ + uint_t i, j; + + if (p->mp_cidcnt == MPT_RESET) + return (0); + + if (p->mp_cidcnt > MPT_MAXCID) + return (EINVAL); + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *cid = &p->mp_cids[i]; + + if (cid->dc_len > MPT_MAXCIDLEN || + (cid->dc_form != CIDFORM_TYPED && + cid->dc_form != CIDFORM_HEX && + cid->dc_form != CIDFORM_STR)) + return (EINVAL); + + for (j = 0; j < p->mp_cidcnt; j++) { + mac_dhcpcid_t *cid1 = &p->mp_cids[j]; + + if (i == j || cid->dc_len != cid1->dc_len) + continue; + + 
/* found a duplicate */ + if (bcmp(cid->dc_id, cid1->dc_id, cid->dc_len) == 0) + return (EINVAL); + } + } + return (0); } /* @@ -256,33 +2152,18 @@ int mac_protect_validate(mac_resource_props_t *mrp) { mac_protect_t *p = &mrp->mrp_protect; + int err; /* check for invalid types */ if (p->mp_types != MPT_RESET && (p->mp_types & ~MPT_ALL) != 0) return (EINVAL); - if (p->mp_ipaddrcnt != MPT_RESET) { - uint_t i, j; - - if (p->mp_ipaddrcnt > MPT_MAXIPADDR) - return (EINVAL); + if ((err = validate_ips(p)) != 0) + return (err); - for (i = 0; i < p->mp_ipaddrcnt; i++) { - /* - * The unspecified address is implicitly allowed - * so there's no need to add it to the list. - */ - if (p->mp_ipaddrs[i] == INADDR_ANY) - return (EINVAL); + if ((err = validate_cids(p)) != 0) + return (err); - for (j = 0; j < p->mp_ipaddrcnt; j++) { - /* found a duplicate */ - if (i != j && - p->mp_ipaddrs[i] == p->mp_ipaddrs[j]) - return (EINVAL); - } - } - } return (0); } @@ -326,9 +2207,8 @@ mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr) curr->mrp_mask |= MRP_PROTECT; } } - if (np->mp_ipaddrcnt != 0) { - if (np->mp_ipaddrcnt < MPT_MAXIPADDR) { + if (np->mp_ipaddrcnt <= MPT_MAXIPADDR) { bcopy(np->mp_ipaddrs, cp->mp_ipaddrs, sizeof (cp->mp_ipaddrs)); cp->mp_ipaddrcnt = np->mp_ipaddrcnt; @@ -337,4 +2217,47 @@ mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr) cp->mp_ipaddrcnt = 0; } } + if (np->mp_cidcnt != 0) { + if (np->mp_cidcnt <= MPT_MAXCID) { + bcopy(np->mp_cids, cp->mp_cids, sizeof (cp->mp_cids)); + cp->mp_cidcnt = np->mp_cidcnt; + } else if (np->mp_cidcnt == MPT_RESET) { + bzero(cp->mp_cids, sizeof (cp->mp_cids)); + cp->mp_cidcnt = 0; + } + } +} + +void +mac_protect_init(mac_client_impl_t *mcip) +{ + mutex_init(&mcip->mci_protect_lock, NULL, MUTEX_DRIVER, NULL); + mcip->mci_protect_flags = 0; + mcip->mci_txn_cleanup_tid = 0; + avl_create(&mcip->mci_v4_pending_txn, compare_dhcpv4_xid, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node)); + 
avl_create(&mcip->mci_v4_completed_txn, compare_dhcpv4_cid, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node)); + avl_create(&mcip->mci_v4_dyn_ip, compare_dhcpv4_ip, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_ipnode)); + avl_create(&mcip->mci_v6_pending_txn, compare_dhcpv6_xid, + sizeof (dhcpv6_txn_t), offsetof(dhcpv6_txn_t, dt_node)); + avl_create(&mcip->mci_v6_cid, compare_dhcpv6_cid, + sizeof (dhcpv6_cid_t), offsetof(dhcpv6_cid_t, dc_node)); + avl_create(&mcip->mci_v6_dyn_ip, compare_dhcpv6_ip, + sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node)); +} + +void +mac_protect_fini(mac_client_impl_t *mcip) +{ + avl_destroy(&mcip->mci_v6_dyn_ip); + avl_destroy(&mcip->mci_v6_cid); + avl_destroy(&mcip->mci_v6_pending_txn); + avl_destroy(&mcip->mci_v4_dyn_ip); + avl_destroy(&mcip->mci_v4_completed_txn); + avl_destroy(&mcip->mci_v4_pending_txn); + mcip->mci_txn_cleanup_tid = 0; + mcip->mci_protect_flags = 0; + mutex_destroy(&mcip->mci_protect_lock); } diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 43501e3505..f0fe17ae0a 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -40,6 +40,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/dld.h> #include <sys/modctl.h> #include <sys/fs/dv_node.h> @@ -53,6 +54,8 @@ #include <sys/ddi_intr_impl.h> #include <sys/disp.h> #include <sys/sdt.h> +#include <sys/pattr.h> +#include <sys/strsun.h> /* * MAC Provider Interface. @@ -298,8 +301,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) /* * Register the private properties. 
*/ - mac_register_priv_prop(mip, mregp->m_priv_props, - mregp->m_priv_prop_count); + mac_register_priv_prop(mip, mregp->m_priv_props); /* * Stash the driver callbacks into the mac_impl_t, but first sanity @@ -334,6 +336,9 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) * Initialize the capabilities */ + bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t)); + bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t)); + if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL)) mip->mi_state_flags |= MIS_IS_VNIC; @@ -371,18 +376,6 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) } /* - * The driver must set mc_tx entry point to NULL when it advertises - * CAP_RINGS for tx rings. - */ - if (mip->mi_tx_groups != NULL) { - if (mregp->m_callbacks->mc_tx != NULL) - goto fail; - } else { - if (mregp->m_callbacks->mc_tx == NULL) - goto fail; - } - - /* * Initialize MAC addresses. Must be called after mac_init_rings(). */ mac_init_macaddr(mip); @@ -396,7 +389,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) /* * Initialize the kstats for this device. */ - mac_stat_create(mip); + mac_driver_stat_create(mip); /* Zero out any properties. 
*/ bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t)); @@ -466,7 +459,7 @@ fail: mip->mi_info.mi_unicst_addr = NULL; } - mac_stat_destroy(mip); + mac_driver_stat_delete(mip); if (mip->mi_type != NULL) { atomic_dec_32(&mip->mi_type->mt_ref); @@ -484,6 +477,7 @@ fail: mac_minor_rele(minor); } + mip->mi_state_flags = 0; mac_unregister_priv_prop(mip); /* @@ -532,7 +526,7 @@ mac_unregister(mac_handle_t mh) ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags & MIS_EXCLUSIVE)); - mac_stat_destroy(mip); + mac_driver_stat_delete(mip); (void) mod_hash_remove(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name, &val); @@ -772,11 +766,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) void mac_tx_update(mac_handle_t mh) { - /* - * Walk the list of MAC clients (mac_client_handle) - * and update - */ - i_mac_tx_srs_notify((mac_impl_t *)mh, NULL); + mac_tx_ring_update(mh, NULL); } /* @@ -959,6 +949,151 @@ mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max) return (0); } +static void +mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring) +{ + mac_client_impl_t *mcip; + flow_entry_t *flent; + mac_soft_ring_set_t *mac_rx_srs; + mac_cpus_t *srs_cpu; + int i; + + if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) && + (!ring->mr_info.mri_intr.mi_ddi_shared)) { + /* interrupt can be re-targeted */ + ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); + flent = mcip->mci_flent; + if (ring->mr_type == MAC_RING_TYPE_RX) { + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_rx_srs = flent->fe_rx_srs[i]; + if (mac_rx_srs->srs_ring != ring) + continue; + srs_cpu = &mac_rx_srs->srs_cpu; + mutex_enter(&cpu_lock); + mac_rx_srs_retarget_intr(mac_rx_srs, + srs_cpu->mc_rx_intr_cpu); + mutex_exit(&cpu_lock); + break; + } + } else { + if (flent->fe_tx_srs != NULL) { + mutex_enter(&cpu_lock); + mac_tx_srs_retarget_intr( + flent->fe_tx_srs); + mutex_exit(&cpu_lock); + } + } + } +} + +/* + * Clients like aggr create pseudo rings (mac_ring_t) and 
expose them to + * their clients. There is a 1-1 mapping between the pseudo ring and the hardware + * ring. ddi interrupt handles are exported from the hardware ring to + * the pseudo ring. Thus when the interrupt handle changes, clients of + * aggr that are using the handle need to use the new handle and + * re-target their interrupts. + */ +static void +mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring, + ddi_intr_handle_t ddh) +{ + mac_ring_t *pring; + mac_group_t *pgroup; + mac_impl_t *pmip; + char macname[MAXNAMELEN]; + mac_perim_handle_t p_mph; + uint64_t saved_gen_num; + +again: + pring = (mac_ring_t *)ring->mr_prh; + pgroup = (mac_group_t *)pring->mr_gh; + pmip = (mac_impl_t *)pgroup->mrg_mh; + saved_gen_num = ring->mr_gen_num; + (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN); + /* + * We need to enter aggr's perimeter. The locking hierarchy + * dictates that aggr's perimeter should be entered first + * and then the port's perimeter. So drop the port's + * perimeter, enter aggr's and then re-enter port's + * perimeter. + */ + i_mac_perim_exit(mip); + /* + * While we know pmip is the aggr's mip, there is a + * possibility that aggr could have unregistered by + * the time we exit port's perimeter (mip) and + * enter aggr's perimeter (pmip). To avoid that + * scenario, enter aggr's perimeter using its name. + */ + if (mac_perim_enter_by_macname(macname, &p_mph) != 0) + return; + i_mac_perim_enter(mip); + /* + * Check if the ring got assigned to another aggregation before + * we could enter aggr's and the port's perimeter. When a ring + * gets deleted from an aggregation, it calls mac_stop_ring() + * which increments the generation number. So checking + * generation number will be enough.
+ */ + if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) { + i_mac_perim_exit(mip); + mac_perim_exit(p_mph); + i_mac_perim_enter(mip); + goto again; + } + + /* Check if pseudo ring is still present */ + if (ring->mr_prh != NULL) { + pring->mr_info.mri_intr.mi_ddi_handle = ddh; + pring->mr_info.mri_intr.mi_ddi_shared = + ring->mr_info.mri_intr.mi_ddi_shared; + if (ddh != NULL) + mac_ring_intr_retarget(pgroup, pring); + } + i_mac_perim_exit(mip); + mac_perim_exit(p_mph); +} +/* + * API called by driver to provide new interrupt handle for TX/RX rings. + * This usually happens when IRM (Interrupt Resource Management) + * framework either gives the driver more MSI-x interrupts or takes + * away MSI-x interrupts from the driver. + */ +void +mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh) +{ + mac_ring_t *ring = (mac_ring_t *)mrh; + mac_group_t *group = (mac_group_t *)ring->mr_gh; + mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; + + i_mac_perim_enter(mip); + ring->mr_info.mri_intr.mi_ddi_handle = ddh; + if (ddh == NULL) { + /* Interrupts being reset */ + ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE; + if (ring->mr_prh != NULL) { + mac_pseudo_ring_intr_retarget(mip, ring, ddh); + return; + } + } else { + /* New interrupt handle */ + mac_compare_ddi_handle(mip->mi_rx_groups, + mip->mi_rx_group_count, ring); + if (!ring->mr_info.mri_intr.mi_ddi_shared) { + mac_compare_ddi_handle(mip->mi_tx_groups, + mip->mi_tx_group_count, ring); + } + if (ring->mr_prh != NULL) { + mac_pseudo_ring_intr_retarget(mip, ring, ddh); + return; + } else { + mac_ring_intr_retarget(group, ring); + } + } + i_mac_perim_exit(mip); +} + /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */ /* @@ -1141,16 +1276,8 @@ mac_group_add_ring(mac_group_handle_t gh, int index) int ret; i_mac_perim_enter(mip); - - /* - * Only RX rings can be added or removed by drivers currently.
- */ - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ret = i_mac_group_add_ring(group, NULL, index); - i_mac_perim_exit(mip); - return (ret); } @@ -1166,13 +1293,167 @@ mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh) mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; i_mac_perim_enter(mip); + i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); + i_mac_perim_exit(mip); +} - /* - * Only RX rings can be added or removed by drivers currently. - */ - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); +/* + * mac_prop_info_*() callbacks called from the driver's prefix_propinfo() + * entry points. + */ - i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); +void +mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; - i_mac_perim_exit(mip); + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint8_t)); + + *(uint8_t *)(pr->pr_default) = val; + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint64_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint32_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str) +{ + mac_prop_info_state_t *pr = 
(mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + if (strlen(str) > pr->pr_default_size) + pr->pr_default_status = ENOBUFS; + else + (void) strlcpy(pr->pr_default, str, strlen(str)); + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph, + link_flowctrl_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min, + uint32_t max) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + mac_propval_range_t *range = pr->pr_range; + + /* nothing to do if the caller doesn't want the range info */ + if (range == NULL) + return; + + range->mpr_count = 1; + range->mpr_type = MAC_PROPVAL_UINT32; + range->mpr_range_uint32[0].mpur_min = min; + range->mpr_range_uint32[0].mpur_max = max; + pr->pr_flags |= MAC_PROP_INFO_RANGE; +} + +void +mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + pr->pr_perm = perm; + pr->pr_flags |= MAC_PROP_INFO_PERM; +} + +void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff, + uint32_t *end, uint32_t *value, uint32_t *flags_ptr) +{ + uint32_t flags; + + ASSERT(DB_TYPE(mp) == M_DATA); + + flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS; + if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) { + if (value != NULL) + *value = (uint32_t)DB_CKSUM16(mp); + if ((flags & HCK_PARTIALCKSUM) != 0) { + if (start != NULL) + *start = (uint32_t)DB_CKSUMSTART(mp); + if (stuff != NULL) + *stuff = (uint32_t)DB_CKSUMSTUFF(mp); + if (end != NULL) + *end = (uint32_t)DB_CKSUMEND(mp); 
+ } + } + + if (flags_ptr != NULL) + *flags_ptr = flags; +} + +void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, + uint32_t end, uint32_t value, uint32_t flags) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + DB_CKSUMSTART(mp) = (intptr_t)start; + DB_CKSUMSTUFF(mp) = (intptr_t)stuff; + DB_CKSUMEND(mp) = (intptr_t)end; + DB_CKSUMFLAGS(mp) = (uint16_t)flags; + DB_CKSUM16(mp) = (uint16_t)value; +} + +void +mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + if (flags != NULL) { + *flags = DB_CKSUMFLAGS(mp) & HW_LSO; + if ((*flags != 0) && (mss != NULL)) + *mss = (uint32_t)DB_LSOMSS(mp); + } } diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index 8b7f718497..9e1b2b0a55 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -50,6 +50,8 @@ static mac_tx_cookie_t mac_tx_fanout_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); static mac_tx_cookie_t mac_tx_bw_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); +static mac_tx_cookie_t mac_tx_aggr_mode(mac_soft_ring_set_t *, mblk_t *, + uintptr_t, uint16_t, mblk_t **); typedef struct mac_tx_mode_s { mac_tx_srs_mode_t mac_tx_mode; @@ -57,18 +59,34 @@ typedef struct mac_tx_mode_s { } mac_tx_mode_t; /* - * There are five modes of operation on the Tx side. These modes get set + * There are seven modes of operation on the Tx side. These modes get set * in mac_tx_srs_setup(). Except for the experimental TX_SERIALIZE mode, * none of the other modes are user configurable. They get selected by * the system depending upon whether the link (or flow) has multiple Tx - * rings or a bandwidth configured, etc. + * rings or a bandwidth configured, or if the link is an aggr, etc. 
+ * + * When the Tx SRS is operating in aggr mode (st_mode) or if there are + * multiple Tx rings owned by Tx SRS, then each Tx ring (pseudo or + * otherwise) will have a soft ring associated with it. These soft rings + * are stored in srs_tx_soft_rings[] array. + * + * Additionally in the case of aggr, there is the st_soft_rings[] array + * in the mac_srs_tx_t structure. This array is used to store the same + * set of soft rings that are present in srs_tx_soft_rings[] array but + * in a different manner. The soft ring associated with the pseudo Tx + * ring is saved at mr_index (of the pseudo ring) in st_soft_rings[] + * array. This helps in quickly getting the soft ring associated with the + * Tx ring when aggr_find_tx_ring() returns the pseudo Tx ring that is to + * be used for transmit. */ mac_tx_mode_t mac_tx_mode_list[] = { {SRS_TX_DEFAULT, mac_tx_single_ring_mode}, {SRS_TX_SERIALIZE, mac_tx_serializer_mode}, {SRS_TX_FANOUT, mac_tx_fanout_mode}, {SRS_TX_BW, mac_tx_bw_mode}, - {SRS_TX_BW_FANOUT, mac_tx_bw_mode} + {SRS_TX_BW_FANOUT, mac_tx_bw_mode}, + {SRS_TX_AGGR, mac_tx_aggr_mode}, + {SRS_TX_BW_AGGR, mac_tx_bw_mode} }; /* @@ -307,21 +325,16 @@ int mac_srs_worker_wakeup_ticks = 0; } \ } -#define TX_SINGLE_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_DEFAULT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_SERIALIZE || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW) - #define TX_BANDWIDTH_MODE(mac_srs) \ ((mac_srs)->srs_tx.st_mode == SRS_TX_BW || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT || \ + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_AGGR) #define TX_SRS_TO_SOFT_RING(mac_srs, head, hint) { \ - uint_t hash, indx; \ - hash = HASH_HINT(hint); \ - indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count); \ - softring = mac_srs->srs_oth_soft_rings[indx]; \ - (void) (mac_tx_soft_ring_process(softring, head, 0, NULL)); \ + if (tx_mode == SRS_TX_BW_FANOUT) \ + (void) mac_tx_fanout_mode(mac_srs, head, hint, 0, 
NULL);\ + else \ + (void) mac_tx_aggr_mode(mac_srs, head, hint, 0, NULL); \ } /* @@ -341,7 +354,7 @@ int mac_srs_worker_wakeup_ticks = 0; } else { \ ASSERT(!((srs)->srs_state & SRS_TX_BLOCKED)); \ (srs)->srs_state |= SRS_TX_BLOCKED; \ - (srs)->srs_tx.st_blocked_cnt++; \ + (srs)->srs_tx.st_stat.mts_blockcnt++; \ } \ } @@ -364,7 +377,7 @@ int mac_srs_worker_wakeup_ticks = 0; (srs)->srs_tx.st_hiwat_cnt++; \ if ((srs)->srs_count > (srs)->srs_tx.st_max_q_cnt) { \ /* increment freed stats */ \ - (srs)->srs_tx.st_drop_count += cnt; \ + (srs)->srs_tx.st_stat.mts_sdrops += cnt; \ /* \ * b_prev may be set to the fanout hint \ * hence can't use freemsg directly \ @@ -391,7 +404,7 @@ int mac_srs_worker_wakeup_ticks = 0; #define MAC_TX_SRS_DROP_MESSAGE(srs, mp, cookie) { \ mac_pkt_drop(NULL, NULL, mp, B_FALSE); \ /* increment freed stats */ \ - mac_srs->srs_tx.st_drop_count++; \ + mac_srs->srs_tx.st_stat.mts_sdrops++; \ cookie = (mac_tx_cookie_t)srs; \ } @@ -415,7 +428,7 @@ mac_rx_drop_pkt(mac_soft_ring_set_t *srs, mblk_t *mp) MAC_UPDATE_SRS_SIZE_LOCKED(srs, msgdsize(mp)); mutex_exit(&srs->srs_lock); - srs_rx->sr_drop_count++; + srs_rx->sr_stat.mrs_sdrops++; freemsg(mp); } @@ -448,7 +461,7 @@ mac_srs_fire(void *arg) * 'hint' is fanout_hint (type of uint64_t) which is given by the TCP/IP stack, * and it is used on the TX path. */ -#define HASH_HINT(hint) \ +#define HASH_HINT(hint) \ ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) @@ -797,8 +810,8 @@ mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp, * packets or because mblk's need to be concatenated using * pullupmsg(). 
*/ - if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(mp, ip6h, - &hdr_len, &nexthdr, NULL, NULL)) { + if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h, + mp->b_wptr, &hdr_len, &nexthdr, NULL)) { goto src_based_fanout; } whereptr = (uint8_t *)ip6h + hdr_len; @@ -1302,13 +1315,8 @@ check_again: tail->b_next = NULL; smcip = mac_srs->srs_mcip; - if ((mac_srs->srs_type & SRST_FLOW) || - (smcip == NULL)) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, - rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, - ipackets, count); - } + SRS_RX_STAT_UPDATE(mac_srs, pollbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, pollcnt, count); /* * If there are any promiscuous mode callbacks @@ -1316,9 +1324,6 @@ check_again: * if appropriate and also update the counters. */ if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; - if (smcip->mci_mip->mi_promisc_list != NULL) { mutex_exit(lock); mac_promisc_dispatch(smcip->mci_mip, @@ -1331,15 +1336,14 @@ check_again: mac_srs->srs_bw->mac_bw_polled += sz; mutex_exit(&mac_srs->srs_bw->mac_bw_lock); } - srs_rx->sr_poll_count += count; MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, head, tail, count, sz); if (count <= 10) - srs_rx->sr_chain_cnt_undr10++; + srs_rx->sr_stat.mrs_chaincntundr10++; else if (count > 10 && count <= 50) - srs_rx->sr_chain_cnt_10to50++; + srs_rx->sr_stat.mrs_chaincnt10to50++; else - srs_rx->sr_chain_cnt_over50++; + srs_rx->sr_stat.mrs_chaincntover50++; } /* @@ -1637,10 +1641,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, 
MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -1886,7 +1897,7 @@ again: /* zero bandwidth: drop all and return to interrupt mode */ mutex_enter(&mac_srs->srs_bw->mac_bw_lock); if (mac_srs->srs_bw->mac_bw_limit == 0) { - srs_rx->sr_drop_count += cnt; + srs_rx->sr_stat.mrs_sdrops += cnt; ASSERT(mac_srs->srs_bw->mac_bw_sz >= sz); mac_srs->srs_bw->mac_bw_sz -= sz; mac_srs->srs_bw->mac_bw_drop_bytes += sz; @@ -1908,10 +1919,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -2285,7 +2303,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, size_t sz = 0; size_t chain_sz, sz1; mac_bw_ctl_t *mac_bw; - mac_client_impl_t *smcip; mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; /* @@ -2302,15 +2319,14 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } mutex_enter(&mac_srs->srs_lock); - smcip = mac_srs->srs_mcip; - if (mac_srs->srs_type & SRST_FLOW || smcip == NULL) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, ipackets, count); - } - if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; + if (loopback) { + SRS_RX_STAT_UPDATE(mac_srs, lclbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, lclcnt, count); + + } else { + SRS_RX_STAT_UPDATE(mac_srs, intrbytes, 
sz); + SRS_RX_STAT_UPDATE(mac_srs, intrcnt, count); } /* @@ -2323,12 +2339,10 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, mac_bw = mac_srs->srs_bw; ASSERT(mac_bw != NULL); mutex_enter(&mac_bw->mac_bw_lock); - /* Count the packets and bytes via interrupt */ - srs_rx->sr_intr_count += count; mac_bw->mac_bw_intr += sz; if (mac_bw->mac_bw_limit == 0) { /* zero bandwidth: drop all */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); mutex_exit(&mac_srs->srs_lock); @@ -2370,7 +2384,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } if (head != NULL) { /* Drop any packet over the threshold */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2392,7 +2406,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, if (!(mac_srs->srs_type & SRST_BW_CONTROL) && (srs_rx->sr_poll_pkt_cnt > srs_rx->sr_hiwat)) { mac_bw = mac_srs->srs_bw; - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2402,8 +2416,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, mp_chain, tail, count, sz); - /* Count the packets entering via interrupt path */ - srs_rx->sr_intr_count += count; if (!(mac_srs->srs_state & SRS_PROC)) { /* @@ -2510,7 +2522,7 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, /* * Ignore fanout hint if we don't have multiple tx rings. 
*/ - if (!TX_MULTI_RING_MODE(mac_srs)) + if (!MAC_TX_SOFT_RINGS(mac_srs)) fanout_hint = 0; if (mac_srs->srs_first != NULL) @@ -2550,25 +2562,30 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } /* - * There are five tx modes: + * There are seven tx modes: * * 1) Default mode (SRS_TX_DEFAULT) * 2) Serialization mode (SRS_TX_SERIALIZE) * 3) Fanout mode (SRS_TX_FANOUT) * 4) Bandwdith mode (SRS_TX_BW) * 5) Fanout and Bandwidth mode (SRS_TX_BW_FANOUT) + * 6) aggr Tx mode (SRS_TX_AGGR) + * 7) aggr Tx bw mode (SRS_TX_BW_AGGR) * * The tx mode in which an SRS operates is decided in mac_tx_srs_setup() * based on the number of Tx rings requested for an SRS and whether * bandwidth control is requested or not. * - * In the default mode (i.e., no fanout/no bandwidth), the SRS acts as a - * pass-thru. Packets will go directly to mac_tx_send(). When the underlying - * Tx ring runs out of Tx descs, it starts queueing up packets in SRS. - * When flow-control is relieved, the srs_worker drains the queued - * packets and informs blocked clients to restart sending packets. + * The default mode (i.e., no fanout/no bandwidth) is used when the + * underlying NIC does not have Tx rings or just one Tx ring. In this mode, + * the SRS acts as a pass-thru. Packets will go directly to mac_tx_send(). + * When the underlying Tx ring runs out of Tx descs, it starts queueing up + * packets in SRS. When flow-control is relieved, the srs_worker drains + * the queued packets and informs blocked clients to restart sending + * packets. * - * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. + * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. This + * mode is used when the link has no Tx rings or only one Tx ring. * * In the SRS_TX_FANOUT mode, packets will be fanned out to multiple * Tx rings. Each Tx ring will have a soft ring associated with it. @@ -2581,6 +2598,19 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, * SRS. 
If fanout to multiple Tx rings is configured, the packets will * be fanned out among the soft rings associated with the Tx rings. * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a pseudo Tx ring + * belonging to a port on which the packet has to be sent. Aggr will + * always have a pseudo Tx ring associated with it even when it is an + * aggregation over a single NIC that has no Tx rings. Even in such a + * case, the single pseudo Tx ring will have a soft ring associated with + * it and the soft ring will hang off the SRS. + * + * If a bandwidth is specified for an aggr, SRS_TX_BW_AGGR mode is used. + * In this mode, the bandwidth is first applied on the outgoing packets + * and later mac_tx_aggr_mode() function is called to send the packet out + * of one of the pseudo Tx rings. + * * Four flags are used in srs_state for indicating flow control * conditions : SRS_TX_BLOCKED, SRS_TX_HIWAT, SRS_TX_WAKEUP_CLIENT. * SRS_TX_BLOCKED indicates out of Tx descs. SRS expects a wakeup from the @@ -2625,7 +2655,6 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; @@ -2656,10 +2685,8 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); /* * Multiple threads could be here sending packets.
@@ -2676,9 +2703,7 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } @@ -2696,7 +2721,6 @@ static mac_tx_cookie_t mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; @@ -2726,10 +2750,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mac_srs->srs_state |= SRS_PROC; mutex_exit(&mac_srs->srs_lock); - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); mutex_enter(&mac_srs->srs_lock); mac_srs->srs_state &= ~SRS_PROC; @@ -2747,8 +2769,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } mutex_exit(&mac_srs->srs_lock); - if (is_subflow && cookie == NULL) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + if (cookie == NULL) + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (cookie); } @@ -2766,8 +2788,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, */ #define MAC_TX_SOFT_RING_PROCESS(chain) { \ - index = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count), \ - softring = mac_srs->srs_oth_soft_rings[index]; \ + index = COMPUTE_INDEX(hash, mac_srs->srs_tx_ring_count), \ + softring = mac_srs->srs_tx_soft_rings[index]; \ cookie = mac_tx_soft_ring_process(softring, chain, flag, ret_mp); \ DTRACE_PROBE2(tx__fanout, uint64_t, hash, uint_t, index); \ } @@ -2781,7 +2803,8 @@ mac_tx_fanout_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uint_t index; mac_tx_cookie_t cookie = NULL; - ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + 
mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); if (fanout_hint != 0) { /* * The hint is specified by the caller, simply pass the @@ -2926,18 +2949,18 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, hash = HASH_HINT(fanout_hint); indx = COMPUTE_INDEX(hash, - mac_srs->srs_oth_ring_count); - softring = mac_srs->srs_oth_soft_rings[indx]; + mac_srs->srs_tx_ring_count); + softring = mac_srs->srs_tx_soft_rings[indx]; return (mac_tx_soft_ring_process(softring, mp_chain, flag, ret_mp)); + } else if (srs_tx->st_mode == SRS_TX_BW_AGGR) { + return (mac_tx_aggr_mode(mac_srs, mp_chain, + fanout_hint, flag, ret_mp)); } else { - boolean_t is_subflow; mac_tx_stats_t stats; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); if (mp_chain != NULL) { mutex_enter(&mac_srs->srs_lock); @@ -2951,13 +2974,68 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } } +/* + * mac_tx_aggr_mode + * + * This routine invokes an aggr function, aggr_find_tx_ring(), to find + * a (pseudo) Tx ring belonging to a port on which the packet has to + * be sent. aggr_find_tx_ring() first finds the outgoing port based on + * L2/L3/L4 policy and then uses the fanout_hint passed to it to pick + * a Tx ring from the selected port. + * + * Note that a port can be deleted from the aggregation. In such a case, + * the aggregation layer first separates the port from the rest of the + * ports making sure that port (and thus any Tx rings associated with + * it) won't get selected in the call to aggr_find_tx_ring() function. 
+ * Later calls are made to mac_group_rem_ring() passing pseudo Tx ring + * handles one by one which in turn will quiesce the Tx SRS and remove + * the soft ring associated with the pseudo Tx ring. Unlike Rx side + * where a cookie is used to protect against mac_rx_ring() calls on + * rings that have been removed, no such cookie is needed on the Tx + * side as the pseudo Tx ring won't be available anymore to + * aggr_find_tx_ring() once the port has been removed. + */ +static mac_tx_cookie_t +mac_tx_aggr_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, + uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) +{ + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_tx_ring_fn_t find_tx_ring_fn; + mac_ring_handle_t ring = NULL; + void *arg; + mac_soft_ring_t *sringp; + + find_tx_ring_fn = srs_tx->st_capab_aggr.mca_find_tx_ring_fn; + arg = srs_tx->st_capab_aggr.mca_arg; + if (find_tx_ring_fn(arg, mp_chain, fanout_hint, &ring) == NULL) + return (NULL); + sringp = srs_tx->st_soft_rings[((mac_ring_t *)ring)->mr_index]; + return (mac_tx_soft_ring_process(sringp, mp_chain, flag, ret_mp)); +} + +void +mac_tx_invoke_callbacks(mac_client_impl_t *mcip, mac_tx_cookie_t cookie) +{ + mac_cb_t *mcb; + mac_tx_notify_cb_t *mtnfp; + + /* Wakeup callback registered clients */ + MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); + for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; + mcb = mcb->mcb_nextp) { + mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; + mtnfp->mtnf_fn(mtnfp->mtnf_arg, cookie); + } + MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, + &mcip->mci_tx_notify_cb_list); +} + /* ARGSUSED */ void mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) @@ -2966,7 +3044,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) size_t sz; uint32_t tx_mode; uint_t saved_pkt_count; - boolean_t is_subflow; mac_tx_stats_t stats; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; clock_t now; @@ -2977,7 +3054,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, 
uint_t proc_type) mac_srs->srs_state |= SRS_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); tx_mode = srs_tx->st_mode; if (tx_mode == SRS_TX_DEFAULT || tx_mode == SRS_TX_SERIALIZE) { if (mac_srs->srs_first != NULL) { @@ -3000,16 +3076,13 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } } else if (tx_mode == SRS_TX_BW) { @@ -3065,10 +3138,10 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; - size_sent = sz - stats.ts_obytes; + size_sent = sz - stats.mts_obytes; mac_srs->srs_size += size_sent; mac_srs->srs_bw->mac_bw_sz += size_sent; if (mac_srs->srs_bw->mac_bw_used > size_sent) { @@ -3080,15 +3153,11 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } - } else if (tx_mode == SRS_TX_BW_FANOUT) { + } else if (tx_mode == SRS_TX_BW_FANOUT || tx_mode == SRS_TX_BW_AGGR) { mblk_t *prev; - mac_soft_ring_t *softring; uint64_t hint; /* @@ -3155,8 +3224,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) */ if (mac_srs->srs_count == 0 && (mac_srs->srs_state & (SRS_TX_HIWAT | SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED))) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; 
mac_client_impl_t *mcip = mac_srs->srs_mcip; boolean_t wakeup_required = B_FALSE; @@ -3168,16 +3235,7 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED); mutex_exit(&mac_srs->srs_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)mac_srs); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)mac_srs); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper @@ -3276,11 +3334,10 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } /* - * Fastpath: if there's only one client, and there's no - * multicast listeners, we simply send the packet down to the - * underlying NIC. + * Fastpath: if there's only one client, we simply send + * the packet down to the underlying NIC. */ - if (mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL) { + if (mip->mi_nactiveclients == 1) { DTRACE_PROBE2(fastpath, mac_client_impl_t *, src_mcip, mblk_t *, mp_chain); @@ -3293,9 +3350,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, msgdsize(mp)); CHECK_VID_AND_ADD_TAG(mp); - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); /* * If the driver is out of descriptors and does a @@ -3336,12 +3391,6 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, CHECK_VID_AND_ADD_TAG(mp); /* - * Check if there are promiscuous mode callbacks defined. - */ - if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp, src_mcip); - - /* * Find the destination. 
*/ dst_flow_ent = mac_tx_classify(mip, mp); @@ -3395,16 +3444,31 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, B_TRUE); } else { /* - * loopback the packet to a - * local MAC client. We force a context - * switch if both source and destination - * MAC clients are used by IP, i.e. bypass - * is set. + * loopback the packet to a local MAC + * client. We force a context switch + * if both source and destination MAC + * clients are used by IP, i.e. + * bypass is set. */ boolean_t do_switch; mac_client_impl_t *dst_mcip = dst_flow_ent->fe_mcip; + /* + * Check if there are promiscuous mode + * callbacks defined. This check is + * done here in the 'else' case and + * not in other cases because this + * path is for local loopback + * communication which does not go + * through MAC_TX(). For paths that go + * through MAC_TX(), the promisc_list + * check is done inside the MAC_TX() + * macro. + */ + if (mip->mi_promisc_list != NULL) + mac_promisc_dispatch(mip, mp, src_mcip); + do_switch = ((src_mcip->mci_state_flags & dst_mcip->mci_state_flags & MCIS_CLIENT_POLL_CAPABLE) != 0); @@ -3422,9 +3486,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * Unknown destination, send via the underlying * NIC. 
*/ - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); if (mp != NULL) { /* * Adjust for the last packet that @@ -3440,15 +3502,9 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } done: - src_mcip->mci_stat_obytes += obytes; - src_mcip->mci_stat_opackets += opackets; - src_mcip->mci_stat_oerrors += oerrors; - - if (stats != NULL) { - stats->ts_opackets = opackets; - stats->ts_obytes = obytes; - stats->ts_oerrors = oerrors; - } + stats->mts_obytes = obytes; + stats->mts_opackets = opackets; + stats->mts_oerrors = oerrors; return (mp); } @@ -3466,8 +3522,8 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) if (srs->srs_tx.st_arg2 == tx_ring) return (B_TRUE); - for (i = 0; i < srs->srs_oth_ring_count; i++) { - soft_ring = srs->srs_oth_soft_rings[i]; + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) return (B_TRUE); } @@ -3476,6 +3532,29 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) } /* + * mac_tx_srs_get_soft_ring + * + * Returns the TX soft ring associated with the given ring, if present. + */ +mac_soft_ring_t * +mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) +{ + int i; + mac_soft_ring_t *soft_ring; + + if (srs->srs_tx.st_arg2 == tx_ring) + return (NULL); + + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; + if (soft_ring->s_ring_tx_arg2 == tx_ring) + return (soft_ring); + } + + return (NULL); +} + +/* * mac_tx_srs_wakeup * * Called when Tx desc become available. Wakeup the appropriate worker @@ -3490,11 +3569,16 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; mutex_enter(&mac_srs->srs_lock); - if (TX_SINGLE_RING_MODE(mac_srs)) { + /* + * srs_tx_ring_count == 0 is the single ring mode case. 
In + * this mode, there will not be Tx soft rings associated + * with the SRS. + */ + if (!MAC_TX_SOFT_RINGS(mac_srs)) { if (srs_tx->st_arg2 == ring && mac_srs->srs_state & SRS_TX_BLOCKED) { mac_srs->srs_state &= ~SRS_TX_BLOCKED; - srs_tx->st_unblocked_cnt++; + srs_tx->st_stat.mts_unblockcnt++; cv_signal(&mac_srs->srs_async); } /* @@ -3507,15 +3591,17 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) return; } - /* If you are here, it is for FANOUT or BW_FANOUT case */ - ASSERT(TX_MULTI_RING_MODE(mac_srs)); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + /* + * If you are here, it is for FANOUT, BW_FANOUT, + * AGGR_MODE or AGGR_BW_MODE case + */ + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_tx_arg2 == ring) { if (sringp->s_ring_state & S_RING_BLOCK) { sringp->s_ring_state &= ~S_RING_BLOCK; - sringp->s_ring_unblocked_cnt++; + sringp->s_st_stat.mts_unblockcnt++; cv_signal(&sringp->s_ring_async); } sringp->s_ring_tx_woken_up = B_TRUE; @@ -3619,6 +3705,7 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, mutex_enter(&ringp->s_ring_lock); ringp->s_ring_total_inpkt += cnt; + ringp->s_ring_total_rbytes += sz; if ((mac_srs->srs_rx.sr_poll_pkt_cnt <= 1) && !(ringp->s_ring_type & ST_RING_WORKER_ONLY)) { /* If on processor or blanking on, then enqueue and return */ @@ -3831,11 +3918,14 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, ASSERT(mp_chain != NULL); ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock)); /* - * Only two modes can come here; either it can be - * SRS_TX_BW_FANOUT or SRS_TX_FANOUT + * The following modes can come here: SRS_TX_BW_FANOUT, + * SRS_TX_FANOUT, SRS_TX_AGGR, SRS_TX_BW_AGGR. 
*/ + ASSERT(MAC_TX_SOFT_RINGS(mac_srs)); ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || - mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); + mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); if (ringp->s_ring_type & ST_RING_WORKER_ONLY) { /* Serialization mode */ @@ -3871,7 +3961,6 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, * tx_srs_drain() completely drains out the * messages. */ - boolean_t is_subflow; mac_tx_stats_t stats; if (ringp->s_ring_state & S_RING_ENQUEUED) { @@ -3890,11 +3979,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, */ mutex_exit(&ringp->s_ring_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); mp_chain = mac_tx_send(ringp->s_ring_tx_arg1, - ringp->s_ring_tx_arg2, mp_chain, - (is_subflow ? &stats : NULL)); + ringp->s_ring_tx_arg2, mp_chain, &stats); /* * Multiple threads could be here sending packets. @@ -3912,9 +3999,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, mutex_exit(&ringp->s_ring_lock); return (cookie); } - if (is_subflow) { - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); + return (NULL); } } diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c index 25cc66ed52..151c99893b 100644 --- a/usr/src/uts/common/io/mac/mac_soft_ring.c +++ b/usr/src/uts/common/io/mac/mac_soft_ring.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,6 +88,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> #include <sys/mac_flow_impl.h> +#include <sys/mac_stat.h> static void mac_rx_soft_ring_drain(mac_soft_ring_t *); static void mac_soft_ring_fire(void *); @@ -145,7 +146,7 @@ mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp) * thread to the assigned CPU. */ mac_soft_ring_t * -mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, +mac_soft_ring_create(int id, clock_t wait, uint16_t type, pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2) @@ -162,9 +163,13 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, } else if (type & ST_RING_UDP) { (void) snprintf(name, sizeof (name), "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs); - } else { + } else if (type & ST_RING_OTH) { (void) snprintf(name, sizeof (name), "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs); + } else { + ASSERT(type & ST_RING_TX); + (void) snprintf(name, sizeof (name), + "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs); } bzero(ringp, sizeof (mac_soft_ring_t)); @@ -177,7 +182,6 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, ringp->s_ring_wait = MSEC_TO_TICK(wait); ringp->s_ring_mcip = mcip; ringp->s_ring_set = mac_srs; - ringp->s_ring_flent = flent; /* * Protect against access from DR callbacks (mac_walk_srs_bind/unbind) @@ -202,6 +206,14 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, ringp->s_ring_tx_hiwat = (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ? 
mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat; + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + ASSERT(tx->st_soft_rings[ + ((mac_ring_t *)x_arg2)->mr_index] == NULL); + tx->st_soft_rings[((mac_ring_t *)x_arg2)->mr_index] = + ringp; + } } else { ringp->s_ring_drain_func = mac_rx_soft_ring_drain; ringp->s_ring_rx_func = rx_func; @@ -213,6 +225,8 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, if (cpuid != -1) (void) mac_soft_ring_bind(ringp, cpuid); + mac_soft_ring_stat_create(ringp); + return (ringp); } @@ -222,18 +236,14 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, * Free the soft ring once we are done with it. */ void -mac_soft_ring_free(mac_soft_ring_t *softring, boolean_t release_tx_ring) +mac_soft_ring_free(mac_soft_ring_t *softring) { ASSERT((softring->s_ring_state & (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) == (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE)); mac_pkt_drop(NULL, NULL, softring->s_ring_first, B_FALSE); - if (release_tx_ring && softring->s_ring_tx_arg2 != NULL) { - ASSERT(softring->s_ring_type & ST_RING_TX); - mac_release_tx_ring(softring->s_ring_tx_arg2); - } - if (softring->s_ring_ksp) - kstat_delete(softring->s_ring_ksp); + softring->s_ring_tx_arg2 = NULL; + mac_soft_ring_stat_delete(softring); mac_callback_free(softring->s_ring_notify_cb_list); kmem_cache_free(mac_soft_ring_cache, softring); } @@ -642,7 +652,6 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) void *arg2; mblk_t *tail; uint_t saved_pkt_count, saved_size; - boolean_t is_subflow; mac_tx_stats_t stats; mac_soft_ring_set_t *mac_srs = ringp->s_ring_set; @@ -652,7 +661,6 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ASSERT(!(ringp->s_ring_state & S_RING_PROC)); ringp->s_ring_state |= S_RING_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); arg1 = ringp->s_ring_tx_arg1; arg2 = ringp->s_ring_tx_arg2; @@ -675,8 +683,8 @@ mac_tx_soft_ring_drain(mac_soft_ring_t 
*ringp) tail->b_next = ringp->s_ring_first; ringp->s_ring_first = mp; ringp->s_ring_count += - (saved_pkt_count - stats.ts_opackets); - ringp->s_ring_size += (saved_size - stats.ts_obytes); + (saved_pkt_count - stats.mts_opackets); + ringp->s_ring_size += (saved_size - stats.mts_obytes); if (ringp->s_ring_last == NULL) ringp->s_ring_last = tail; @@ -684,7 +692,7 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ringp->s_ring_tx_woken_up = B_FALSE; } else { ringp->s_ring_state |= S_RING_BLOCK; - ringp->s_ring_blocked_cnt++; + ringp->s_st_stat.mts_blockcnt++; } ringp->s_ring_state &= ~S_RING_PROC; @@ -692,17 +700,13 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) return; } else { ringp->s_ring_tx_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); } } if (ringp->s_ring_count == 0 && ringp->s_ring_state & (S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; mac_client_impl_t *mcip = ringp->s_ring_mcip; boolean_t wakeup_required = B_FALSE; @@ -714,16 +718,7 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED); mutex_exit(&ringp->s_ring_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)ringp); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c index 87f2f914ff..31972f94d8 100644 
--- a/usr/src/uts/common/io/mac/mac_stat.c +++ b/usr/src/uts/common/io/mac/mac_stat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,10 +33,40 @@ #include <sys/kstat.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_client_impl.h> +#include <sys/mac_stat.h> +#include <sys/mac_soft_ring.h> +#include <sys/vlan.h> #define MAC_KSTAT_NAME "mac" #define MAC_KSTAT_CLASS "net" +enum mac_stat { + MAC_STAT_LCL, + MAC_STAT_LCLBYTES, + MAC_STAT_INTRS, + MAC_STAT_INTRBYTES, + MAC_STAT_POLLS, + MAC_STAT_POLLBYTES, + MAC_STAT_RXSDROPS, + MAC_STAT_CHU10, + MAC_STAT_CH10T50, + MAC_STAT_CHO50, + MAC_STAT_BLOCK, + MAC_STAT_UNBLOCK, + MAC_STAT_TXSDROPS, + MAC_STAT_TX_ERRORS, + MAC_STAT_MACSPOOFED, + MAC_STAT_IPSPOOFED, + MAC_STAT_DHCPSPOOFED, + MAC_STAT_RESTRICTED, + MAC_STAT_DHCPDROPPED, + MAC_STAT_MULTIRCVBYTES, + MAC_STAT_BRDCSTRCVBYTES, + MAC_STAT_MULTIXMTBYTES, + MAC_STAT_BRDCSTXMTBYTES +}; + static mac_stat_info_t i_mac_si[] = { { MAC_STAT_IFSPEED, "ifspeed", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT32, 0 }, @@ -60,7 +90,6 @@ static mac_stat_info_t i_mac_si[] = { { MAC_STAT_OBYTES, "obytes64", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 } }; - #define MAC_NKSTAT \ (sizeof (i_mac_si) / sizeof (mac_stat_info_t)) @@ -70,7 +99,6 @@ static mac_stat_info_t i_mac_mod_si[] = { { MAC_STAT_LINK_UP, "link_up", KSTAT_DATA_UINT32, 0 }, { MAC_STAT_PROMISC, "promisc", KSTAT_DATA_UINT32, 0 } }; - #define MAC_MOD_NKSTAT \ (sizeof (i_mac_mod_si) / sizeof (mac_stat_info_t)) @@ -79,11 +107,195 @@ static mac_stat_info_t i_mac_mod_si[] = { #define MAC_TYPE_KSTAT_OFFSET MAC_KSTAT_OFFSET + MAC_NKSTAT /* + * Definitions for per rx ring statistics + */ +static mac_stat_info_t i_mac_rx_ring_si[] = { + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { 
MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_HDROPS, "hdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_RING_NKSTAT \ + (sizeof (i_mac_rx_ring_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per tx ring statistics + */ +static mac_stat_info_t i_mac_tx_ring_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_RING_NKSTAT \ + (sizeof (i_mac_tx_ring_si) / sizeof (mac_stat_info_t)) + + +/* + * Definitions for per software lane tx statistics + */ +static mac_stat_info_t i_mac_tx_swlane_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_SWLANE_NKSTAT \ + (sizeof (i_mac_tx_swlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per software lane rx statistics + */ +static mac_stat_info_t i_mac_rx_swlane_si[] = { + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCL, "local", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCLBYTES, "localbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_SWLANE_NKSTAT \ + (sizeof (i_mac_rx_swlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per hardware lane rx statistics + */ +static mac_stat_info_t i_mac_rx_hwlane_si[] = { + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { 
MAC_STAT_POLLS, "polls", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLBYTES, "pollbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHU10, "chainunder10", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CH10T50, "chain10to50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHO50, "chainover50", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_HWLANE_NKSTAT \ + (sizeof (i_mac_rx_hwlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for misc statistics + */ +static mac_stat_info_t i_mac_misc_si[] = { + { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTRCV, "brdcstrcv", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIXMT, "multixmt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTXMT, "brdcstxmt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIRCVBYTES, "multircvbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTRCVBYTES, "brdcstrcvbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIXMTBYTES, "multixmtbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTXMTBYTES, "brdcstxmtbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TX_ERRORS, "txerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MACSPOOFED, "macspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPSPOOFED, "ipspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_DHCPSPOOFED, "dhcpspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RESTRICTED, "restricted", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_DHCPDROPPED, "dhcpdropped", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCL, "local", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCLBYTES, "localbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLS, "polls", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLBYTES, "pollbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHU10, "chainunder10", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CH10T50, 
"chain10to50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHO50, "chainover50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_SUMMARY_NKSTAT \ + (sizeof (i_mac_misc_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per hardware lane tx statistics + */ +static mac_stat_info_t i_mac_tx_hwlane_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_HWLANE_NKSTAT \ + (sizeof (i_mac_tx_hwlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per fanout rx statistics + */ +static mac_stat_info_t i_mac_rx_fanout_si[] = { + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, +}; +#define MAC_RX_FANOUT_NKSTAT \ + (sizeof (i_mac_rx_fanout_si) / sizeof (mac_stat_info_t)) + +/* * Private functions. 
*/ +typedef struct { + uint_t si_offset; +} stat_info_t; + +#define RX_SRS_STAT_OFF(f) (offsetof(mac_rx_stats_t, f)) +static stat_info_t rx_srs_stats_list[] = { + {RX_SRS_STAT_OFF(mrs_lclbytes)}, + {RX_SRS_STAT_OFF(mrs_lclcnt)}, + {RX_SRS_STAT_OFF(mrs_pollcnt)}, + {RX_SRS_STAT_OFF(mrs_pollbytes)}, + {RX_SRS_STAT_OFF(mrs_intrcnt)}, + {RX_SRS_STAT_OFF(mrs_intrbytes)}, + {RX_SRS_STAT_OFF(mrs_sdrops)}, + {RX_SRS_STAT_OFF(mrs_chaincntundr10)}, + {RX_SRS_STAT_OFF(mrs_chaincnt10to50)}, + {RX_SRS_STAT_OFF(mrs_chaincntover50)}, + {RX_SRS_STAT_OFF(mrs_ierrors)} +}; +#define RX_SRS_STAT_SIZE \ + (sizeof (rx_srs_stats_list) / sizeof (stat_info_t)) + +#define TX_SOFTRING_STAT_OFF(f) (offsetof(mac_tx_stats_t, f)) +static stat_info_t tx_softring_stats_list[] = { + {TX_SOFTRING_STAT_OFF(mts_obytes)}, + {TX_SOFTRING_STAT_OFF(mts_opackets)}, + {TX_SOFTRING_STAT_OFF(mts_oerrors)}, + {TX_SOFTRING_STAT_OFF(mts_blockcnt)}, + {TX_SOFTRING_STAT_OFF(mts_unblockcnt)}, + {TX_SOFTRING_STAT_OFF(mts_sdrops)}, +}; +#define TX_SOFTRING_STAT_SIZE \ + (sizeof (tx_softring_stats_list) / sizeof (stat_info_t)) + +static void +i_mac_add_stats(void *sum, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (uint64_t *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (uint64_t *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *sum_val = (uint64_t *) + ((uchar_t *)sum + stats_list[i].si_offset); + + *sum_val = *op1_val + *op2_val; + } +} + static int -i_mac_stat_update(kstat_t *ksp, int rw) +i_mac_driver_stat_update(kstat_t *ksp, int rw) { mac_impl_t *mip = ksp->ks_private; kstat_named_t *knp = ksp->ks_data; @@ -136,6 +348,587 @@ i_mac_kstat_init(kstat_named_t *knp, mac_stat_info_t *si, uint_t count) } } +static int +i_mac_stat_update(kstat_t *ksp, int rw, uint64_t (*fn)(void *, uint_t), + mac_stat_info_t *msi, uint_t count) +{ + kstat_named_t *knp = ksp->ks_data; + uint_t i; + uint64_t val; + + if 
(rw != KSTAT_READ) + return (EACCES); + + for (i = 0; i < count; i++) { + val = fn(ksp->ks_private, msi[i].msi_stat); + + switch (msi[i].msi_type) { + case KSTAT_DATA_UINT64: + knp->value.ui64 = val; + break; + case KSTAT_DATA_UINT32: + knp->value.ui32 = (uint32_t)val; + break; + default: + ASSERT(B_FALSE); + break; + } + knp++; + } + return (0); +} + +/* + * Create kstat with given name - statname, update function - fn + * and initialize it with given names - init_stat_info + */ +static kstat_t * +i_mac_stat_create(void *handle, const char *modname, const char *statname, + int (*fn) (kstat_t *, int), + mac_stat_info_t *init_stat_info, uint_t count) +{ + kstat_t *ksp; + kstat_named_t *knp; + + ksp = kstat_create(modname, 0, statname, "net", + KSTAT_TYPE_NAMED, count, 0); + + if (ksp == NULL) + return (NULL); + + ksp->ks_update = fn; + ksp->ks_private = handle; + + knp = (kstat_named_t *)ksp->ks_data; + i_mac_kstat_init(knp, init_stat_info, count); + kstat_install(ksp); + + return (ksp); +} + +/* + * Per rx ring statistics + */ +uint64_t +mac_rx_ring_stat_get(void *handle, uint_t stat) +{ + mac_ring_t *ring = (mac_ring_t *)handle; + uint64_t val = 0; + + /* + * XXX Every ring-capable driver must implement an entry point to + * query per ring statistics. CR 6893122 tracks this work item. + * Once this bug is fixed, the framework should fail registration + * for a driver that does not implement this entry point and + * assert ring->mr_stat != NULL here. 
+ */ + if (ring->mr_stat != NULL) + ring->mr_stat(ring->mr_driver, stat, &val); + + return (val); +} + +static int +i_mac_rx_ring_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, mac_rx_ring_stat_get, + i_mac_rx_ring_si, MAC_RX_RING_NKSTAT)); +} + +static void +i_mac_rx_ring_stat_create(mac_ring_t *ring, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ring, modname, statname, + i_mac_rx_ring_stat_update, i_mac_rx_ring_si, MAC_RX_RING_NKSTAT); + + ring->mr_ksp = ksp; +} + +/* + * Per tx ring statistics + */ +uint64_t +mac_tx_ring_stat_get(void *handle, uint_t stat) +{ + mac_ring_t *ring = (mac_ring_t *)handle; + uint64_t val = 0; + + /* + * XXX Every ring-capable driver must implement an entry point to + * query per ring statistics. CR 6893122 tracks this work item. + * Once this bug is fixed, the framework should fail registration + * for a driver that does not implement this entry point and + * assert ring->mr_stat != NULL here. 
+ */ + if (ring->mr_stat != NULL) + ring->mr_stat(ring->mr_driver, stat, &val); + + return (val); +} + +static int +i_mac_tx_ring_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, mac_tx_ring_stat_get, + i_mac_tx_ring_si, MAC_TX_RING_NKSTAT)); +} + +static void +i_mac_tx_ring_stat_create(mac_ring_t *ring, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ring, modname, statname, + i_mac_tx_ring_stat_update, i_mac_tx_ring_si, MAC_TX_RING_NKSTAT); + + ring->mr_ksp = ksp; +} + +/* + * Per software lane tx statistics + */ +static uint64_t +i_mac_tx_swlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_tx_stats_t *mac_tx_stat = &mac_srs->srs_tx.st_stat; + + switch (stat) { + case MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_tx_swlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_tx_swlane_stat_get, + i_mac_tx_swlane_si, MAC_TX_SWLANE_NKSTAT)); +} + +static void +i_mac_tx_swlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_tx_swlane_stat_update, i_mac_tx_swlane_si, + MAC_TX_SWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + +/* + * Per software lane rx statistics + */ +static uint64_t +i_mac_rx_swlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + switch (stat) { + case MAC_STAT_IPACKETS: + 
return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_LCL: + return (mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_LCLBYTES: + return (mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + default: + return (0); + } +} + +static int +i_mac_rx_swlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_swlane_stat_get, + i_mac_rx_swlane_si, MAC_RX_SWLANE_NKSTAT)); +} + +static void +i_mac_rx_swlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_rx_swlane_stat_update, i_mac_rx_swlane_si, + MAC_RX_SWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + + +/* + * Per hardware lane rx statistics + */ +static uint64_t +i_mac_rx_hwlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + switch (stat) { + case MAC_STAT_IPACKETS: + return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_POLLS: + return (mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_POLLBYTES: + return (mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + case MAC_STAT_CHU10: + return (mac_rx_stat->mrs_chaincntundr10); + + case MAC_STAT_CH10T50: + return (mac_rx_stat->mrs_chaincnt10to50); + + case MAC_STAT_CHO50: + return (mac_rx_stat->mrs_chaincntover50); + + default: + return 
(0); + } +} + +static int +i_mac_rx_hwlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_hwlane_stat_get, + i_mac_rx_hwlane_si, MAC_RX_HWLANE_NKSTAT)); +} + +static void +i_mac_rx_hwlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_rx_hwlane_stat_update, i_mac_rx_hwlane_si, + MAC_RX_HWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + + +/* + * Misc statistics + * + * Counts for + * - Multicast/broadcast Rx/Tx counts + * - Tx errors + */ +static uint64_t +i_mac_misc_stat_get(void *handle, uint_t stat) +{ + flow_entry_t *flent = handle; + mac_client_impl_t *mcip = flent->fe_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + + mac_rx_stat = &mac_misc_stat->mms_defunctrxlanestats; + mac_tx_stat = &mac_misc_stat->mms_defuncttxlanestats; + + switch (stat) { + case MAC_STAT_MULTIRCV: + return (mac_misc_stat->mms_multircv); + + case MAC_STAT_BRDCSTRCV: + return (mac_misc_stat->mms_brdcstrcv); + + case MAC_STAT_MULTIXMT: + return (mac_misc_stat->mms_multixmt); + + case MAC_STAT_BRDCSTXMT: + return (mac_misc_stat->mms_brdcstxmt); + + case MAC_STAT_MULTIRCVBYTES: + return (mac_misc_stat->mms_multircvbytes); + + case MAC_STAT_BRDCSTRCVBYTES: + return (mac_misc_stat->mms_brdcstrcvbytes); + + case MAC_STAT_MULTIXMTBYTES: + return (mac_misc_stat->mms_multixmtbytes); + + case MAC_STAT_BRDCSTXMTBYTES: + return (mac_misc_stat->mms_brdcstxmtbytes); + + case MAC_STAT_TX_ERRORS: + return (mac_misc_stat->mms_txerrors); + + case MAC_STAT_MACSPOOFED: + return (mac_misc_stat->mms_macspoofed); + + case MAC_STAT_IPSPOOFED: + return (mac_misc_stat->mms_ipspoofed); + + case MAC_STAT_DHCPSPOOFED: + return (mac_misc_stat->mms_dhcpspoofed); + + case MAC_STAT_RESTRICTED: + return (mac_misc_stat->mms_restricted); + + case MAC_STAT_DHCPDROPPED: + return 
(mac_misc_stat->mms_dhcpdropped); + + case MAC_STAT_IPACKETS: + return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_LCL: + return (mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_LCLBYTES: + return (mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_POLLS: + return (mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_POLLBYTES: + return (mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + case MAC_STAT_CHU10: + return (mac_rx_stat->mrs_chaincntundr10); + + case MAC_STAT_CH10T50: + return (mac_rx_stat->mrs_chaincnt10to50); + + case MAC_STAT_CHO50: + return (mac_rx_stat->mrs_chaincntover50); + + case MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_misc_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_misc_stat_get, + i_mac_misc_si, MAC_SUMMARY_NKSTAT)); +} + +static void +i_mac_misc_stat_create(flow_entry_t *flent, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(flent, modname, statname, + i_mac_misc_stat_update, i_mac_misc_si, + MAC_SUMMARY_NKSTAT); + + flent->fe_misc_stat_ksp = ksp; +} + +/* + * Per hardware lane tx statistics + */ +static uint64_t +i_mac_tx_hwlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_t *ringp = (mac_soft_ring_t *)handle; + mac_tx_stats_t *mac_tx_stat = &ringp->s_st_stat; + + switch (stat) { + case 
MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_tx_hwlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_tx_hwlane_stat_get, + i_mac_tx_hwlane_si, MAC_TX_HWLANE_NKSTAT)); +} + +static void +i_mac_tx_hwlane_stat_create(mac_soft_ring_t *ringp, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ringp, modname, statname, + i_mac_tx_hwlane_stat_update, i_mac_tx_hwlane_si, + MAC_TX_HWLANE_NKSTAT); + + ringp->s_ring_ksp = ksp; +} + +/* + * Per fanout rx statistics + */ +static uint64_t +i_mac_rx_fanout_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle; + mac_soft_ring_t *udp_ringp = NULL, *oth_ringp = NULL; + mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set; + int index; + uint64_t val; + + mutex_enter(&mac_srs->srs_lock); + /* Extract corresponding udp and oth ring pointers */ + for (index = 0; mac_srs->srs_tcp_soft_rings[index] != NULL; index++) { + if (mac_srs->srs_tcp_soft_rings[index] == tcp_ringp) { + udp_ringp = mac_srs->srs_udp_soft_rings[index]; + oth_ringp = mac_srs->srs_oth_soft_rings[index]; + break; + } + } + + ASSERT((udp_ringp != NULL) && (oth_ringp != NULL)); + + switch (stat) { + case MAC_STAT_RBYTES: + val = (tcp_ringp->s_ring_total_rbytes) + + (udp_ringp->s_ring_total_rbytes) + + (oth_ringp->s_ring_total_rbytes); + break; + + case MAC_STAT_IPACKETS: + val = (tcp_ringp->s_ring_total_inpkt) + + (udp_ringp->s_ring_total_inpkt) + + (oth_ringp->s_ring_total_inpkt); + break; + + default: + val = 0; + break; + } + mutex_exit(&mac_srs->srs_lock); + return (val); 
+} + +static int +i_mac_rx_fanout_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_fanout_stat_get, + i_mac_rx_fanout_si, MAC_RX_FANOUT_NKSTAT)); +} + +static void +i_mac_rx_fanout_stat_create(mac_soft_ring_t *ringp, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ringp, modname, statname, + i_mac_rx_fanout_stat_update, i_mac_rx_fanout_si, + MAC_RX_FANOUT_NKSTAT); + + ringp->s_ring_ksp = ksp; +} + /* * Exported functions. */ @@ -147,7 +940,7 @@ i_mac_kstat_init(kstat_named_t *knp, mac_stat_info_t *si, uint_t count) * also maintained by the driver. */ void -mac_stat_create(mac_impl_t *mip) +mac_driver_stat_create(mac_impl_t *mip) { kstat_t *ksp; kstat_named_t *knp; @@ -161,7 +954,7 @@ mac_stat_create(mac_impl_t *mip) if (ksp == NULL) return; - ksp->ks_update = i_mac_stat_update; + ksp->ks_update = i_mac_driver_stat_update; ksp->ks_private = mip; mip->mi_ksp = ksp; mip->mi_kstat_count = count; @@ -181,7 +974,7 @@ mac_stat_create(mac_impl_t *mip) /*ARGSUSED*/ void -mac_stat_destroy(mac_impl_t *mip) +mac_driver_stat_delete(mac_impl_t *mip) { if (mip->mi_ksp != NULL) { kstat_delete(mip->mi_ksp); @@ -191,15 +984,311 @@ mac_stat_destroy(mac_impl_t *mip) } uint64_t -mac_stat_default(mac_impl_t *mip, uint_t stat) +mac_driver_stat_default(mac_impl_t *mip, uint_t stat) { uint_t stat_index; if (IS_MAC_STAT(stat)) { stat_index = stat - MAC_STAT_MIN; + ASSERT(stat_index < MAC_NKSTAT); return (i_mac_si[stat_index].msi_default); } ASSERT(IS_MACTYPE_STAT(stat)); stat_index = stat - MACTYPE_STAT_MIN; + ASSERT(stat_index < mip->mi_type->mt_statcount); return (mip->mi_type->mt_stats[stat_index].msi_default); } + +void +mac_ring_stat_create(mac_ring_t *ring) +{ + mac_impl_t *mip = ring->mr_mip; + char statname[MAXNAMELEN]; + char modname[MAXNAMELEN]; + + if (mip->mi_state_flags & MIS_IS_AGGR) { + (void) strlcpy(modname, mip->mi_clients_list->mci_name, + MAXNAMELEN); + } else + (void) strlcpy(modname, 
mip->mi_name, MAXNAMELEN); + + switch (ring->mr_type) { + case MAC_RING_TYPE_RX: + (void) snprintf(statname, sizeof (statname), "mac_rx_ring%d", + ring->mr_index); + i_mac_rx_ring_stat_create(ring, modname, statname); + break; + + case MAC_RING_TYPE_TX: + (void) snprintf(statname, sizeof (statname), "mac_tx_ring%d", + ring->mr_index); + i_mac_tx_ring_stat_create(ring, modname, statname); + break; + + default: + ASSERT(B_FALSE); + break; + } +} + +void +mac_srs_stat_create(mac_soft_ring_set_t *mac_srs) +{ + flow_entry_t *flent = mac_srs->srs_flent; + char statname[MAXNAMELEN]; + boolean_t is_tx_srs; + + /* No hardware/software lanes for user defined flows */ + if ((flent->fe_type & FLOW_USER) != 0) + return; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + + if (is_tx_srs) { + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_ring_t *ring = srs_tx->st_arg2; + + if (ring != NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_tx_hwlane%d", ring->mr_index); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_tx_swlane0"); + } + i_mac_tx_swlane_stat_create(mac_srs, flent->fe_flow_name, + statname); + } else { + mac_ring_t *ring = mac_srs->srs_ring; + + if (ring == NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_rx_swlane0"); + i_mac_rx_swlane_stat_create(mac_srs, + flent->fe_flow_name, statname); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_rx_hwlane%d", ring->mr_index); + i_mac_rx_hwlane_stat_create(mac_srs, + flent->fe_flow_name, statname); + } + } +} + +void +mac_misc_stat_create(flow_entry_t *flent) +{ + char statname[MAXNAMELEN]; + + /* No misc stats for user defined or mcast/bcast flows */ + if (((flent->fe_type & FLOW_USER) != 0) || + ((flent->fe_type & FLOW_MCAST) != 0)) + return; + + (void) snprintf(statname, sizeof (statname), "mac_misc_stat"); + i_mac_misc_stat_create(flent, flent->fe_flow_name, statname); +} + +void +mac_soft_ring_stat_create(mac_soft_ring_t *ringp) +{ + mac_soft_ring_set_t 
*mac_srs = ringp->s_ring_set; + flow_entry_t *flent = ringp->s_ring_mcip->mci_flent; + mac_ring_t *ring = (mac_ring_t *)ringp->s_ring_tx_arg2; + boolean_t is_tx_srs; + char statname[MAXNAMELEN]; + + /* No hardware/software lanes for user defined flows */ + if ((flent->fe_type & FLOW_USER) != 0) + return; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (is_tx_srs) { /* tx side hardware lane */ + ASSERT(ring != NULL); + (void) snprintf(statname, sizeof (statname), "mac_tx_hwlane%d", + ring->mr_index); + i_mac_tx_hwlane_stat_create(ringp, flent->fe_flow_name, + statname); + } else { /* rx side fanout */ + /* Maintain single stat for (tcp, udp, oth) */ + if (ringp->s_ring_type & ST_RING_TCP) { + int index; + mac_soft_ring_t *softring; + + for (index = 0, softring = mac_srs->srs_soft_ring_head; + softring != NULL; + index++, softring = softring->s_ring_next) { + if (softring == ringp) + break; + } + + if (mac_srs->srs_ring == NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_rx_swlane0_fanout%d", index/3); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_rx_hwlane%d_fanout%d", + mac_srs->srs_ring->mr_index, index/3); + } + i_mac_rx_fanout_stat_create(ringp, flent->fe_flow_name, + statname); + } + } +} + +void +mac_ring_stat_delete(mac_ring_t *ring) +{ + if (ring->mr_ksp != NULL) { + kstat_delete(ring->mr_ksp); + ring->mr_ksp = NULL; + } +} + +void +mac_srs_stat_delete(mac_soft_ring_set_t *mac_srs) +{ + boolean_t is_tx_srs; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (!is_tx_srs) { + /* + * Rx ring has been taken away. Before destroying corresponding + * SRS, save the stats recorded by that SRS. 
+ */ + mac_client_impl_t *mcip = mac_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + i_mac_add_stats(&mac_misc_stat->mms_defunctrxlanestats, + mac_rx_stat, &mac_misc_stat->mms_defunctrxlanestats, + rx_srs_stats_list, RX_SRS_STAT_SIZE); + } + + if (mac_srs->srs_ksp != NULL) { + kstat_delete(mac_srs->srs_ksp); + mac_srs->srs_ksp = NULL; + } +} + +void +mac_misc_stat_delete(flow_entry_t *flent) +{ + if (flent->fe_misc_stat_ksp != NULL) { + kstat_delete(flent->fe_misc_stat_ksp); + flent->fe_misc_stat_ksp = NULL; + } +} + +void +mac_soft_ring_stat_delete(mac_soft_ring_t *ringp) +{ + mac_soft_ring_set_t *mac_srs = ringp->s_ring_set; + boolean_t is_tx_srs; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (is_tx_srs) { + /* + * Tx ring has been taken away. Before destroying corresponding + * soft ring, save the stats recorded by that soft ring. + */ + mac_client_impl_t *mcip = mac_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_tx_stats_t *mac_tx_stat = &ringp->s_st_stat; + + i_mac_add_stats(&mac_misc_stat->mms_defuncttxlanestats, + mac_tx_stat, &mac_misc_stat->mms_defuncttxlanestats, + tx_softring_stats_list, TX_SOFTRING_STAT_SIZE); + } + + if (ringp->s_ring_ksp) { + kstat_delete(ringp->s_ring_ksp); + ringp->s_ring_ksp = NULL; + } +} + +void +mac_pseudo_ring_stat_rename(mac_impl_t *mip) +{ + mac_group_t *group; + mac_ring_t *ring; + + /* Recreate pseudo rx ring kstats */ + for (group = mip->mi_rx_groups; group != NULL; + group = group->mrg_next) { + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + mac_ring_stat_delete(ring); + mac_ring_stat_create(ring); + } + } + + /* Recreate pseudo tx ring kstats */ + for (group = mip->mi_tx_groups; group != NULL; + group = group->mrg_next) { + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + mac_ring_stat_delete(ring); + mac_ring_stat_create(ring); + } + } +} + 
+void +mac_stat_rename(mac_client_impl_t *mcip) +{ + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *mac_srs; + mac_soft_ring_t *ringp; + int i, j; + + ASSERT(flent != NULL); + + /* Recreate rx SRSes kstats */ + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_srs_stat_delete(mac_srs); + mac_srs_stat_create(mac_srs); + + /* Recreate rx fanout kstats */ + for (j = 0; j < mac_srs->srs_tcp_ring_count; j++) { + ringp = mac_srs->srs_tcp_soft_rings[j]; + mac_soft_ring_stat_delete(ringp); + mac_soft_ring_stat_create(ringp); + } + } + + /* Recreate tx SRS kstats */ + mac_srs = (mac_soft_ring_set_t *)flent->fe_tx_srs; + mac_srs_stat_delete(mac_srs); + mac_srs_stat_create(mac_srs); + + /* Recreate tx sofring kstats */ + for (ringp = mac_srs->srs_soft_ring_head; ringp; + ringp = ringp->s_ring_next) { + mac_soft_ring_stat_delete(ringp); + mac_soft_ring_stat_create(ringp); + } + + /* Recreate misc kstats */ + mac_misc_stat_delete(flent); + mac_misc_stat_create(flent); +} + +void +mac_tx_srs_stat_recreate(mac_soft_ring_set_t *tx_srs, boolean_t add_stats) +{ + mac_client_impl_t *mcip = tx_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_tx_stats_t *mac_tx_stat = &tx_srs->srs_tx.st_stat; + + if (add_stats) { + /* Add the stats to cumulative stats */ + i_mac_add_stats(&mac_misc_stat->mms_defuncttxlanestats, + mac_tx_stat, &mac_misc_stat->mms_defuncttxlanestats, + tx_softring_stats_list, TX_SOFTRING_STAT_SIZE); + } + + bzero(mac_tx_stat, sizeof (mac_tx_stats_t)); + mac_srs_stat_delete(tx_srs); + mac_srs_stat_create(tx_srs); +} diff --git a/usr/src/uts/common/io/mac/mac_util.c b/usr/src/uts/common/io/mac/mac_util.c index 371145e68c..3d9d2f9b39 100644 --- a/usr/src/uts/common/io/mac/mac_util.c +++ b/usr/src/uts/common/io/mac/mac_util.c @@ -244,14 +244,23 @@ mac_fix_cksum(mblk_t *mp_chain) offset, cksum); *(up) = (uint16_t)(cksum ? 
cksum : ~cksum); + /* + * Flag the packet so that it appears + * that the checksum has already been + * verified by the hardware. + */ + flags &= ~HCK_FULLCKSUM; flags |= HCK_FULLCKSUM_OK; - value = 0xffff; + value = 0; } if (flags & HCK_IPV4_HDRCKSUM) { ASSERT(ipha != NULL); ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); + flags &= ~HCK_IPV4_HDRCKSUM; + flags |= HCK_IPV4_HDRCKSUM_OK; + } } @@ -292,8 +301,8 @@ mac_fix_cksum(mblk_t *mp_chain) * been verified by the hardware. */ flags &= ~HCK_PARTIALCKSUM; - flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); - value = 0xffff; + flags |= HCK_FULLCKSUM_OK; + value = 0; } (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, @@ -470,27 +479,25 @@ mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, * returns B_TRUE. */ boolean_t -mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, - uint8_t *next_hdr, boolean_t *ip_fragmented, uint32_t *ip_frag_ident) +mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length, + uint8_t *next_hdr, ip6_frag_t **fragp) { uint16_t length; uint_t ehdrlen; uint8_t *whereptr; - uint8_t *endptr; uint8_t *nexthdrp; ip6_dest_t *desthdr; ip6_rthdr_t *rthdr; ip6_frag_t *fraghdr; - endptr = mp->b_wptr; if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) return (B_FALSE); ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); length = IPV6_HDR_LEN; whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ - if (ip_fragmented != NULL) - *ip_fragmented = B_FALSE; + if (fragp != NULL) + *fragp = NULL; nexthdrp = &ip6h->ip6_nxt; while (whereptr < endptr) { @@ -521,10 +528,8 @@ mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, if ((uchar_t *)&fraghdr[1] > endptr) return (B_FALSE); nexthdrp = &fraghdr->ip6f_nxt; - if (ip_fragmented != NULL) - *ip_fragmented = B_TRUE; - if (ip_frag_ident != NULL) - *ip_frag_ident = fraghdr->ip6f_ident; + if (fragp != NULL) + *fragp = fraghdr; break; case IPPROTO_NONE: /* No next header means we're finished */ @@ -561,6 
+566,13 @@ mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, } } +/* + * The following set of routines are there to take care of interrupt + * re-targeting for legacy (fixed) interrupts. Some older versions + * of the popular NICs like e1000g do not support MSI-X interrupts + * and they reserve fixed interrupts for RX/TX rings. To re-target + * these interrupts, PCITOOL ioctls need to be used. + */ typedef struct mac_dladm_intr { int ino; int cpu_id; @@ -807,13 +819,20 @@ mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_resource_props_t *mrp; mac_perim_handle_t mph; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *rx_srs; + mac_cpus_t *srs_cpu; - if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid)) - return; - + if (!mac_check_interrupt_binding(mdip, cpuid)) + cpuid = -1; mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); mrp = MCIP_RESOURCE_PROPS(mcip); - mrp->mrp_intr_cpu = cpuid; + mrp->mrp_rx_intr_cpu = cpuid; + if (flent != NULL && flent->fe_rx_srs_cnt == 2) { + rx_srs = flent->fe_rx_srs[1]; + srs_cpu = &rx_srs->srs_cpu; + srs_cpu->mc_rx_intr_cpu = cpuid; + } mac_perim_exit(mph); } @@ -825,18 +844,29 @@ mac_client_intr_cpu(mac_client_handle_t mch) mac_soft_ring_set_t *rx_srs; flow_entry_t *flent = mcip->mci_flent; mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_ring_t *ring; + mac_intr_t *mintr; /* * Check if we need to retarget the interrupt. We do this only * for the primary MAC client. We do this if we have the only - * exclusive ring in the group. + * exclusive ring in the group. */ if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { rx_srs = flent->fe_rx_srs[1]; srs_cpu = &rx_srs->srs_cpu; - if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid) + ring = rx_srs->srs_ring; + mintr = &ring->mr_info.mri_intr; + /* + * If ddi_handle is present or the poll CPU is + * already bound to the interrupt CPU, return -1. 
+ */ + if (mintr->mi_ddi_handle != NULL || + ((mrp->mrp_ncpus != 0) && + (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) { return (-1); - return (srs_cpu->mc_pollid); + } + return (srs_cpu->mc_rx_pollid); } return (-1); } @@ -970,8 +1000,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) } case ETHERTYPE_IPV6: { ip6_t *ip6hp; + ip6_frag_t *frag = NULL; uint16_t hdr_length; - uint32_t ip_frag_ident; /* * If the header is not aligned or the header doesn't fit @@ -984,8 +1014,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) !OK_32PTR((char *)ip6hp)) goto done; - if (!mac_ip_hdr_length_v6(mp, ip6hp, &hdr_length, &proto, - &ip_fragmented, &ip_frag_ident)) + if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length, + &proto, &frag)) goto done; skip_len += hdr_length; @@ -994,7 +1024,7 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) * the frag_id to generate the hash inorder to get * better distribution. 
*/ - if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { + if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) { uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); @@ -1003,8 +1033,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) policy &= ~MAC_PKT_HASH_L3; } - if (ip_fragmented) { - uint8_t *identp = (uint8_t *)&ip_frag_ident; + if (frag != NULL) { + uint8_t *identp = (uint8_t *)&frag->ip6f_ident; hash ^= PKT_HASH_4BYTES(identp); goto done; } diff --git a/usr/src/uts/common/io/mii/mii.c b/usr/src/uts/common/io/mii/mii.c index 2187553b40..bfff2a52e8 100644 --- a/usr/src/uts/common/io/mii/mii.c +++ b/usr/src/uts/common/io/mii/mii.c @@ -650,12 +650,10 @@ mii_m_loop_ioctl(mii_handle_t mh, queue_t *wq, mblk_t *mp) int mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, - uint_t flags, uint_t sz, void *val, uint_t *permp) + uint_t sz, void *val) { phy_handle_t *ph; int err = 0; - uint_t perm; - boolean_t dfl = flags & MAC_PROP_DEFAULT; _NOTE(ARGUNUSED(name)); @@ -665,54 +663,36 @@ mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, mutex_enter(&mh->m_lock); ph = mh->m_phy; - perm = MAC_PROP_PERM_RW; #define CASE_PROP_ABILITY(PROP, VAR) \ case MAC_PROP_ADV_##PROP: \ - perm = MAC_PROP_PERM_READ; \ - *(uint8_t *)val = \ - dfl ? ph->phy_cap_##VAR : ph->phy_adv_##VAR; \ + *(uint8_t *)val = ph->phy_adv_##VAR; \ break; \ \ case MAC_PROP_EN_##PROP: \ - if (!ph->phy_cap_##VAR) \ - perm = MAC_PROP_PERM_READ; \ - *(uint8_t *)val = \ - dfl ? 
ph->phy_cap_##VAR : ph->phy_en_##VAR; \ + *(uint8_t *)val = ph->phy_en_##VAR; \ break; switch (num) { case MAC_PROP_DUPLEX: - perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&ph->phy_duplex, val, sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&ph->phy_duplex, val, sizeof (link_duplex_t)); break; - case MAC_PROP_SPEED: - perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - uint64_t speed = ph->phy_speed * 1000000ull; - bcopy(&speed, val, sizeof (speed)); - } else { - err = EINVAL; - } + case MAC_PROP_SPEED: { + uint64_t speed = ph->phy_speed * 1000000ull; + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&speed, val, sizeof (speed)); break; + } case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? ph->phy_cap_aneg : ph->phy_adv_aneg; + *(uint8_t *)val = ph->phy_adv_aneg; break; case MAC_PROP_FLOWCTRL: - if (sz >= sizeof (link_flowctrl_t)) { - bcopy(&ph->phy_flowctrl, val, - sizeof (link_flowctrl_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_flowctrl_t)); + bcopy(&ph->phy_flowctrl, val, sizeof (link_flowctrl_t)); break; CASE_PROP_ABILITY(1000FDX_CAP, 1000_fdx) @@ -728,15 +708,57 @@ mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, break; } - if (err == 0) { - *permp = perm; - } - mutex_exit(&mh->m_lock); return (err); } +void +mii_m_propinfo(mii_handle_t mh, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + phy_handle_t *ph; + + _NOTE(ARGUNUSED(name)); + + mutex_enter(&mh->m_lock); + + ph = mh->m_phy; + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, ph->phy_cap_aneg); + break; + +#define CASE_PROP_PERM(PROP, VAR) \ + case MAC_PROP_ADV_##PROP: \ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); \ + mac_prop_info_set_default_uint8(prh, ph->phy_cap_##VAR); \ + break; \ + \ + case 
MAC_PROP_EN_##PROP: \ + if (!ph->phy_cap_##VAR) \ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); \ + mac_prop_info_set_default_uint8(prh, ph->phy_cap_##VAR); \ + break; + + CASE_PROP_PERM(1000FDX_CAP, 1000_fdx) + CASE_PROP_PERM(1000HDX_CAP, 1000_hdx) + CASE_PROP_PERM(100T4_CAP, 100_t4) + CASE_PROP_PERM(100FDX_CAP, 100_fdx) + CASE_PROP_PERM(100HDX_CAP, 100_hdx) + CASE_PROP_PERM(10FDX_CAP, 10_fdx) + CASE_PROP_PERM(10HDX_CAP, 10_hdx) + } + + mutex_exit(&mh->m_lock); +} + int mii_m_setprop(mii_handle_t mh, const char *name, mac_prop_id_t num, uint_t sz, const void *valp) @@ -813,65 +835,62 @@ mii_m_setprop(mii_handle_t mh, const char *name, mac_prop_id_t num, advp = &ph->phy_en_aneg; macpp = &mh->m_en_aneg; break; - case MAC_PROP_FLOWCTRL: - if (sz < sizeof (link_flowctrl_t)) { - rv = EINVAL; - } else { - link_flowctrl_t fc; - boolean_t chg; + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fc; + boolean_t chg; - bcopy(valp, &fc, sizeof (fc)); + ASSERT(sz >= sizeof (link_flowctrl_t)); + bcopy(valp, &fc, sizeof (fc)); - chg = fc == ph->phy_en_flowctrl ? B_FALSE : B_TRUE; - switch (fc) { - case LINK_FLOWCTRL_NONE: - ph->phy_en_pause = B_FALSE; - ph->phy_en_asmpause = B_FALSE; + chg = fc == ph->phy_en_flowctrl ? B_FALSE : B_TRUE; + switch (fc) { + case LINK_FLOWCTRL_NONE: + ph->phy_en_pause = B_FALSE; + ph->phy_en_asmpause = B_FALSE; + ph->phy_en_flowctrl = fc; + break; + /* + * Note that while we don't have a way to advertise + * that we can RX pause (we just won't send pause + * frames), we advertise full support. The MAC driver + * will learn of the configuration via the saved value + * of the tunable. + */ + case LINK_FLOWCTRL_BI: + case LINK_FLOWCTRL_RX: + if (ph->phy_cap_pause) { + ph->phy_en_pause = B_TRUE; + ph->phy_en_asmpause = B_TRUE; ph->phy_en_flowctrl = fc; - break; - /* - * Note that while we don't have a way to - * advertise that we can RX pause (we just - * won't send pause frames), we advertise full - * support. 
The MAC driver will learn of the - * configuration via the saved value of the - * tunable. - */ - case LINK_FLOWCTRL_BI: - case LINK_FLOWCTRL_RX: - if (ph->phy_cap_pause) { - ph->phy_en_pause = B_TRUE; - ph->phy_en_asmpause = B_TRUE; - ph->phy_en_flowctrl = fc; - } else { - rv = EINVAL; - } - break; - - /* - * Tell the other side that we can assert - * pause, but we cannot resend. - */ - case LINK_FLOWCTRL_TX: - if (ph->phy_cap_asmpause) { - ph->phy_en_pause = B_FALSE; - ph->phy_en_flowctrl = fc; - ph->phy_en_asmpause = B_TRUE; - } else { - rv = EINVAL; - } - break; - default: + } else { rv = EINVAL; - break; } - if ((rv == 0) && chg) { - mh->m_en_flowctrl = fc; - mh->m_tstate = MII_STATE_RESET; - cv_broadcast(&mh->m_cv); + break; + + /* + * Tell the other side that we can assert pause, but + * we cannot resend. + */ + case LINK_FLOWCTRL_TX: + if (ph->phy_cap_asmpause) { + ph->phy_en_pause = B_FALSE; + ph->phy_en_flowctrl = fc; + ph->phy_en_asmpause = B_TRUE; + } else { + rv = EINVAL; } + break; + default: + rv = EINVAL; + break; + } + if ((rv == 0) && chg) { + mh->m_en_flowctrl = fc; + mh->m_tstate = MII_STATE_RESET; + cv_broadcast(&mh->m_cv); } break; + } default: rv = ENOTSUP; diff --git a/usr/src/uts/common/io/mwl/mwl.c b/usr/src/uts/common/io/mwl/mwl.c index ce99b07504..98d0892326 100644 --- a/usr/src/uts/common/io/mwl/mwl.c +++ b/usr/src/uts/common/io/mwl/mwl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -86,11 +86,13 @@ static int mwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int mwl_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void mwl_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t mwl_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, mwl_m_stat, mwl_m_start, mwl_m_stop, @@ -98,12 +100,14 @@ static mac_callbacks_t mwl_m_callbacks = { mwl_m_multicst, mwl_m_unicst, mwl_m_tx, + NULL, mwl_m_ioctl, NULL, NULL, NULL, mwl_m_setprop, - mwl_m_getprop + mwl_m_getprop, + mwl_m_propinfo }; #define MWL_DBG_ATTACH (1 << 0) @@ -3746,17 +3750,26 @@ mwl_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int mwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct mwl_softc *sc = (struct mwl_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +mwl_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct mwl_softc *sc = (struct mwl_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int mwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/mxfe/mxfe.c b/usr/src/uts/common/io/mxfe/mxfe.c index d48164a80f..790c936fd5 100644 --- a/usr/src/uts/common/io/mxfe/mxfe.c +++ b/usr/src/uts/common/io/mxfe/mxfe.c @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -114,9 +114,11 @@ static int mxfe_m_stat(void *, uint_t, uint64_t *); static int mxfe_m_start(void *); static void mxfe_m_stop(void *); static int mxfe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int mxfe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void mxfe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static unsigned mxfe_intr(caddr_t); static void mxfe_startmac(mxfe_t *); static void mxfe_stopmac(mxfe_t *); @@ -170,7 +172,7 @@ static void mxfe_dprintf(mxfe_t *, const char *, int, char *, ...); #define KIOIP KSTAT_INTR_PTR(mxfep->mxfe_intrstat) static mac_callbacks_t mxfe_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, mxfe_m_stat, mxfe_m_start, mxfe_m_stop, @@ -178,12 +180,14 @@ static mac_callbacks_t mxfe_m_callbacks = { mxfe_m_multicst, mxfe_m_unicst, mxfe_m_tx, + NULL, NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ mxfe_m_setprop, - mxfe_m_getprop + mxfe_m_getprop, + mxfe_m_propinfo }; /* @@ -2877,90 +2881,50 @@ mxfe_m_stat(void *arg, uint_t stat, uint64_t *val) /*ARGSUSED*/ int -mxfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +mxfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { mxfe_t *mxfep = arg; int err = 0; - boolean_t dfl = flags & MAC_PROP_DEFAULT; - if (sz == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&mxfep->mxfe_duplex, val, - sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&mxfep->mxfe_duplex, val, sizeof 
(link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - bcopy(&mxfep->mxfe_ifspeed, val, sizeof (uint64_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&mxfep->mxfe_ifspeed, val, sizeof (uint64_t)); break; case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_aneg : mxfep->mxfe_adv_aneg; + *(uint8_t *)val = mxfep->mxfe_adv_aneg; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100fdx : mxfep->mxfe_adv_100fdx; - break; case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100fdx : mxfep->mxfe_adv_100fdx; + *(uint8_t *)val = mxfep->mxfe_adv_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100hdx : mxfep->mxfe_adv_100hdx; - break; case MAC_PROP_EN_100HDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100hdx : mxfep->mxfe_adv_100hdx; + *(uint8_t *)val = mxfep->mxfe_adv_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10fdx : mxfep->mxfe_adv_10fdx; - break; case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10fdx : mxfep->mxfe_adv_10fdx; + *(uint8_t *)val = mxfep->mxfe_adv_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10hdx : mxfep->mxfe_adv_10hdx; - break; case MAC_PROP_EN_10HDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10hdx : mxfep->mxfe_adv_10hdx; + *(uint8_t *)val = mxfep->mxfe_adv_10hdx; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100T4 : mxfep->mxfe_adv_100T4; - break; case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)val = - dfl ? 
mxfep->mxfe_cap_100T4 : mxfep->mxfe_adv_100T4; + *(uint8_t *)val = mxfep->mxfe_adv_100T4; break; default: @@ -3041,6 +3005,51 @@ mxfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (0); } +static void +mxfe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + mxfe_t *mxfep = arg; + + _NOTE(ARGUNUSED(name)); + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + mac_prop_info_set_perm(mph, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_aneg); + break; + + case MAC_PROP_EN_100FDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100fdx); + break; + + case MAC_PROP_EN_100HDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100hdx); + break; + + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_10fdx); + break; + + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_10hdx); + break; + + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100T4); + break; + } +} + /* * Debugging and error reporting. 
*/ diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge.c b/usr/src/uts/common/io/myri10ge/drv/myri10ge.c index d2bda2311b..7cdbad3249 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge.c +++ b/usr/src/uts/common/io/myri10ge/drv/myri10ge.c @@ -2380,8 +2380,7 @@ myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) return; } - (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, - csum, HCK_PARTIALCKSUM, 0); + mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); } static mblk_t * @@ -2889,7 +2888,7 @@ static inline void myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) { uint32_t lso_flag; - lso_info_get(mp, mss, &lso_flag); + mac_lso_get(mp, mss, &lso_flag); (*flags) |= lso_flag; } @@ -2902,8 +2901,7 @@ myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) int ok; mss = 0; - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, NULL, - &tx_offload_flags); + mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); ok = pullupmsg(mp, -1); @@ -2912,8 +2910,7 @@ myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) return (DDI_FAILURE); } MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); - (void) hcksum_assoc(mp, NULL, NULL, start, stuff, NULL, - NULL, tx_offload_flags, 0); + mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); if (tx_offload_flags & HW_LSO) DB_LSOMSS(mp) = (uint16_t)mss; lso_info_set(mp, mss, tx_offload_flags); @@ -3347,8 +3344,7 @@ myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, again: /* Setup checksum offloading, if needed */ - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, NULL, - &tx_offload_flags); + mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); if (tx_offload_flags & HW_LSO) { max_segs = MYRI10GE_MAX_SEND_DESC_TSO; @@ -3796,6 +3792,58 @@ myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) return (0); } 
+/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + struct myri10ge_slice_state *ss; + + ss = (struct myri10ge_slice_state *)rh; + switch (stat) { + case MAC_STAT_RBYTES: + *val = ss->rx_stats.ibytes; + break; + + case MAC_STAT_IPACKETS: + *val = ss->rx_stats.ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + struct myri10ge_slice_state *ss; + + ss = (struct myri10ge_slice_state *)rh; + switch (stat) { + case MAC_STAT_OBYTES: + *val = ss->tx.stats.obytes; + break; + + case MAC_STAT_OPACKETS: + *val = ss->tx.stats.opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + static int myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) { @@ -3843,6 +3891,7 @@ myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = myri10ge_ring_start; infop->mri_stop = NULL; infop->mri_poll = myri10ge_poll_rx; + infop->mri_stat = myri10ge_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)ss; mintr->mi_enable = myri10ge_rx_ring_intr_enable; mintr->mi_disable = myri10ge_rx_ring_intr_disable; @@ -3853,6 +3902,7 @@ myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = myri10ge_send_wrapper; + infop->mri_stat = myri10ge_tx_ring_stat; break; default: break; @@ -5329,6 +5379,7 @@ static mac_callbacks_t myri10ge_m_callbacks = { myri10ge_m_multicst, NULL, NULL, + NULL, myri10ge_m_ioctl, myri10ge_m_getcapab }; diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c b/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c index 2d03fceac6..ba2177e0fe 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c +++ 
b/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c @@ -118,8 +118,8 @@ myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro, tcp->th_sum = 0xffff ^ tcp_csum; } - (void) hcksum_assoc(lro->m_head, NULL, NULL, 0, 0, 0, - 0, HCK_IPV4_HDRCKSUM | HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(lro->m_head, 0, 0, 0, + 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK); mbl->cnt += lro->append_cnt; myri10ge_mbl_append(ss, mbl, lro->m_head); diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h b/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h index 6840795e94..24889e48a6 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h +++ b/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h @@ -57,9 +57,7 @@ extern "C" { #include <sys/sunddi.h> #include <sys/strsubr.h> /* for hw cksum stuff */ #include <sys/pattr.h> /* for hw cksum stuff */ -#ifdef MYRICOM_PRIV #include <netinet/in.h> /* for hw cksum stuff */ -#endif #include <netinet/ip.h> /* for hw cksum stuff */ #include <netinet/ip6.h> /* for hw cksum stuff */ #include <netinet/tcp.h> /* for hw cksum stuff */ diff --git a/usr/src/uts/common/io/net80211/net80211_ioctl.c b/usr/src/uts/common/io/net80211/net80211_ioctl.c index 93212719e3..25ef1e4fde 100644 --- a/usr/src/uts/common/io/net80211/net80211_ioctl.c +++ b/usr/src/uts/common/io/net80211/net80211_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -2457,22 +2457,14 @@ ieee80211_setprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ int ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; struct ieee80211com *ic = ic_arg; - if (wldp_length == 0) { - err = EINVAL; - return (err); - } - bzero(wldp_buf, wldp_length); - ASSERT(ic != NULL); IEEE80211_LOCK(ic); - *perm = MAC_PROP_PERM_RW; - switch (wldp_pr_num) { /* mac_prop_id */ case MAC_PROP_WL_ESSID: @@ -2497,34 +2489,27 @@ ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, wl_get_desrates(ic, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: - *perm = MAC_PROP_PERM_READ; wl_get_linkstatus(ic, wldp_buf); break; case MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; wl_get_esslist(ic, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; wl_get_suprates(ic, wldp_buf); break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; wl_get_rssi(ic, wldp_buf); break; case MAC_PROP_WL_CAPABILITY: - *perm = MAC_PROP_PERM_READ; wl_get_capability(ic, wldp_buf); break; case MAC_PROP_WL_WPA: wl_get_wpa(ic, wldp_buf); break; case MAC_PROP_WL_SCANRESULTS: - *perm = MAC_PROP_PERM_READ; wl_get_scanresults(ic, wldp_buf); break; case MAC_PROP_WL_CREATE_IBSS: - *perm = MAC_PROP_PERM_READ; wl_get_createibss(ic, wldp_buf); break; case MAC_PROP_WL_KEY_TAB: @@ -2545,3 +2530,25 @@ ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +void ieee80211_propinfo(void *ic_arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(pr_name, ic_arg)); + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. 
+ */ + + switch (wldp_pr_num) { + case MAC_PROP_WL_LINKSTATUS: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + case MAC_PROP_WL_CAPABILITY: + case MAC_PROP_WL_SCANRESULTS: + case MAC_PROP_WL_CREATE_IBSS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } +} diff --git a/usr/src/uts/common/io/nge/nge_main.c b/usr/src/uts/common/io/nge/nge_main.c index 583e9bd61e..1aad680aa7 100644 --- a/usr/src/uts/common/io/nge/nge_main.c +++ b/usr/src/uts/common/io/nge/nge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -177,14 +177,17 @@ static boolean_t nge_m_getcapab(void *, mac_capab_t, void *); static int nge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int nge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void nge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int nge_set_priv_prop(nge_t *, const char *, uint_t, const void *); static int nge_get_priv_prop(nge_t *, const char *, uint_t, - uint_t, void *); + void *); #define NGE_M_CALLBACK_FLAGS\ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | \ + MC_PROPINFO) static mac_callbacks_t nge_m_callbacks = { NGE_M_CALLBACK_FLAGS, @@ -195,27 +198,27 @@ static mac_callbacks_t nge_m_callbacks = { nge_m_multicst, nge_m_unicst, nge_m_tx, + NULL, nge_m_ioctl, nge_m_getcapab, NULL, NULL, nge_m_setprop, - nge_m_getprop + nge_m_getprop, + nge_m_propinfo }; -mac_priv_prop_t nge_priv_props[] = { - {"_tx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_rx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_recv_max_packet", MAC_PROP_PERM_RW}, - {"_poll_quiet_time", MAC_PROP_PERM_RW}, - {"_poll_busy_time", MAC_PROP_PERM_RW}, - {"_rx_intr_hwater", MAC_PROP_PERM_RW}, - 
{"_rx_intr_lwater", MAC_PROP_PERM_RW}, +char *nge_priv_props[] = { + "_tx_bcopy_threshold", + "_rx_bcopy_threshold", + "_recv_max_packet", + "_poll_quiet_time", + "_poll_busy_time", + "_rx_intr_hwater", + "_rx_intr_lwater", + NULL }; -#define NGE_MAX_PRIV_PROPS \ - (sizeof (nge_priv_props)/sizeof (mac_priv_prop_t)) - static int nge_add_intrs(nge_t *, int); static void nge_rem_intrs(nge_t *); static int nge_register_intrs_and_init_locks(nge_t *); @@ -1750,193 +1753,167 @@ reprogram: static int nge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { nge_t *ngep = barg; int err = 0; link_flowctrl_t fl; uint64_t speed; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&ngep->param_link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&ngep->param_link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - speed = ngep->param_link_speed * 1000000ull; - bcopy(&speed, pr_val, sizeof (speed)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + speed = ngep->param_link_speed * 1000000ull; + bcopy(&speed, pr_val, sizeof (speed)); break; case MAC_PROP_AUTONEG: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_autoneg; - } + *(uint8_t *)pr_val = ngep->param_adv_autoneg; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (link_flowctrl_t)) { - if (pr_flags & MAC_PROP_DEFAULT) { - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - } - if (ngep->param_link_rx_pause && - 
!ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_RX; - - if (!ngep->param_link_rx_pause && - !ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_NONE; - - if (!ngep->param_link_rx_pause && - ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_TX; - - if (ngep->param_link_rx_pause && - ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); + if (ngep->param_link_rx_pause && + !ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_RX; + + if (!ngep->param_link_rx_pause && + !ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_NONE; + + if (!ngep->param_link_rx_pause && + ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_TX; + + if (ngep->param_link_rx_pause && + ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_BI; + bcopy(&fl, pr_val, sizeof (fl)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_1000fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_1000fdx; - } + *(uint8_t *)pr_val = ngep->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 0; - } else { - *(uint8_t *)pr_val = ngep->param_adv_1000hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 0; - } else { - *(uint8_t *)pr_val = ngep->param_en_1000hdx; - } + *(uint8_t *)pr_val = ngep->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_100fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (is_default) { - *(uint8_t 
*)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_100fdx; - } + *(uint8_t *)pr_val = ngep->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_100hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_100hdx; - } + *(uint8_t *)pr_val = ngep->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_10fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_10fdx; - } + *(uint8_t *)pr_val = ngep->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_10hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_10hdx; - } + *(uint8_t *)pr_val = ngep->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_PRIVATE: - err = nge_get_priv_prop(ngep, pr_name, pr_flags, + err = nge_get_priv_prop(ngep, pr_name, pr_valsize, pr_val); break; - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = ETHERMTU; - if (ngep->dev_spec_param.jumbo) - range.range_uint32[0].mpur_max 
= NGE_MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } default: err = ENOTSUP; } return (err); } +static void +nge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + nge_t *ngep = barg; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, ETHERMTU, + ngep->dev_spec_param.jumbo ? 
NGE_MAX_MTU : ETHERMTU); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + bzero(valstr, sizeof (valstr)); + if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { + value = NGE_TX_COPY_SIZE; + } else if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { + value = NGE_RX_COPY_SIZE; + } else if (strcmp(pr_name, "_recv_max_packet") == 0) { + value = 128; + } else if (strcmp(pr_name, "_poll_quiet_time") == 0) { + value = NGE_POLL_QUIET_TIME; + } else if (strcmp(pr_name, "_poll_busy_time") == 0) { + value = NGE_POLL_BUSY_TIME; + } else if (strcmp(pr_name, "_rx_intr_hwater") == 0) { + value = 1; + } else if (strcmp(pr_name, "_rx_intr_lwater") == 0) { + value = 8; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + } + } + +} + /* ARGSUSED */ static int nge_set_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_valsize, @@ -2056,49 +2033,44 @@ reprogram: } static int -nge_get_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +nge_get_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { - value = (is_default ? NGE_TX_COPY_SIZE : - ngep->param_txbcopy_threshold); + value = ngep->param_txbcopy_threshold; err = 0; goto done; } if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { - value = (is_default ? NGE_RX_COPY_SIZE : - ngep->param_rxbcopy_threshold); + value = ngep->param_rxbcopy_threshold; err = 0; goto done; } if (strcmp(pr_name, "_recv_max_packet") == 0) { - value = (is_default ? 128 : ngep->param_recv_max_packet); + value = ngep->param_recv_max_packet; err = 0; goto done; } if (strcmp(pr_name, "_poll_quiet_time") == 0) { - value = (is_default ? 
NGE_POLL_QUIET_TIME : - ngep->param_poll_quiet_time); + value = ngep->param_poll_quiet_time; err = 0; goto done; } if (strcmp(pr_name, "_poll_busy_time") == 0) { - value = (is_default ? NGE_POLL_BUSY_TIME : - ngep->param_poll_busy_time); + value = ngep->param_poll_busy_time; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_hwater") == 0) { - value = (is_default ? 1 : ngep->param_rx_intr_hwater); + value = ngep->param_rx_intr_hwater; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_lwater") == 0) { - value = (is_default ? 8 : ngep->param_rx_intr_lwater); + value = ngep->param_rx_intr_lwater; err = 0; goto done; } @@ -2561,7 +2533,6 @@ nge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_max_sdu = ngep->default_mtu; macp->m_margin = VTAG_SIZE; macp->m_priv_props = nge_priv_props; - macp->m_priv_prop_count = NGE_MAX_PRIV_PROPS; /* * Finally, we're ready to register ourselves with the mac * interface; if this succeeds, we're all ready to start() diff --git a/usr/src/uts/common/io/nge/nge_rx.c b/usr/src/uts/common/io/nge/nge_rx.c index 86484445d3..c362117fd2 100644 --- a/usr/src/uts/common/io/nge/nge_rx.c +++ b/usr/src/uts/common/io/nge/nge_rx.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -271,19 +271,18 @@ nge_rxsta_handle(nge_t *ngep, uint32_t stflag, uint32_t *pflags) case RXD_CK8G_TCP_SUM: case RXD_CK8G_UDP_SUM: - *pflags |= HCK_FULLCKSUM; - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; *pflags |= HCK_FULLCKSUM_OK; break; case RXD_CK8G_TCP_SUM_ERR: case RXD_CK8G_UDP_SUM_ERR: sw_stp->tcp_hwsum_err++; - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; break; case RXD_CK8G_IP_HSUM: - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; break; case RXD_CK8G_NO_HSUM: @@ -379,8 +378,7 @@ nge_recv_ring(nge_t *ngep) } if (mp != NULL) { if (!(flag_err & (RX_SUM_NO | RX_SUM_ERR))) { - (void) hcksum_assoc(mp, NULL, NULL, - 0, 0, 0, 0, sum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, sum_flags); } *tail = mp; tail = &mp->b_next; diff --git a/usr/src/uts/common/io/nge/nge_tx.c b/usr/src/uts/common/io/nge/nge_tx.c index c16368bd5f..6ece5b5730 100644 --- a/usr/src/uts/common/io/nge/nge_tx.c +++ b/usr/src/uts/common/io/nge/nge_tx.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -362,8 +362,7 @@ nge_send_copy(nge_t *ngep, mblk_t *mp, send_ring_t *srp) sw_tx_sbd_t *ssbdp; boolean_t tfint; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, NULL, &flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &flags); bds = 0x1; if ((uint32_t)-1 == (start_index = nge_tx_alloc(ngep, bds))) @@ -476,7 +475,7 @@ nge_send_mapped(nge_t *ngep, mblk_t *mp, size_t fragno) slot = 0; dmah = dmah_list.head; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &flags); for (bp = mp; bp != NULL; bp = bp->b_cont) { diff --git a/usr/src/uts/common/io/ntxn/unm_nic_main.c b/usr/src/uts/common/io/ntxn/unm_nic_main.c index 4165589454..be99c52ff3 100644 --- a/usr/src/uts/common/io/ntxn/unm_nic_main.c +++ b/usr/src/uts/common/io/ntxn/unm_nic_main.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/types.h> @@ -649,8 +649,7 @@ unm_tx_csum(cmdDescType0_t *desc, mblk_t *mp, pktinfo_t *pktinfo) if (pktinfo->etype == htons(ETHERTYPE_IP)) { uint32_t start, flags; - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, - &flags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags); if ((flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) == 0) return; @@ -1306,11 +1305,11 @@ unm_process_rcv(unm_adapter *adapter, statusDesc_t *desc) if (desc->u1.s1.status == STATUS_CKSUM_OK) { adapter->stats.csummed++; cksum_flags = - HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM | HCK_FULLCKSUM; + HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM_OK; } else { cksum_flags = 0; } - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, cksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags); adapter->stats.no_rcv++; adapter->stats.rxbytes += pkt_length; @@ -2533,9 +2532,7 @@ static mac_callbacks_t ntxn_m_callbacks = { ntxn_m_multicst, ntxn_m_unicst, ntxn_m_tx, -#ifndef SOLARIS11 - NULL, /* mc_resources */ -#endif + NULL, /* mc_reserved */ ntxn_m_ioctl, ntxn_m_getcapab, NULL, /* mc_open */ diff --git a/usr/src/uts/common/io/nxge/nxge_fflp.c b/usr/src/uts/common/io/nxge/nxge_fflp.c index 39e107486e..ac1528275a 100644 --- a/usr/src/uts/common/io/nxge/nxge_fflp.c +++ b/usr/src/uts/common/io/nxge/nxge_fflp.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -448,6 +449,7 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) npi_status_t rs = NPI_SUCCESS; hostinfo_t mac_rdc; npi_handle_t handle; + int i; handle = nxgep->npi_reg_handle; mac_rdc.value = 0; @@ -456,6 +458,12 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) switch (nxgep->function_num) { case 0: case 1: + /* + * Tests indicate that it is OK not to re-initialize the + * hostinfo registers for the XMAC's alternate MAC + * addresses. 
But that is necessary for BMAC (case 2 + * and case 3 below) + */ rs = npi_mac_hostinfo_entry(handle, OP_SET, nxgep->function_num, XMAC_UNIQUE_HOST_INFO_ENTRY, &mac_rdc); break; @@ -463,6 +471,9 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) case 3: rs = npi_mac_hostinfo_entry(handle, OP_SET, nxgep->function_num, BMAC_UNIQUE_HOST_INFO_ENTRY, &mac_rdc); + for (i = 1; i <= BMAC_MAX_ALT_ADDR_ENTRY; i++) + rs |= npi_mac_hostinfo_entry(handle, OP_SET, + nxgep->function_num, i, &mac_rdc); break; default: NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, @@ -488,7 +499,6 @@ nxge_alt_mcast_mac_assign_rdc_table(p_nxge_t nxgep) npi_status_t rs = NPI_SUCCESS; hostinfo_t mac_rdc; npi_handle_t handle; - int i; handle = nxgep->npi_reg_handle; mac_rdc.value = 0; @@ -497,25 +507,13 @@ nxge_alt_mcast_mac_assign_rdc_table(p_nxge_t nxgep) switch (nxgep->function_num) { case 0: case 1: - /* - * Tests indicate that it is OK not to re-initialize the - * hostinfo registers for the XMAC's alternate MAC - * addresses. But that is necessary for BMAC (case 2 - * and case 3 below) - */ rs = npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, - XMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); + nxgep->function_num, XMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); break; case 2: case 3: - for (i = 1; i <= BMAC_MAX_ALT_ADDR_ENTRY; i++) - rs |= npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, i, &mac_rdc); - - rs |= npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, - BMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); + rs = npi_mac_hostinfo_entry(handle, OP_SET, + nxgep->function_num, BMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); break; default: NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, diff --git a/usr/src/uts/common/io/nxge/nxge_hio.c b/usr/src/uts/common/io/nxge/nxge_hio.c index 1130955670..2eaadd7b7c 100644 --- a/usr/src/uts/common/io/nxge/nxge_hio.c +++ b/usr/src/uts/common/io/nxge/nxge_hio.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,6 +50,7 @@ extern npi_status_t npi_rxdma_dump_rdc_table(npi_handle_t, uint8_t); extern int nxge_m_mmac_remove(void *arg, int slot); extern int nxge_m_mmac_add_g(void *arg, const uint8_t *maddr, int rdctbl, boolean_t usetbl); +extern int nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); /* The following function may be found in nxge_[t|r]xdma.c */ extern npi_status_t nxge_txdma_channel_disable(nxge_t *, int); @@ -428,6 +429,7 @@ nxge_grp_dc_add( nxge_hio_dc_t *dc; nxge_grp_set_t *set; nxge_status_t status = NXGE_OK; + int error = 0; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_grp_dc_add")); @@ -501,8 +503,13 @@ nxge_grp_dc_add( dc->group = group; - if (isLDOMguest(nxge)) - (void) nxge_hio_ldsv_add(nxge, dc); + if (isLDOMguest(nxge)) { + error = nxge_hio_ldsv_add(nxge, dc); + if (error != 0) { + MUTEX_EXIT(&nhd->lock); + return (NXGE_ERROR); + } + } NXGE_DC_SET(set->owned.map, channel); set->owned.count++; @@ -1778,6 +1785,10 @@ nxge_hio_share_bind(mac_share_handle_t shandle, uint64_t cookie, uint64_t rmap, tmap, hv_rmap, hv_tmap; int rv; + ASSERT(shp != NULL); + ASSERT(shp->nxgep != NULL); + ASSERT(shp->vrp != NULL); + nxge = shp->nxgep; vr = (nxge_hio_vr_t *)shp->vrp; @@ -1956,16 +1967,17 @@ nxge_hio_unshare( int nxge_hio_addres(nxge_hio_vr_t *vr, mac_ring_type_t type, uint64_t *map) { - nxge_t *nxge = (nxge_t *)vr->nxge; + nxge_t *nxge; nxge_grp_t *group; int groupid; int i, rv = 0; int max_dcs; - NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_addres")); + ASSERT(vr != NULL); + ASSERT(vr->nxge != NULL); + nxge = (nxge_t *)vr->nxge; - if (!nxge) - return (EINVAL); + NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_addres")); /* * For each ring associated with the group, add the resources @@ -1984,6 +1996,8 @@ nxge_hio_addres(nxge_hio_vr_t *vr, mac_ring_type_t type, uint64_t *map) group = nxge->rx_set.group[groupid]; } + ASSERT(group != NULL); + if 
(group->map == 0) { NXGE_DEBUG_MSG((nxge, HIO_CTL, "There is no rings associated " "with this VR")); @@ -2424,6 +2438,7 @@ nxge_hio_rdc_unshare( nxge_grp_set_t *set = &nxge->rx_set; nxge_grp_t *group; int grpid; + int i; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_rdc_unshare")); @@ -2484,6 +2499,14 @@ nxge_hio_rdc_unshare( } NXGE_DEBUG_MSG((nxge, HIO_CTL, "<== nxge_hio_rdc_unshare")); + + for (i = 0; i < NXGE_MAX_RDCS; i++) { + if (nxge->rx_ring_handles[i].channel == channel) { + nxge_rx_ring_start( + (mac_ring_driver_t)&nxge->rx_ring_handles[i], + nxge->rx_ring_handles[i].ring_gen_num); + } + } } /* diff --git a/usr/src/uts/common/io/nxge/nxge_hio_guest.c b/usr/src/uts/common/io/nxge/nxge_hio_guest.c index 3c552f2058..176c6a4e09 100644 --- a/usr/src/uts/common/io/nxge/nxge_hio_guest.c +++ b/usr/src/uts/common/io/nxge/nxge_hio_guest.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -169,7 +169,6 @@ static void nxge_check_guest_state(nxge_hio_vr_t *); * Context: * Guest domain */ -/* ARGSUSED */ int nxge_hio_vr_add(nxge_t *nxge) { @@ -411,6 +410,20 @@ nxge_guest_dc_alloc( return (0); } +int +nxge_hio_get_dc_htable_idx(nxge_t *nxge, vpc_type_t type, uint32_t channel) +{ + nxge_hio_dc_t *dc; + + ASSERT(isLDOMguest(nxge)); + + dc = nxge_grp_dc_find(nxge, type, channel); + if (dc == NULL) + return (-1); + + return (dc->ldg.vector); +} + /* * res_map_parse * diff --git a/usr/src/uts/common/io/nxge/nxge_intr.c b/usr/src/uts/common/io/nxge/nxge_intr.c index 0e6f85a0b6..2e73677ca5 100644 --- a/usr/src/uts/common/io/nxge/nxge_intr.c +++ b/usr/src/uts/common/io/nxge/nxge_intr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -903,26 +903,23 @@ nxge_hio_rdsv_add( * Context: * Guest domain */ -hv_rv_t -nxge_hio_ldsv_add( - nxge_t *nxge, - nxge_hio_dc_t *dc) +int +nxge_hio_ldsv_add(nxge_t *nxge, nxge_hio_dc_t *dc) { nxge_ldgv_t *control; nxge_ldg_t *group; nxge_ldv_t *device; - hv_rv_t hv_rv; if (dc->type == VP_BOUND_TX) { NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_ldsv_add(TDC %d)", dc->channel)); - if ((hv_rv = nxge_hio_tdsv_add(nxge, dc)) != 0) - return (hv_rv); + if (nxge_hio_tdsv_add(nxge, dc) != 0) + return (EIO); } else { NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_ldsv_add(RDC %d)", dc->channel)); - if ((hv_rv = nxge_hio_rdsv_add(nxge, dc)) != 0) - return (hv_rv); + if (nxge_hio_rdsv_add(nxge, dc) != 0) + return (EIO); } dc->ldg.map |= (1 << dc->ldg.ldsv); diff --git a/usr/src/uts/common/io/nxge/nxge_kstats.c b/usr/src/uts/common/io/nxge/nxge_kstats.c index c9fa73c35f..34cfafc58d 100644 --- a/usr/src/uts/common/io/nxge/nxge_kstats.c +++ b/usr/src/uts/common/io/nxge/nxge_kstats.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/nxge/nxge_impl.h> #include <sys/nxge/nxge_hio.h> @@ -2192,6 +2190,86 @@ nxge_m_tx_stat( return (val); } +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +nxge_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; + p_nxge_t nxgep = rhp->nxgep; + int r_index; + p_nxge_stats_t statsp; + + ASSERT(nxgep != NULL); + statsp = (p_nxge_stats_t)nxgep->statsp; + ASSERT(statsp != NULL); + r_index = rhp->index + nxgep->pt_config.hw_config.start_rdc; + + if (statsp->rdc_ksp[r_index] == NULL) + return (0); + + switch (stat) { + case MAC_STAT_IERRORS: + *val = statsp->rdc_stats[r_index].ierrors; + break; + + case MAC_STAT_RBYTES: + *val = statsp->rdc_stats[r_index].ibytes; + break; + + case MAC_STAT_IPACKETS: + *val = statsp->rdc_stats[r_index].ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +nxge_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; + p_nxge_t nxgep = rhp->nxgep; + int r_index; + p_nxge_stats_t statsp; + + ASSERT(nxgep != NULL); + statsp = (p_nxge_stats_t)nxgep->statsp; + ASSERT(statsp != NULL); + r_index = nxgep->pt_config.hw_config.tdc.start + rhp->index; + + if (statsp->tdc_ksp[r_index] == NULL) + return (0); + + switch (stat) { + case MAC_STAT_OERRORS: + *val = statsp->tdc_stats[r_index].oerrors; + break; + + case MAC_STAT_OBYTES: + *val = statsp->tdc_stats[r_index].obytes; + break; + + case MAC_STAT_OPACKETS: + *val = statsp->tdc_stats[r_index].opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + /* ARGSUSED */ int nxge_m_stat(void *arg, uint_t stat, uint64_t *value) diff --git a/usr/src/uts/common/io/nxge/nxge_mac.c b/usr/src/uts/common/io/nxge/nxge_mac.c 
index dd8387652a..38aa5cc722 100644 --- a/usr/src/uts/common/io/nxge/nxge_mac.c +++ b/usr/src/uts/common/io/nxge/nxge_mac.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -3340,16 +3340,46 @@ fail: return (NXGE_ERROR | rs); } +static npi_status_t +nxge_rx_mac_mcast_hash_table(p_nxge_t nxgep) +{ + uint32_t i; + uint16_t hashtab_e; + p_hash_filter_t hash_filter; + uint8_t portn; + npi_handle_t handle; + npi_status_t rs = NPI_SUCCESS; + + portn = NXGE_GET_PORT_NUM(nxgep->function_num); + handle = nxgep->npi_handle; + + /* + * Load the multicast hash filter bits. + */ + hash_filter = nxgep->hash_filter; + for (i = 0; i < MAC_MAX_HASH_ENTRY; i++) { + if (hash_filter != NULL) { + hashtab_e = (uint16_t)hash_filter->hash_filter_regs[ + (NMCFILTER_REGS - 1) - i]; + } else { + hashtab_e = 0; + } + + if ((rs = npi_mac_hashtab_entry(handle, OP_SET, portn, i, + (uint16_t *)&hashtab_e)) != NPI_SUCCESS) + return (rs); + } -/* Initialize the RxMAC sub-block */ + return (NPI_SUCCESS); +} +/* + * Initialize the RxMAC sub-block + */ nxge_status_t nxge_rx_mac_init(p_nxge_t nxgep) { npi_attr_t ap; - uint32_t i; - uint16_t hashtab_e; - p_hash_filter_t hash_filter; nxge_port_t portt; uint8_t portn; npi_handle_t handle; @@ -3370,9 +3400,8 @@ nxge_rx_mac_init(p_nxge_t nxgep) addr0 = ntohs(addr16p[2]); addr1 = ntohs(addr16p[1]); addr2 = ntohs(addr16p[0]); - SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR, addr0, addr1, addr2, - rs); - + SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR, + addr0, addr1, addr2, rs); if (rs != NPI_SUCCESS) goto fail; SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR_FILTER, 0, 0, 0, rs); @@ -3382,22 +3411,9 @@ nxge_rx_mac_init(p_nxge_t nxgep) if (rs != NPI_SUCCESS) goto fail; - /* - * Load the multicast hash filter bits. 
- */ - hash_filter = nxgep->hash_filter; - for (i = 0; i < MAC_MAX_HASH_ENTRY; i++) { - if (hash_filter != NULL) { - hashtab_e = (uint16_t)hash_filter->hash_filter_regs[ - (NMCFILTER_REGS - 1) - i]; - } else { - hashtab_e = 0; - } - - if ((rs = npi_mac_hashtab_entry(handle, OP_SET, portn, i, - (uint16_t *)&hashtab_e)) != NPI_SUCCESS) - goto fail; - } + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; if (portt == PORT_TYPE_XMAC) { if ((rs = npi_xmac_rx_iconfig(handle, INIT, portn, @@ -3413,48 +3429,51 @@ nxge_rx_mac_init(p_nxge_t nxgep) if (nxgep->filter.all_phys_cnt != 0) xconfig |= CFG_XMAC_RX_PROMISCUOUS; - if (nxgep->filter.all_multicast_cnt != 0) xconfig |= CFG_XMAC_RX_PROMISCUOUSGROUP; xconfig |= CFG_XMAC_RX_HASH_FILTER; - if ((rs = npi_xmac_rx_config(handle, INIT, portn, - xconfig)) != NPI_SUCCESS) + if ((rs = npi_xmac_rx_config(handle, INIT, + portn, xconfig)) != NPI_SUCCESS) goto fail; nxgep->mac.rx_config = xconfig; - /* Comparison of mac unique address is always enabled on XMAC */ - + /* + * Comparison of mac unique address is always + * enabled on XMAC + */ if ((rs = npi_xmac_zap_rx_counters(handle, portn)) != NPI_SUCCESS) goto fail; } else { - (void) nxge_fflp_init_hostinfo(nxgep); - if (npi_bmac_rx_iconfig(nxgep->npi_handle, INIT, portn, 0) != NPI_SUCCESS) goto fail; + nxgep->mac.rx_iconfig = NXGE_BMAC_RX_INTRS; + (void) nxge_fflp_init_hostinfo(nxgep); + bconfig = CFG_BMAC_RX_DISCARD_ON_ERR | CFG_BMAC_RX & ~CFG_BMAC_RX_STRIP_CRC; if (nxgep->filter.all_phys_cnt != 0) bconfig |= CFG_BMAC_RX_PROMISCUOUS; - if (nxgep->filter.all_multicast_cnt != 0) bconfig |= CFG_BMAC_RX_PROMISCUOUSGROUP; bconfig |= CFG_BMAC_RX_HASH_FILTER; - if ((rs = npi_bmac_rx_config(handle, INIT, portn, - bconfig)) != NPI_SUCCESS) + if ((rs = npi_bmac_rx_config(handle, INIT, + portn, bconfig)) != NPI_SUCCESS) goto fail; nxgep->mac.rx_config = bconfig; - /* Always enable comparison of mac unique address */ - if ((rs = npi_mac_altaddr_enable(handle, 
portn, 0)) - != NPI_SUCCESS) + /* + * Always enable comparison of mac unique address + */ + if ((rs = npi_mac_altaddr_enable(handle, + portn, 0)) != NPI_SUCCESS) goto fail; } @@ -4919,9 +4938,9 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) uint32_t mchash; p_hash_filter_t hash_filter; uint16_t hash_bit; - boolean_t rx_init = B_FALSE; uint_t j; nxge_status_t status = NXGE_OK; + npi_status_t rs; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_add_mcast_addr")); @@ -4933,6 +4952,7 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) nxgep->hash_filter = KMEM_ZALLOC(sizeof (hash_filter_t), KM_SLEEP); } + hash_filter = nxgep->hash_filter; j = mchash / HASH_REG_WIDTH; hash_bit = (1 << (mchash % HASH_REG_WIDTH)); @@ -4940,19 +4960,14 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) hash_filter->hash_bit_ref_cnt[mchash]++; if (hash_filter->hash_bit_ref_cnt[mchash] == 1) { hash_filter->hash_ref_cnt++; - rx_init = B_TRUE; - } - if (rx_init) { - if ((status = nxge_rx_mac_disable(nxgep)) != NXGE_OK) - goto fail; - if ((status = nxge_rx_mac_enable(nxgep)) != NXGE_OK) - goto fail; } - RW_EXIT(&nxgep->filter_lock); + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; + RW_EXIT(&nxgep->filter_lock); NXGE_DEBUG_MSG((nxgep, MAC_CTL, "<== nxge_add_mcast_addr")); - return (NXGE_OK); fail: RW_EXIT(&nxgep->filter_lock); @@ -4969,9 +4984,9 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) uint32_t mchash; p_hash_filter_t hash_filter; uint16_t hash_bit; - boolean_t rx_init = B_FALSE; uint_t j; nxge_status_t status = NXGE_OK; + npi_status_t rs; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_del_mcast_addr")); RW_ENTER_WRITER(&nxgep->filter_lock); @@ -4990,8 +5005,8 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) hash_bit = (1 << (mchash % HASH_REG_WIDTH)); hash_filter->hash_filter_regs[j] &= ~hash_bit; hash_filter->hash_ref_cnt--; - rx_init = B_TRUE; } + if (hash_filter->hash_ref_cnt == 0) { 
NXGE_DEBUG_MSG((NULL, STR_CTL, "De-allocating hash filter storage.")); @@ -4999,12 +5014,10 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) nxgep->hash_filter = NULL; } - if (rx_init) { - if ((status = nxge_rx_mac_disable(nxgep)) != NXGE_OK) - goto fail; - if ((status = nxge_rx_mac_enable(nxgep)) != NXGE_OK) - goto fail; - } + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; + RW_EXIT(&nxgep->filter_lock); NXGE_DEBUG_MSG((nxgep, MAC_CTL, "<== nxge_del_mcast_addr")); diff --git a/usr/src/uts/common/io/nxge/nxge_main.c b/usr/src/uts/common/io/nxge/nxge_main.c index c8df562520..885f521ed3 100644 --- a/usr/src/uts/common/io/nxge/nxge_main.c +++ b/usr/src/uts/common/io/nxge/nxge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -296,12 +296,13 @@ static boolean_t nxge_m_getcapab(void *, mac_capab_t, void *); static int nxge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int nxge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void nxge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); +static void nxge_priv_propinfo(const char *, mac_prop_info_handle_t); static int nxge_set_priv_prop(nxge_t *, const char *, uint_t, const void *); -static int nxge_get_priv_prop(nxge_t *, const char *, uint_t, uint_t, - void *, uint_t *); -static int nxge_get_def_val(nxge_t *, mac_prop_id_t, uint_t, void *); +static int nxge_get_priv_prop(nxge_t *, const char *, uint_t, void *); static void nxge_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); static void nxge_group_add_ring(mac_group_driver_t, mac_ring_driver_t, @@ -312,34 +313,32 @@ static void nxge_group_rem_ring(mac_group_driver_t, mac_ring_driver_t, static void 
nxge_niu_peu_reset(p_nxge_t nxgep); static void nxge_set_pci_replay_timeout(nxge_t *); -mac_priv_prop_t nxge_priv_props[] = { - {"_adv_10gfdx_cap", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_RW}, - {"_function_number", MAC_PROP_PERM_READ}, - {"_fw_version", MAC_PROP_PERM_READ}, - {"_port_mode", MAC_PROP_PERM_READ}, - {"_hot_swap_phy", MAC_PROP_PERM_READ}, - {"_rxdma_intr_time", MAC_PROP_PERM_RW}, - {"_rxdma_intr_pkts", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_sctp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_sctp", MAC_PROP_PERM_RW}, - {"_soft_lso_enable", MAC_PROP_PERM_RW} +char *nxge_priv_props[] = { + "_adv_10gfdx_cap", + "_adv_pause_cap", + "_function_number", + "_fw_version", + "_port_mode", + "_hot_swap_phy", + "_rxdma_intr_time", + "_rxdma_intr_pkts", + "_class_opt_ipv4_tcp", + "_class_opt_ipv4_udp", + "_class_opt_ipv4_ah", + "_class_opt_ipv4_sctp", + "_class_opt_ipv6_tcp", + "_class_opt_ipv6_udp", + "_class_opt_ipv6_ah", + "_class_opt_ipv6_sctp", + "_soft_lso_enable", + NULL }; -#define NXGE_MAX_PRIV_PROPS \ - (sizeof (nxge_priv_props)/sizeof (mac_priv_prop_t)) - #define NXGE_NEPTUNE_MAGIC 0x4E584745UL #define MAX_DUMP_SZ 256 #define NXGE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) mac_callbacks_t nxge_m_callbacks = { NXGE_M_CALLBACK_FLAGS, @@ -350,12 +349,14 @@ mac_callbacks_t nxge_m_callbacks = { nxge_m_multicst, NULL, NULL, + NULL, nxge_m_ioctl, nxge_m_getcapab, NULL, NULL, nxge_m_setprop, - nxge_m_getprop + nxge_m_getprop, + nxge_m_propinfo }; void @@ -4547,16 +4548,12 @@ nxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val) { nxge_t 
*nxgep = barg; - p_nxge_param_t param_arr; - p_nxge_stats_t statsp; + p_nxge_param_t param_arr = nxgep->param_arr; + p_nxge_stats_t statsp = nxgep->statsp; int err = 0; - uint8_t val; - uint32_t cur_mtu, new_mtu, old_framesize; - link_flowctrl_t fl; NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_m_setprop")); - param_arr = nxgep->param_arr; - statsp = nxgep->statsp; + mutex_enter(nxgep->genlock); if (statsp->port_stats.lb_mode != nxge_lb_normal && nxge_param_locked(pr_num)) { @@ -4570,139 +4567,115 @@ nxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (EBUSY); } - val = *(uint8_t *)pr_val; switch (pr_num) { - case MAC_PROP_EN_1000FDX_CAP: - nxgep->param_en_1000fdx = val; - param_arr[param_anar_1000fdx].value = val; - - goto reprogram; + case MAC_PROP_EN_1000FDX_CAP: + nxgep->param_en_1000fdx = + param_arr[param_anar_1000fdx].value = *(uint8_t *)pr_val; + goto reprogram; - case MAC_PROP_EN_100FDX_CAP: - nxgep->param_en_100fdx = val; - param_arr[param_anar_100fdx].value = val; + case MAC_PROP_EN_100FDX_CAP: + nxgep->param_en_100fdx = + param_arr[param_anar_100fdx].value = *(uint8_t *)pr_val; + goto reprogram; - goto reprogram; + case MAC_PROP_EN_10FDX_CAP: + nxgep->param_en_10fdx = + param_arr[param_anar_10fdx].value = *(uint8_t *)pr_val; + goto reprogram; - case MAC_PROP_EN_10FDX_CAP: - nxgep->param_en_10fdx = val; - param_arr[param_anar_10fdx].value = val; + case MAC_PROP_AUTONEG: + param_arr[param_autoneg].value = *(uint8_t *)pr_val; + goto reprogram; - goto reprogram; + case MAC_PROP_MTU: { + uint32_t cur_mtu, new_mtu, old_framesize; - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_STATUS: - case MAC_PROP_SPEED: - case MAC_PROP_DUPLEX: - err = EINVAL; /* cannot set read-only properties */ - 
NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: read only property %d", - pr_num)); - break; + cur_mtu = nxgep->mac.default_mtu; + ASSERT(pr_valsize >= sizeof (new_mtu)); + bcopy(pr_val, &new_mtu, sizeof (new_mtu)); - case MAC_PROP_AUTONEG: - param_arr[param_autoneg].value = val; + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: set MTU: %d is_jumbo %d", + new_mtu, nxgep->mac.is_jumbo)); - goto reprogram; + if (new_mtu == cur_mtu) { + err = 0; + break; + } - case MAC_PROP_MTU: - cur_mtu = nxgep->mac.default_mtu; - bcopy(pr_val, &new_mtu, sizeof (new_mtu)); - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: set MTU: %d is_jumbo %d", - new_mtu, nxgep->mac.is_jumbo)); + if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) { + err = EBUSY; + break; + } - if (new_mtu == cur_mtu) { - err = 0; - break; - } + if ((new_mtu < NXGE_DEFAULT_MTU) || + (new_mtu > NXGE_MAXIMUM_MTU)) { + err = EINVAL; + break; + } - if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) { - err = EBUSY; - break; - } + old_framesize = (uint32_t)nxgep->mac.maxframesize; + nxgep->mac.maxframesize = (uint16_t) + (new_mtu + NXGE_EHEADER_VLAN_CRC); + if (nxge_mac_set_framesize(nxgep)) { + nxgep->mac.maxframesize = + (uint16_t)old_framesize; + err = EINVAL; + break; + } - if ((new_mtu < NXGE_DEFAULT_MTU) || - (new_mtu > NXGE_MAXIMUM_MTU)) { - err = EINVAL; - break; - } + nxgep->mac.default_mtu = new_mtu; + nxgep->mac.is_jumbo = (new_mtu > NXGE_DEFAULT_MTU); - old_framesize = (uint32_t)nxgep->mac.maxframesize; - nxgep->mac.maxframesize = (uint16_t) - (new_mtu + NXGE_EHEADER_VLAN_CRC); - if (nxge_mac_set_framesize(nxgep)) { - nxgep->mac.maxframesize = - (uint16_t)old_framesize; - err = EINVAL; - break; - } + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: set MTU: %d maxframe %d", + new_mtu, nxgep->mac.maxframesize)); + break; + } - err = mac_maxsdu_update(nxgep->mach, new_mtu); - if (err) { - nxgep->mac.maxframesize = - (uint16_t)old_framesize; - err = EINVAL; - break; - } + case 
MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl; - nxgep->mac.default_mtu = new_mtu; - if (new_mtu > NXGE_DEFAULT_MTU) - nxgep->mac.is_jumbo = B_TRUE; - else - nxgep->mac.is_jumbo = B_FALSE; + ASSERT(pr_valsize >= sizeof (fl)); + bcopy(pr_val, &fl, sizeof (fl)); - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: set MTU: %d maxframe %d", - new_mtu, nxgep->mac.maxframesize)); + switch (fl) { + case LINK_FLOWCTRL_NONE: + param_arr[param_anar_pause].value = 0; break; - case MAC_PROP_FLOWCTRL: - bcopy(pr_val, &fl, sizeof (fl)); - switch (fl) { - default: - err = EINVAL; - break; - - case LINK_FLOWCTRL_NONE: - param_arr[param_anar_pause].value = 0; - break; - - case LINK_FLOWCTRL_RX: - param_arr[param_anar_pause].value = 1; - break; + case LINK_FLOWCTRL_RX: + param_arr[param_anar_pause].value = 1; + break; - case LINK_FLOWCTRL_TX: - case LINK_FLOWCTRL_BI: + case LINK_FLOWCTRL_TX: + case LINK_FLOWCTRL_BI: + err = EINVAL; + break; + default: + err = EINVAL; + break; + } +reprogram: + if ((err == 0) && !isLDOMguest(nxgep)) { + if (!nxge_param_link_update(nxgep)) { err = EINVAL; - break; } + } else { + err = EINVAL; + } + break; + } -reprogram: - if (err == 0) { - if (!nxge_param_link_update(nxgep)) { - err = EINVAL; - } - } - break; - case MAC_PROP_PRIVATE: - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: private property")); - err = nxge_set_priv_prop(nxgep, pr_name, pr_valsize, - pr_val); - break; + case MAC_PROP_PRIVATE: + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: private property")); + err = nxge_set_priv_prop(nxgep, pr_name, pr_valsize, pr_val); + break; - default: - err = ENOTSUP; - break; + default: + err = ENOTSUP; + break; } mutex_exit(nxgep->genlock); @@ -4714,142 +4687,198 @@ reprogram: static int nxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { nxge_t *nxgep = barg; p_nxge_param_t param_arr = nxgep->param_arr; 
p_nxge_stats_t statsp = nxgep->statsp; - int err = 0; - link_flowctrl_t fl; - uint64_t tmp = 0; - link_state_t ls; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_m_getprop: pr_num %d", pr_num)); - if (pr_valsize == 0) - return (EINVAL); + switch (pr_num) { + case MAC_PROP_DUPLEX: + *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; + break; - *perm = MAC_PROP_PERM_RW; + case MAC_PROP_SPEED: { + uint64_t val = statsp->mac_stats.link_speed * 1000000ull; - if ((is_default) && (pr_num != MAC_PROP_PRIVATE)) { - err = nxge_get_def_val(nxgep, pr_num, pr_valsize, pr_val); - return (err); + ASSERT(pr_valsize >= sizeof (val)); + bcopy(&val, pr_val, sizeof (val)); + break; } - bzero(pr_val, pr_valsize); - switch (pr_num) { - case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_getprop: duplex mode %d", - *(uint8_t *)pr_val)); - break; + case MAC_PROP_STATUS: { + link_state_t state = statsp->mac_stats.link_up ? + LINK_STATE_UP : LINK_STATE_DOWN; - case MAC_PROP_SPEED: - if (pr_valsize < sizeof (uint64_t)) - return (EINVAL); - *perm = MAC_PROP_PERM_READ; - tmp = statsp->mac_stats.link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - break; + ASSERT(pr_valsize >= sizeof (state)); + bcopy(&state, pr_val, sizeof (state)); + break; + } - case MAC_PROP_STATUS: - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); - *perm = MAC_PROP_PERM_READ; - if (!statsp->mac_stats.link_up) - ls = LINK_STATE_DOWN; - else - ls = LINK_STATE_UP; - bcopy(&ls, pr_val, sizeof (ls)); - break; + case MAC_PROP_AUTONEG: + *(uint8_t *)pr_val = param_arr[param_autoneg].value; + break; - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = - param_arr[param_autoneg].value; - break; + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl = param_arr[param_anar_pause].value != 0 ? 
+ LINK_FLOWCTRL_RX : LINK_FLOWCTRL_NONE; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (fl)); + bcopy(&fl, pr_val, sizeof (fl)); + break; + } - fl = LINK_FLOWCTRL_NONE; - if (param_arr[param_anar_pause].value) { - fl = LINK_FLOWCTRL_RX; - } - bcopy(&fl, pr_val, sizeof (fl)); - break; + case MAC_PROP_ADV_1000FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_1000fdx].value; + break; - case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_1000fdx].value; - break; + case MAC_PROP_EN_1000FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_1000fdx; + break; - case MAC_PROP_EN_1000FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_1000fdx; - break; + case MAC_PROP_ADV_100FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_100fdx].value; + break; - case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_100fdx].value; - break; + case MAC_PROP_EN_100FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_100fdx; + break; - case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_100fdx; - break; + case MAC_PROP_ADV_10FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_10fdx].value; + break; - case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_10fdx].value; - break; + case MAC_PROP_EN_10FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_10fdx; + break; - case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_10fdx; - break; + case MAC_PROP_PRIVATE: + return (nxge_get_priv_prop(nxgep, pr_name, pr_valsize, + pr_val)); - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - err = ENOTSUP; - break; + default: + return (ENOTSUP); + } - case MAC_PROP_PRIVATE: - err = nxge_get_priv_prop(nxgep, pr_name, pr_flags, 
- pr_valsize, pr_val, perm); - break; + return (0); +} - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = NXGE_DEFAULT_MTU; - range.range_uint32[0].mpur_max = NXGE_MAXIMUM_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } - default: - err = EINVAL; - break; +static void +nxge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + nxge_t *nxgep = barg; + p_nxge_stats_t statsp = nxgep->statsp; + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. + */ + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + /* + * Note that read-only properties don't need to + * provide default values since they cannot be + * changed by the administrator. 
+ */ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_RX); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + NXGE_DEFAULT_MTU, NXGE_MAXIMUM_MTU); + break; + + case MAC_PROP_PRIVATE: + nxge_priv_propinfo(pr_name, prh); + break; + } + + mutex_enter(nxgep->genlock); + if (statsp->port_stats.lb_mode != nxge_lb_normal && + nxge_param_locked(pr_num)) { + /* + * Some properties are locked (read-only) while the + * device is in any sort of loopback mode. + */ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } + mutex_exit(nxgep->genlock); +} - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "<== nxge_m_getprop")); +static void +nxge_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[64]; - return (err); + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_function_number") == 0 || + strcmp(pr_name, "_fw_version") == 0 || + strcmp(pr_name, "_port_mode") == 0 || + strcmp(pr_name, "_hot_swap_phy") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + + } else if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + (void) snprintf(valstr, sizeof (valstr), + "%d", RXDMA_RCR_TO_DEFAULT); + + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + (void) snprintf(valstr, sizeof (valstr), + "%d", RXDMA_RCR_PTHRES_DEFAULT); + + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv6_sctp") == 
0) { + (void) snprintf(valstr, sizeof (valstr), "%x", + NXGE_CLASS_FLOW_GEN_SERVER); + + } else if (strcmp(pr_name, "_soft_lso_enable") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 0); + + } else if (strcmp(pr_name, "_adv_10gfdx_cap") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 1); + + } else if (strcmp(pr_name, "_adv_pause_cap") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 1); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); } /* ARGSUSED */ @@ -5104,23 +5133,19 @@ nxge_set_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_valsize, } static int -nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val, uint_t *perm) +nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { p_nxge_param_t param_arr = nxgep->param_arr; char valstr[MAXNAMELEN]; int err = EINVAL; uint_t strsize; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_get_priv_prop: property %s", pr_name)); /* function number */ if (strcmp(pr_name, "_function_number") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->function_num); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5134,9 +5159,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Neptune firmware version */ if (strcmp(pr_name, "_fw_version") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%s", nxgep->vpd_info.ver); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5150,9 +5172,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* port PHY mode */ if (strcmp(pr_name, "_port_mode") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; switch (nxgep->mac.portmode) { case PORT_1G_COPPER: (void) snprintf(valstr, sizeof (valstr), "1G copper %s", 
@@ -5221,9 +5240,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Hot swappable PHY */ if (strcmp(pr_name, "_hot_swap_phy") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%s", nxgep->hot_swappable_phy ? "yes" : "no"); @@ -5241,12 +5257,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Receive Interrupt Blanking Parameters */ if (strcmp(pr_name, "_rxdma_intr_time") == 0) { err = 0; - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), - "%d", RXDMA_RCR_TO_DEFAULT); - goto done; - } - (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->intr_timeout); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5258,11 +5268,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { err = 0; - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), - "%d", RXDMA_RCR_PTHRES_DEFAULT); - goto done; - } (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->intr_threshold); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5274,12 +5279,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Classification and Load Distribution Configuration */ if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); @@ -5292,12 +5291,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_udp]); @@ -5309,12 +5302,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, goto done; } if (strcmp(pr_name, 
"_class_opt_ipv4_ah") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_ah]); @@ -5327,12 +5314,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); @@ -5345,12 +5326,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); @@ -5363,12 +5338,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv6_udp]); @@ -5381,12 +5350,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)&param_arr[param_class_opt_ipv6_ah]); @@ -5399,12 +5362,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, 
(caddr_t)&param_arr[param_class_opt_ipv6_sctp]); @@ -5418,11 +5375,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Software LSO */ if (strcmp(pr_name, "_soft_lso_enable") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%d", 0); - err = 0; - goto done; - } (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->soft_lso_enable); err = 0; @@ -5434,8 +5386,7 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_adv_10gfdx_cap") == 0) { err = 0; - if (is_default || - nxgep->param_arr[param_anar_10gfdx].value != 0) { + if (nxgep->param_arr[param_anar_10gfdx].value != 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 1); goto done; } else { @@ -5445,8 +5396,7 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_adv_pause_cap") == 0) { err = 0; - if (is_default || - nxgep->param_arr[param_anar_pause].value != 0) { + if (nxgep->param_arr[param_anar_pause].value != 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 1); goto done; } else { @@ -5587,6 +5537,7 @@ nxge_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) ring = nxgep->tx_rings->rings[channel]; MUTEX_ENTER(&ring->lock); + ASSERT(ring->tx_ring_handle == NULL); ring->tx_ring_handle = rhp->ring_handle; MUTEX_EXIT(&ring->lock); @@ -5605,11 +5556,12 @@ nxge_tx_ring_stop(mac_ring_driver_t rdriver) ring = nxgep->tx_rings->rings[channel]; MUTEX_ENTER(&ring->lock); + ASSERT(ring->tx_ring_handle != NULL); ring->tx_ring_handle = (mac_ring_handle_t)NULL; MUTEX_EXIT(&ring->lock); } -static int +int nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) { p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; @@ -5623,23 +5575,25 @@ nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) MUTEX_ENTER(&ring->lock); - if (nxgep->rx_channel_started[channel] == B_TRUE) { + if (ring->started) { + ASSERT(ring->started == B_FALSE); 
MUTEX_EXIT(&ring->lock); return (0); } /* set rcr_ring */ for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { - if ((nxgep->ldgvp->ldvp[i].is_rxdma == 1) && + if ((nxgep->ldgvp->ldvp[i].is_rxdma) && (nxgep->ldgvp->ldvp[i].channel == channel)) { ring->ldvp = &nxgep->ldgvp->ldvp[i]; ring->ldgp = nxgep->ldgvp->ldvp[i].ldgp; } } - nxgep->rx_channel_started[channel] = B_TRUE; ring->rcr_mac_handle = rhp->ring_handle; ring->rcr_gen_num = mr_gen_num; + ring->started = B_TRUE; + rhp->ring_gen_num = mr_gen_num; MUTEX_EXIT(&ring->lock); return (0); @@ -5657,11 +5611,53 @@ nxge_rx_ring_stop(mac_ring_driver_t rdriver) ring = nxgep->rx_rcr_rings->rcr_rings[channel]; MUTEX_ENTER(&ring->lock); - nxgep->rx_channel_started[channel] = B_FALSE; + ASSERT(ring->started == B_TRUE); ring->rcr_mac_handle = NULL; + ring->ldvp = NULL; + ring->ldgp = NULL; + ring->started = B_FALSE; MUTEX_EXIT(&ring->lock); } +static int +nxge_ring_get_htable_idx(p_nxge_t nxgep, mac_ring_type_t type, uint32_t channel) +{ + int i; + +#if defined(sun4v) + if (isLDOMguest(nxgep)) { + return (nxge_hio_get_dc_htable_idx(nxgep, + (type == MAC_RING_TYPE_TX) ? VP_BOUND_TX : VP_BOUND_RX, + channel)); + } +#endif + + ASSERT(nxgep->ldgvp != NULL); + + switch (type) { + case MAC_RING_TYPE_TX: + for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { + if ((nxgep->ldgvp->ldvp[i].is_txdma) && + (nxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + nxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + case MAC_RING_TYPE_RX: + for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { + if ((nxgep->ldgvp->ldvp[i].is_rxdma) && + (nxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + nxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + } + + return (-1); +} + /* * Callback funtion for MAC layer to register all rings. 
*/ @@ -5671,13 +5667,22 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, { p_nxge_t nxgep = (p_nxge_t)arg; p_nxge_hw_pt_cfg_t p_cfgp = &nxgep->pt_config.hw_config; + p_nxge_intr_t intrp; + uint32_t channel; + int htable_idx; + p_nxge_ring_handle_t rhandlep; + + ASSERT(nxgep != NULL); + ASSERT(p_cfgp != NULL); + ASSERT(infop != NULL); - NXGE_DEBUG_MSG((nxgep, TX_CTL, + NXGE_DEBUG_MSG((nxgep, DDI_CTL, "==> nxge_fill_ring 0x%x index %d", rtype, index)); + switch (rtype) { case MAC_RING_TYPE_TX: { - p_nxge_ring_handle_t rhandlep; + mac_intr_t *mintr = &infop->mri_intr; NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_fill_ring (TX) 0x%x index %d ntdcs %d", @@ -5689,17 +5694,31 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, rhandlep->index = index; rhandlep->ring_handle = rh; + channel = nxgep->pt_config.hw_config.tdc.start + index; + rhandlep->channel = channel; + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; + htable_idx = nxge_ring_get_htable_idx(nxgep, rtype, + channel); + if (htable_idx >= 0) + mintr->mi_ddi_handle = intrp->htable[htable_idx]; + else + mintr->mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhandlep; infop->mri_start = nxge_tx_ring_start; infop->mri_stop = nxge_tx_ring_stop; infop->mri_tx = nxge_tx_ring_send; - + infop->mri_stat = nxge_tx_ring_stat; + infop->mri_flags = MAC_RING_TX_SERIALIZE; break; } + case MAC_RING_TYPE_RX: { - p_nxge_ring_handle_t rhandlep; - int nxge_rindex; mac_intr_t nxge_mac_intr; + int nxge_rindex; + p_nxge_intr_t intrp; + + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_fill_ring (RX) 0x%x index %d nrdcs %d", @@ -5710,34 +5729,47 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, * Find the ring index in the nxge instance. 
*/ nxge_rindex = nxge_get_rxring_index(nxgep, rg_index, index); + channel = nxgep->pt_config.hw_config.start_rdc + index; + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; ASSERT((nxge_rindex >= 0) && (nxge_rindex < p_cfgp->max_rdcs)); rhandlep = &nxgep->rx_ring_handles[nxge_rindex]; rhandlep->nxgep = nxgep; rhandlep->index = nxge_rindex; rhandlep->ring_handle = rh; + rhandlep->channel = channel; /* * Entrypoint to enable interrupt (disable poll) and * disable interrupt (enable poll). */ + bzero(&nxge_mac_intr, sizeof (nxge_mac_intr)); nxge_mac_intr.mi_handle = (mac_intr_handle_t)rhandlep; nxge_mac_intr.mi_enable = (mac_intr_enable_t)nxge_disable_poll; nxge_mac_intr.mi_disable = (mac_intr_disable_t)nxge_enable_poll; + + htable_idx = nxge_ring_get_htable_idx(nxgep, rtype, + channel); + if (htable_idx >= 0) + nxge_mac_intr.mi_ddi_handle = intrp->htable[htable_idx]; + else + nxge_mac_intr.mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhandlep; infop->mri_start = nxge_rx_ring_start; infop->mri_stop = nxge_rx_ring_stop; - infop->mri_intr = nxge_mac_intr; /* ??? 
*/ + infop->mri_intr = nxge_mac_intr; infop->mri_poll = nxge_rx_poll; - + infop->mri_stat = nxge_rx_ring_stat; + infop->mri_flags = MAC_RING_RX_ENQUEUE; break; } + default: break; } - NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_fill_ring 0x%x", - rtype)); + NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_fill_ring 0x%x", rtype)); } static void @@ -6181,6 +6213,8 @@ nxge_add_intrs_adv_type(p_nxge_t nxgep, uint32_t int_type) return (NXGE_ERROR | NXGE_DDI_FAILED); } + + ldgp->htable_idx = x; intrp->intr_added++; } @@ -6341,6 +6375,8 @@ nxge_add_intrs_adv_type_fix(p_nxge_t nxgep, uint32_t int_type) return (NXGE_ERROR | NXGE_DDI_FAILED); } + + ldgp->htable_idx = x; intrp->intr_added++; } @@ -6516,13 +6552,10 @@ nxge_mac_register(p_nxge_t nxgep) macp->m_max_sdu = nxgep->mac.default_mtu; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = nxge_priv_props; - macp->m_priv_prop_count = NXGE_MAX_PRIV_PROPS; - if (isLDOMguest(nxgep)) { - macp->m_v12n = MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE; - } else { - macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | \ - MAC_VIRT_SERIALIZE; - } + if (isLDOMguest(nxgep)) + macp->m_v12n = MAC_VIRT_LEVEL1; + else + macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_mac_register: instance %d " @@ -6975,40 +7008,6 @@ nxge_create_msi_property(p_nxge_t nxgep) return (nmsi); } -/* ARGSUSED */ -static int -nxge_get_def_val(nxge_t *nxgep, mac_prop_id_t pr_num, uint_t pr_valsize, - void *pr_val) -{ - int err = 0; - link_flowctrl_t fl; - - switch (pr_num) { - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = 1; - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_RX; - bcopy(&fl, pr_val, sizeof (fl)); - break; - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - *(uint8_t *)pr_val = 1; - break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)pr_val = 1; - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - - 
/* * The following is a software around for the Neptune hardware's * interrupt bugs; The Neptune hardware may generate spurious interrupts when diff --git a/usr/src/uts/common/io/nxge/nxge_rxdma.c b/usr/src/uts/common/io/nxge/nxge_rxdma.c index 9751396cf8..16931c739b 100644 --- a/usr/src/uts/common/io/nxge/nxge_rxdma.c +++ b/usr/src/uts/common/io/nxge/nxge_rxdma.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1814,7 +1814,7 @@ nxge_rx_intr(void *arg1, void *arg2) channel = ldvp->channel; ldgp = ldvp->ldgp; - if (!isLDOMguest(nxgep) && (!nxgep->rx_channel_started[channel])) { + if (!isLDOMguest(nxgep) && (!rcrp->started)) { NXGE_DEBUG_MSG((nxgep, INT_CTL, "<== nxge_rx_intr: channel is not started")); @@ -2718,8 +2718,7 @@ nxge_receive_packet(p_nxge_t nxgep, is_valid, multi, is_tcp_udp, frag, error_type)); if (is_tcp_udp && !frag && !error_type) { - (void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0); + mac_hcksum_set(nmp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_receive_packet: Full tcp/udp cksum " "is_valid 0x%x multi 0x%llx pkt %d frag %d " diff --git a/usr/src/uts/common/io/nxge/nxge_send.c b/usr/src/uts/common/io/nxge/nxge_send.c index 4f7edf292a..7b78fa8af6 100644 --- a/usr/src/uts/common/io/nxge/nxge_send.c +++ b/usr/src/uts/common/io/nxge/nxge_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -62,6 +62,8 @@ nxge_tx_ring_task(void *arg) { p_tx_ring_t ring = (p_tx_ring_t)arg; + ASSERT(ring->tx_ring_handle != NULL); + MUTEX_ENTER(&ring->lock); (void) nxge_txdma_reclaim(ring->nxgep, ring, 0); MUTEX_EXIT(&ring->lock); @@ -274,8 +276,8 @@ nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) } } - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, + &value, &cksum_flags); if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) { start_offset += sizeof (ether_header_t); stuff_offset += sizeof (ether_header_t); @@ -809,7 +811,7 @@ nxge_start_control_header_only: i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); if (ngathers > nxge_tx_max_gathers) { good_packet = B_FALSE; - hcksum_retrieve(mp, NULL, NULL, &start_offset, + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, &value, &cksum_flags); diff --git a/usr/src/uts/common/io/nxge/nxge_txdma.c b/usr/src/uts/common/io/nxge/nxge_txdma.c index 68b823b01c..f3fd19a3c2 100644 --- a/usr/src/uts/common/io/nxge/nxge_txdma.c +++ b/usr/src/uts/common/io/nxge/nxge_txdma.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1021,7 +1021,7 @@ nxge_txdma_reclaim(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, int nmblks) "==> nxge_txdma_reclaim: dump desc:")); pkt_len = tx_desc_pp->bits.hdw.tr_len; - tdc_stats->obytes += pkt_len; + tdc_stats->obytes += (pkt_len - TX_PKT_HEADER_SIZE); tdc_stats->opackets += tx_desc_pp->bits.hdw.sop; NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_txdma_reclaim: pkt_len %d " diff --git a/usr/src/uts/common/io/nxge/nxge_virtual.c b/usr/src/uts/common/io/nxge/nxge_virtual.c index 27840f8b30..395ded69b7 100644 --- a/usr/src/uts/common/io/nxge/nxge_virtual.c +++ b/usr/src/uts/common/io/nxge/nxge_virtual.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2542,10 +2542,6 @@ nxge_set_hw_dma_config(p_nxge_t nxgep) tdc_grp_p->grp_index = group->index; } - for (i = 0; i < NXGE_MAX_RDCS; i++) { - nxgep->rx_channel_started[i] = B_FALSE; - } - /* * Setup RDC groups */ diff --git a/usr/src/uts/common/io/pcan/pcan.c b/usr/src/uts/common/io/pcan/pcan.c index a22601cca2..be1fbf4aec 100644 --- a/usr/src/uts/common/io/pcan/pcan.c +++ b/usr/src/uts/common/io/pcan/pcan.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -103,11 +103,12 @@ static int pcan_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int pcan_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void pcan_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); mac_callbacks_t pcan_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, pcan_gstat, pcan_start, pcan_stop, @@ -115,12 +116,14 @@ mac_callbacks_t pcan_m_callbacks = { pcan_sdmulti, pcan_saddr, pcan_tx, + NULL, pcan_ioctl, NULL, NULL, NULL, pcan_m_setprop, - pcan_m_getprop + pcan_m_getprop, + pcan_m_propinfo }; static char *pcan_name_str = "pcan"; @@ -4525,7 +4528,7 @@ pcan_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; pcan_maci_t *pcan_p = (pcan_maci_t *)arg; @@ -4536,9 +4539,6 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, err = EINVAL; return (err); } - bzero(wldp_buf, wldp_length); - - *perm = MAC_PROP_PERM_RW; switch (wldp_pr_num) { /* mac_prop_id */ @@ -4558,22 +4558,18 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, pcan_get_encrypt(pcan_p, wldp_buf); break; case MAC_PROP_WL_BSSTYPE: - *perm = MAC_PROP_PERM_READ; pcan_get_bsstype(pcan_p, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: pcan_get_linkstatus(pcan_p, wldp_buf); break; case MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; pcan_get_esslist(pcan_p, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; pcan_get_suprates(wldp_buf); 
break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; err = pcan_get_rssi(pcan_p, wldp_buf); break; case MAC_PROP_WL_RADIO: @@ -4610,6 +4606,23 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static void +pcan_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + _NOTE(ARGUNUSED(arg, pr_name)); + + switch (wldp_pr_num) { + case MAC_PROP_WL_BSSTYPE: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(mph, MAC_PROP_PERM_READ); + break; + } +} + + /* * quiesce(9E) entry point. * diff --git a/usr/src/uts/common/io/pcwl/pcwl.c b/usr/src/uts/common/io/pcwl/pcwl.c index 58d7028c39..bf1bfc7fd4 100644 --- a/usr/src/uts/common/io/pcwl/pcwl.c +++ b/usr/src/uts/common/io/pcwl/pcwl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -85,13 +85,14 @@ static int pcwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int pcwl_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void pcwl_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wlpd_pr_num, mac_prop_info_handle_t mph); static void pcwl_delay(pcwl_maci_t *, clock_t); mac_callbacks_t pcwl_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, pcwl_gstat, pcwl_start, pcwl_stop, @@ -99,12 +100,14 @@ mac_callbacks_t pcwl_m_callbacks = { pcwl_sdmulti, pcwl_saddr, pcwl_tx, + NULL, pcwl_ioctl, NULL, NULL, NULL, pcwl_m_setprop, - pcwl_m_getprop + pcwl_m_getprop, + pcwl_m_propinfo }; static char *pcwl_name_str = "pcwl"; @@ -4400,18 +4403,11 @@ pcwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; - pcwl_maci_t *pcwl_p = (pcwl_maci_t *)arg; - if (wldp_length == 0) { - err = EINVAL; - return (err); - } - bzero(wldp_buf, wldp_length); - mutex_enter(&pcwl_p->pcwl_glock); if (!(pcwl_p->pcwl_flag & PCWL_CARD_READY)) { mutex_exit(&pcwl_p->pcwl_glock); @@ -4419,8 +4415,6 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } - *perm = MAC_PROP_PERM_RW; - switch (wldp_pr_num) { /* mac_prop_id */ case MAC_PROP_WL_ESSID: @@ -4442,19 +4436,15 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, pcwl_get_bsstype(pcwl_p, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: - *perm = MAC_PROP_PERM_READ; err = pcwl_get_linkstatus(pcwl_p, wldp_buf); break; case 
MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; pcwl_get_esslist(pcwl_p, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; pcwl_get_suprates(wldp_buf); break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; pcwl_get_param_rssi(pcwl_p, wldp_buf); break; case MAC_PROP_WL_RADIO: @@ -4493,6 +4483,23 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +static void +pcwl_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wlpd_pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg, pr_name)); + + switch (wlpd_pr_num) { + case MAC_PROP_WL_LINKSTATUS: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } +} + + /* * quiesce(9E) entry point. * diff --git a/usr/src/uts/common/io/ral/rt2560.c b/usr/src/uts/common/io/ral/rt2560.c index 45e32d00dd..74733347cb 100644 --- a/usr/src/uts/common/io/ral/rt2560.c +++ b/usr/src/uts/common/io/ral/rt2560.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -172,10 +172,12 @@ static void rt2560_m_ioctl(void *, queue_t *, mblk_t *); static int rt2560_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rt2560_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rt2560_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rt2560_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2560_m_stat, rt2560_m_start, rt2560_m_stop, @@ -183,12 +185,14 @@ static mac_callbacks_t rt2560_m_callbacks = { rt2560_m_multicst, rt2560_m_unicst, rt2560_m_tx, + NULL, rt2560_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rt2560_m_setprop, - rt2560_m_getprop + rt2560_m_getprop, + rt2560_m_propinfo }; uint32_t ral_dbg_flags = 0; @@ -2138,18 +2142,27 @@ rt2560_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rt2560_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2560_softc *sc = arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +rt2560_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rt2560_softc *sc = arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + +static void rt2560_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct rt2560_softc *sc = (struct rt2560_softc *)arg; diff --git a/usr/src/uts/common/io/rge/rge_main.c b/usr/src/uts/common/io/rge/rge_main.c index 773d474301..7ad85f53de 100644 --- a/usr/src/uts/common/io/rge/rge_main.c +++ b/usr/src/uts/common/io/rge/rge_main.c @@ -123,6 +123,7 @@ static mac_callbacks_t rge_m_callbacks = { rge_m_multicst, rge_m_unicst, rge_m_tx, + NULL, 
rge_m_ioctl, rge_m_getcapab }; diff --git a/usr/src/uts/common/io/rge/rge_rxtx.c b/usr/src/uts/common/io/rge/rge_rxtx.c index a2d881c67b..9b16c2ae82 100644 --- a/usr/src/uts/common/io/rge/rge_rxtx.c +++ b/usr/src/uts/common/io/rge/rge_rxtx.c @@ -287,11 +287,11 @@ rge_receive_packet(rge_t *rgep, uint32_t slot) proto = rx_status & RBD_FLAG_PROTOCOL; if ((proto == RBD_FLAG_TCP && !(rx_status & RBD_TCP_CKSUM_ERR)) || (proto == RBD_FLAG_UDP && !(rx_status & RBD_UDP_CKSUM_ERR))) - pflags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + pflags |= HCK_FULLCKSUM_OK; if (proto != RBD_FLAG_NONE_IP && !(rx_status & RBD_IP_CKSUM_ERR)) - pflags |= HCK_IPV4_HDRCKSUM; + pflags |= HCK_IPV4_HDRCKSUM_OK; if (pflags != 0) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, pflags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, pflags); } return (mp); @@ -574,7 +574,7 @@ rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci) /* * h/w checksum offload flags */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); if (pflags & HCK_FULLCKSUM) { ASSERT(totlen >= sizeof (struct ether_header) + sizeof (struct ip)); diff --git a/usr/src/uts/common/io/rtls/rtls.c b/usr/src/uts/common/io/rtls/rtls.c index 4a0f6fef9a..d470fdfc7d 100644 --- a/usr/src/uts/common/io/rtls/rtls.c +++ b/usr/src/uts/common/io/rtls/rtls.c @@ -93,10 +93,6 @@ static int rtls_m_multicst(void *, boolean_t, const uint8_t *); static int rtls_m_promisc(void *, boolean_t); static mblk_t *rtls_m_tx(void *, mblk_t *); static int rtls_m_stat(void *, uint_t, uint64_t *); -static int rtls_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); -static int rtls_m_setprop(void *, const char *, mac_prop_id_t, uint_t, - const void *); static uint_t rtls_intr(caddr_t); @@ -184,13 +180,7 @@ static mac_callbacks_t rtls_m_callbacks = { rtls_m_promisc, rtls_m_multicst, rtls_m_unicst, - rtls_m_tx, - NULL, /* mc_ioctl */ - NULL, /* mc_getcapab */ - NULL, /* mc_open */ - 
NULL, /* mc_close */ - rtls_m_setprop, - rtls_m_getprop, + rtls_m_tx }; static mii_ops_t rtls_mii_ops = { @@ -912,24 +902,6 @@ rtls_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } -int -rtls_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) -{ - rtls_t *rtlsp = arg; - - return (mii_m_getprop(rtlsp->mii, name, num, flags, sz, val, perm)); -} - -int -rtls_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, - const void *val) -{ - rtls_t *rtlsp = arg; - - return (mii_m_setprop(rtlsp->mii, name, num, sz, val)); -} - /* * rtls_send() -- send a packet * diff --git a/usr/src/uts/common/io/rtw/rtw.c b/usr/src/uts/common/io/rtw/rtw.c index be463e40d5..c237184a6e 100644 --- a/usr/src/uts/common/io/rtw/rtw.c +++ b/usr/src/uts/common/io/rtw/rtw.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +41,7 @@ #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> +#include <sys/byteorder.h> #include "rtwreg.h" #include "rtwvar.h" #include "smc93cx6var.h" @@ -139,10 +140,12 @@ static void rtw_m_ioctl(void *, queue_t *, mblk_t *); static int rtw_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rtw_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rtw_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rtw_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rtw_m_stat, rtw_m_start, rtw_m_stop, @@ -150,12 +153,14 @@ static mac_callbacks_t rtw_m_callbacks = { rtw_m_multicst, rtw_m_unicst, rtw_m_tx, + NULL, rtw_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rtw_m_setprop, - rtw_m_getprop + rtw_m_getprop, + rtw_m_propinfo }; DDI_DEFINE_STREAM_OPS(rtw_dev_ops, nulldev, nulldev, rtw_attach, rtw_detach, @@ -2914,17 +2919,25 @@ rtw_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rtw_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { rtw_softc_t *rsc = arg; int err; err = ieee80211_getprop(&rsc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +rtw_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + rtw_softc_t *rsc = arg; + + ieee80211_propinfo(&rsc->sc_ic, pr_name, wldp_pr_num, prh); +} static int rtw_m_start(void *arg) diff --git a/usr/src/uts/common/io/rum/rum.c b/usr/src/uts/common/io/rum/rum.c index f76fb0fae1..abec7e727d 100644 --- a/usr/src/uts/common/io/rum/rum.c +++ b/usr/src/uts/common/io/rum/rum.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun 
Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> +#include <sys/byteorder.h> #define USBDRV_MAJOR_VER 2 #define USBDRV_MINOR_VER 0 @@ -260,10 +261,12 @@ static void rum_m_ioctl(void *, queue_t *, mblk_t *); static int rum_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rum_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rum_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rum_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rum_m_stat, rum_m_start, rum_m_stop, @@ -271,12 +274,14 @@ static mac_callbacks_t rum_m_callbacks = { rum_m_multicst, rum_m_unicst, rum_m_tx, + NULL, rum_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rum_m_setprop, - rum_m_getprop + rum_m_getprop, + rum_m_propinfo }; static void rum_amrr_start(struct rum_softc *, struct ieee80211_node *); @@ -2088,18 +2093,27 @@ rum_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rum_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rum_softc *sc = (struct rum_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +rum_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rum_softc *sc = (struct rum_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + +static void rum_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct rum_softc *sc = (struct rum_softc 
*)arg; diff --git a/usr/src/uts/common/io/rwd/rt2661.c b/usr/src/uts/common/io/rwd/rt2661.c index 6419dbd96d..df3e688e4d 100644 --- a/usr/src/uts/common/io/rwd/rt2661.c +++ b/usr/src/uts/common/io/rwd/rt2661.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -271,11 +271,13 @@ static int rt2661_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int rt2661_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void rt2661_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static mac_callbacks_t rt2661_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2661_m_stat, rt2661_m_start, rt2661_m_stop, @@ -283,12 +285,14 @@ static mac_callbacks_t rt2661_m_callbacks = { rt2661_m_multicst, rt2661_m_unicst, rt2661_m_tx, + NULL, rt2661_m_ioctl, NULL, NULL, NULL, rt2661_m_setprop, - rt2661_m_getprop + rt2661_m_getprop, + rt2661_m_propinfo }; #ifdef DEBUG @@ -2617,17 +2621,26 @@ rt2661_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int rt2661_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2661_softc *sc = (struct rt2661_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +rt2661_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct rt2661_softc *sc = (struct rt2661_softc *)arg; + + 
ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int rt2661_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/rwn/rt2860.c b/usr/src/uts/common/io/rwn/rt2860.c index 588d5c3730..703f1df19a 100644 --- a/usr/src/uts/common/io/rwn/rt2860.c +++ b/usr/src/uts/common/io/rwn/rt2860.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -245,12 +245,14 @@ static void rt2860_m_ioctl(void *, queue_t *, mblk_t *); static int rt2860_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); +static void rt2860_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t prh); static int rt2860_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); static mac_callbacks_t rt2860_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2860_m_stat, rt2860_m_start, rt2860_m_stop, @@ -258,12 +260,14 @@ static mac_callbacks_t rt2860_m_callbacks = { rt2860_m_multicst, rt2860_m_unicst, rt2860_m_tx, + NULL, rt2860_m_ioctl, NULL, NULL, NULL, rt2860_m_setprop, - rt2860_m_getprop + rt2860_m_getprop, + rt2860_m_propinfo }; #ifdef DEBUG @@ -2635,17 +2639,26 @@ rt2860_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int rt2860_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2860_softc *sc = (struct rt2860_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, 
perm); + wldp_length, wldp_buf); return (err); } +static void +rt2860_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rt2860_softc *sc = (struct rt2860_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int rt2860_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/sfe/sfe_util.c b/usr/src/uts/common/io/sfe/sfe_util.c index c600c6927a..fb6d4ceb9b 100644 --- a/usr/src/uts/common/io/sfe/sfe_util.c +++ b/usr/src/uts/common/io/sfe/sfe_util.c @@ -32,7 +32,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -4069,6 +4069,7 @@ static mac_callbacks_t gem_m_callbacks = { gem_m_multicst, gem_m_unicst, gem_m_tx, + NULL, gem_m_ioctl, gem_m_getcapab, }; diff --git a/usr/src/uts/common/io/simnet/simnet.c b/usr/src/uts/common/io/simnet/simnet.c index f1a172dd9b..727fbbad8e 100644 --- a/usr/src/uts/common/io/simnet/simnet.c +++ b/usr/src/uts/common/io/simnet/simnet.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -109,10 +109,12 @@ static mblk_t *simnet_m_tx(void *, mblk_t *); static int simnet_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int simnet_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void simnet_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t simnet_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP), + (MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO), simnet_m_stat, simnet_m_start, simnet_m_stop, @@ -120,12 +122,14 @@ static mac_callbacks_t simnet_m_callbacks = { simnet_m_multicst, simnet_m_unicst, simnet_m_tx, + NULL, simnet_m_ioctl, NULL, NULL, NULL, simnet_m_setprop, - simnet_m_getprop + simnet_m_getprop, + simnet_m_propinfo }; /* @@ -1228,17 +1232,16 @@ simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } static int -simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_flags, +simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_valsize, void *pr_val) { simnet_wifidev_t *wdev = sdev->sd_wifidev; - boolean_t is_default = ((pr_flags & MAC_PROP_DEFAULT) != 0); int err = 0; int value; if (strcmp(pr_name, "_wl_esslist") == 0) { /* Returns num of _wl_ess_conf_t that have been set */ - value = (is_default ? 0:wdev->swd_esslist_num); + value = wdev->swd_esslist_num; } else if (strcmp(pr_name, "_wl_connected") == 0) { value = ((wdev->swd_linkstatus == WL_CONNECTED) ? 
1:0); } else { @@ -1252,7 +1255,7 @@ simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_flags, static int simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { simnet_dev_t *sdev = arg; simnet_wifidev_t *wdev = sdev->sd_wifidev; @@ -1276,9 +1279,6 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_AUTH_MODE: case MAC_PROP_WL_ENCRYPTION: break; - case MAC_PROP_WL_BSSTYPE: - *perm = MAC_PROP_PERM_READ; - break; case MAC_PROP_WL_LINKSTATUS: (void) memcpy(wldp_buf, &wdev->swd_linkstatus, sizeof (wdev->swd_linkstatus)); @@ -1286,7 +1286,6 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_ESS_LIST: { wl_ess_conf_t *w_ess_conf; - *perm = MAC_PROP_PERM_READ; ((wl_ess_list_t *)wldp_buf)->wl_ess_list_num = wdev->swd_esslist_num; /* LINTED E_BAD_PTR_CAST_ALIGN */ @@ -1299,11 +1298,7 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } break; } - case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; - break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; *(wl_rssi_t *)wldp_buf = wdev->swd_rssi; break; case MAC_PROP_WL_RADIO: @@ -1314,8 +1309,8 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_DESIRED_RATES: break; case MAC_PROP_PRIVATE: - err = simnet_get_priv_prop(sdev, pr_name, pr_flags, - wldp_length, wldp_buf); + err = simnet_get_priv_prop(sdev, pr_name, wldp_length, + wldp_buf); break; default: err = ENOTSUP; @@ -1324,3 +1319,40 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +static void +simnet_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[MAXNAMELEN]; + + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_wl_esslist") == 0) { + (void) snprintf(valstr, 
sizeof (valstr), "%d", 0); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); +} + +static void +simnet_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + simnet_dev_t *sdev = arg; + + if (sdev->sd_type == DL_ETHER) + return; + + switch (wldp_pr_num) { + case MAC_PROP_WL_BSSTYPE: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + case MAC_PROP_PRIVATE: + simnet_priv_propinfo(pr_name, prh); + break; + } +} diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c index 9e33c31b7b..05f74dd4c1 100644 --- a/usr/src/uts/common/io/softmac/softmac_main.c +++ b/usr/src/uts/common/io/softmac/softmac_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -84,11 +84,13 @@ static boolean_t softmac_m_getcapab(void *, mac_capab_t, void *); static int softmac_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int softmac_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); - + uint_t, void *); +static void softmac_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); #define SOFTMAC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t softmac_m_callbacks = { SOFTMAC_M_CALLBACK_FLAGS, @@ -99,12 +101,14 @@ static mac_callbacks_t softmac_m_callbacks = { softmac_m_multicst, softmac_m_unicst, softmac_m_tx, + NULL, softmac_m_ioctl, softmac_m_getcapab, softmac_m_open, softmac_m_close, softmac_m_setprop, - softmac_m_getprop + softmac_m_getprop, + softmac_m_propinfo }; /*ARGSUSED*/ @@ -1468,8 +1472,8 @@ softmac_m_setprop(void *arg, const char *name, mac_prop_id_t id, } static int -softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, - uint_t valsize, void *val, uint_t *perm) +softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, + uint_t valsize, void *val) { softmac_t *softmac = arg; char *fpstr; @@ -1478,18 +1482,15 @@ softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, return (ENOTSUP); if (strcmp(name, "_fastpath") == 0) { - if ((flags & MAC_PROP_DEFAULT) != 0) - return (ENOTSUP); - - *perm = MAC_PROP_PERM_READ; mutex_enter(&softmac->smac_fp_mutex); fpstr = (DATAPATH_MODE(softmac) == SOFTMAC_SLOWPATH) ? "disabled" : "enabled"; mutex_exit(&softmac->smac_fp_mutex); } else if (strcmp(name, "_disable_fastpath") == 0) { - *perm = MAC_PROP_PERM_RW; - fpstr = ((flags & MAC_PROP_DEFAULT) != 0) ? "false" : - (softmac->smac_fastpath_admin_disabled ? "true" : "false"); + fpstr = softmac->smac_fastpath_admin_disabled ? 
+ "true" : "false"; + } else if (strcmp(name, "_softmac") == 0) { + fpstr = "true"; } else { return (ENOTSUP); } @@ -1497,6 +1498,23 @@ softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, return (strlcpy(val, fpstr, valsize) >= valsize ? EINVAL : 0); } +static void +softmac_m_propinfo(void *arg, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + + if (id != MAC_PROP_PRIVATE) + return; + + if (strcmp(name, "_fastpath") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else if (strcmp(name, "_disable_fastpath") == 0) { + mac_prop_info_set_default_str(prh, "false"); + } + +} + int softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp) { diff --git a/usr/src/uts/common/io/softmac/softmac_pkt.c b/usr/src/uts/common/io/softmac/softmac_pkt.c index 4641fb3372..6389ec1328 100644 --- a/usr/src/uts/common/io/softmac/softmac_pkt.c +++ b/usr/src/uts/common/io/softmac/softmac_pkt.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,10 +75,8 @@ softmac_rput_process_data(softmac_lower_t *slp, mblk_t *mp) "copymsg failed"); goto failed; } - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, - &value, &flags); - VERIFY(hcksum_assoc(tmp, NULL, NULL, start, stuff, end, - value, flags, KM_NOSLEEP) == 0); + mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); + mac_hcksum_set(tmp, start, stuff, end, value, flags); freemsg(mp); mp = tmp; } diff --git a/usr/src/uts/common/io/uath/uath.c b/usr/src/uts/common/io/uath/uath.c index 0bc0ba7ea1..3353369389 100644 --- a/usr/src/uts/common/io/uath/uath.c +++ b/usr/src/uts/common/io/uath/uath.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -60,6 +60,7 @@ #include <sys/strsun.h> #include <sys/modctl.h> #include <sys/devops.h> +#include <sys/byteorder.h> #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> @@ -193,10 +194,12 @@ static void uath_m_ioctl(void *, queue_t *, mblk_t *); static int uath_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int uath_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void uath_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t uath_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, uath_m_stat, uath_m_start, uath_m_stop, @@ -204,12 +207,14 @@ static mac_callbacks_t uath_m_callbacks = { uath_m_multicst, uath_m_unicst, uath_m_tx, + NULL, uath_m_ioctl, NULL, NULL, NULL, uath_m_setprop, - uath_m_getprop + uath_m_getprop, + uath_m_propinfo }; static usb_alt_if_data_t * @@ -2874,16 +2879,25 @@ uath_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int uath_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct uath_softc *sc = (struct uath_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +uath_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct uath_softc *sc = (struct uath_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int uath_m_stat(void *arg, uint_t stat, uint64_t *val) { diff --git a/usr/src/uts/common/io/ural/ural.c b/usr/src/uts/common/io/ural/ural.c index 82e9d711e4..a55ecd9f61 100644 --- a/usr/src/uts/common/io/ural/ural.c +++ b/usr/src/uts/common/io/ural/ural.c 
@@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +29,7 @@ #include <sys/strsubr.h> #include <sys/modctl.h> #include <sys/devops.h> +#include <sys/byteorder.h> #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> @@ -264,10 +265,12 @@ static void ural_m_ioctl(void *, queue_t *, mblk_t *); static int ural_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int ural_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void ural_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t ural_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ural_m_stat, ural_m_start, ural_m_stop, @@ -275,12 +278,14 @@ static mac_callbacks_t ural_m_callbacks = { ural_m_multicst, ural_m_unicst, ural_m_tx, + NULL, ural_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, ural_m_setprop, - ural_m_getprop + ural_m_getprop, + ural_m_propinfo }; static void ural_amrr_start(struct ural_softc *, struct ieee80211_node *); @@ -2067,18 +2072,27 @@ ural_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int ural_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ural_softc *sc = (struct ural_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +ural_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct ural_softc *sc = (struct ural_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + +static void 
ural_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct ural_softc *sc = (struct ural_softc *)arg; diff --git a/usr/src/uts/common/io/urtw/urtw.c b/usr/src/uts/common/io/urtw/urtw.c index 3060ea80a0..b649ca609d 100644 --- a/usr/src/uts/common/io/urtw/urtw.c +++ b/usr/src/uts/common/io/urtw/urtw.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -83,11 +83,13 @@ static mblk_t *urtw_m_tx(void *, mblk_t *); static void urtw_m_ioctl(void *, queue_t *, mblk_t *); static int urtw_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int urtw_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +static int urtw_m_getprop(void *, const char *, mac_prop_id_t, + uint_t, void *); +static void urtw_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t urtw_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, urtw_m_stat, urtw_m_start, urtw_m_stop, @@ -95,12 +97,14 @@ static mac_callbacks_t urtw_m_callbacks = { urtw_m_multicst, urtw_m_unicst, urtw_m_tx, + NULL, urtw_m_ioctl, NULL, NULL, NULL, urtw_m_setprop, - urtw_m_getprop + urtw_m_getprop, + urtw_m_propinfo }; static int urtw_tx_start(struct urtw_softc *, mblk_t *, int); @@ -4024,16 +4028,25 @@ urtw_m_promisc(void *arg, boolean_t on) static int urtw_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct urtw_softc *sc = (struct urtw_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +urtw_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t 
mph) +{ + struct urtw_softc *sc = (struct urtw_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int urtw_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index b62ed5a16c..2b063cf79c 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -82,8 +82,8 @@ static int vnic_m_stat(void *, uint_t, uint64_t *); static void vnic_m_ioctl(void *, queue_t *, mblk_t *); static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static void vnic_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mblk_t *vnic_m_tx(void *, mblk_t *); static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); static void vnic_notify_cb(void *, mac_notify_type_t); @@ -101,7 +101,7 @@ static mod_hash_t *vnic_hash; #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) #define VNIC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) static mac_callbacks_t vnic_m_callbacks = { VNIC_M_CALLBACK_FLAGS, @@ -112,12 +112,14 @@ static mac_callbacks_t vnic_m_callbacks = { vnic_m_multicst, vnic_m_unicst, vnic_m_tx, + NULL, vnic_m_ioctl, vnic_m_capab_get, NULL, NULL, vnic_m_setprop, - vnic_m_getprop + NULL, + vnic_m_propinfo }; void @@ -185,7 +187,7 @@ static int vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg, uint8_t *mac_addr_arg, uint16_t flags, 
vnic_ioc_diag_t *diag, - uint16_t vid) + uint16_t vid, boolean_t req_hwgrp_flag) { mac_diag_t mac_diag; uint16_t mac_flags = 0; @@ -290,7 +292,14 @@ vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, /* * We get the address here since we copy it in the * vnic's vn_addr. + * We can't ask for hardware resources since we + * don't currently support hardware classification + * for these MAC clients. */ + if (req_hwgrp_flag) { + *diag = VNIC_IOC_DIAG_NO_HWRINGS; + return (ENOTSUP); + } mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg); *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); mac_flags |= MAC_UNICAST_VNIC_PRIMARY; @@ -330,8 +339,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0); char vnic_name[MAXNAMELEN]; const mac_info_t *minfop; - uint32_t req_hwgrp_flag = ((flags & VNIC_IOC_CREATE_REQ_HWRINGS) != 0) ? - MAC_OPEN_FLAGS_REQ_HWRINGS : 0; + uint32_t req_hwgrp_flag = B_FALSE; *diag = VNIC_IOC_DIAG_NONE; @@ -394,11 +402,15 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL, NULL); err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch, - vnic_name, MAC_OPEN_FLAGS_IS_VNIC | req_hwgrp_flag); + vnic_name, MAC_OPEN_FLAGS_IS_VNIC); if (err != 0) goto bail; if (mrp != NULL) { + if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 || + (mrp->mrp_mask & MRP_TX_RINGS) != 0) { + req_hwgrp_flag = B_TRUE; + } err = mac_client_set_resources(vnic->vn_mch, mrp); if (err != 0) goto bail; @@ -406,10 +418,11 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, /* assign a MAC address to the VNIC */ err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot, - mac_prefix_len, mac_len, mac_addr, flags, diag, vid); + mac_prefix_len, mac_len, mac_addr, flags, diag, vid, + req_hwgrp_flag); if (err != 0) { vnic->vn_muh = NULL; - if (diag != NULL && req_hwgrp_flag != 0) + if (diag != NULL && req_hwgrp_flag) *diag = 
VNIC_IOC_DIAG_NO_HWRINGS; goto bail; } @@ -495,7 +508,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, /* Set the VNIC's MAC in the client */ if (!is_anchor) - mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh); + mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp); err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); if (err != 0) { @@ -850,38 +863,25 @@ vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, return (err); } -/*ARGSUSED*/ -static int -vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +/* ARGSUSED */ +static void vnic_m_propinfo(void *m_driver, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh) { - mac_propval_range_t range; - vnic_t *vn = m_driver; - int err = ENOTSUP; + vnic_t *vn = m_driver; /* MTU setting allowed only on an etherstub */ if (vn->vn_link_id != DATALINK_INVALID_LINKID) - return (err); + return; switch (pr_num) { case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = ANCHOR_VNIC_MIN_MTU; - range.range_uint32[0].mpur_max = ANCHOR_VNIC_MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); - return (0); - default: + mac_prop_info_set_range_uint32(prh, + ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU); break; } - - return (err); } + int vnic_info(vnic_info_t *info, cred_t *credp) { diff --git a/usr/src/uts/common/io/vr/vr.c b/usr/src/uts/common/io/vr/vr.c index 03fadffe72..4a756212ea 100644 --- a/usr/src/uts/common/io/vr/vr.c +++ b/usr/src/uts/common/io/vr/vr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -148,7 +148,7 @@ static ddi_dma_attr_t vr_data_dma_attr = { }; static mac_callbacks_t vr_mac_callbacks = { - MC_SETPROP|MC_GETPROP, /* Which callbacks are set */ + MC_SETPROP|MC_GETPROP|MC_PROPINFO, /* Which callbacks are set */ vr_mac_getstat, /* Get the value of a statistic */ vr_mac_start, /* Start the device */ vr_mac_stop, /* Stop the device */ @@ -156,12 +156,14 @@ static mac_callbacks_t vr_mac_callbacks = { vr_mac_set_multicast, /* Enable or disable a multicast addr */ vr_mac_set_ether_addr, /* Set the unicast MAC address */ vr_mac_tx_enqueue_list, /* Transmit a packet */ + NULL, NULL, /* Process an unknown ioctl */ NULL, /* Get capability information */ NULL, /* Open the device */ NULL, /* Close the device */ vr_mac_setprop, /* Set properties of the device */ - vr_mac_getprop /* Get properties of the device */ + vr_mac_getprop, /* Get properties of the device */ + vr_mac_propinfo /* Get properties attributes */ }; /* @@ -3157,7 +3159,7 @@ vr_remove_kstats(vr_t *vrp) */ int vr_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { vr_t *vrp; uint32_t err; @@ -3168,228 +3170,220 @@ vr_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, err = 0; vrp = (vr_t *)arg; - if ((pr_flags & MAC_PROP_DEFAULT) != 0) { - /* - * Defaults depend on the PHY/MAC's capabilities - * All defaults are read/write, otherwise reset-linkprop fails - * with enotsup .... 
- */ - *perm = MAC_PROP_PERM_RW; - switch (pr_num) { - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - val = 0; - break; + switch (pr_num) { + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + val = 0; + break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASEX_FD) != 0; - break; + case MAC_PROP_ADV_100FDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_TX_FD) != 0; + break; - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASEX) != 0; - break; + case MAC_PROP_ADV_100HDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_TX) != 0; + break; - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASE_T4) != 0; - break; + case MAC_PROP_ADV_100T4_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_T4) != 0; + break; - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_10_FD) != 0; - break; + case MAC_PROP_ADV_10FDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_10BASE_T_FD) != 0; + break; - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_10) != 0; - break; + case MAC_PROP_ADV_10HDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_10BASE_T) != 0; + break; - case MAC_PROP_AUTONEG: - case MAC_PROP_EN_AUTONEG: - val = (vrp->chip.mii.status & - MII_STATUS_CANAUTONEG) != 0; - break; + case MAC_PROP_AUTONEG: + val = (vrp->chip.mii.control & + MII_CONTROL_ANE) != 0; + break; - case MAC_PROP_DUPLEX: - val = VR_LINK_DUPLEX_FULL; - break; + case MAC_PROP_DUPLEX: + val = vrp->chip.link.duplex; + break; - case MAC_PROP_FLOWCTRL: - val = VR_PAUSE_BIDIRECTIONAL; - break; + case MAC_PROP_EN_100FDX_CAP: + val = 
(vrp->param.anadv_en & + MII_ABILITY_100BASE_TX_FD) != 0; + break; - case MAC_PROP_MTU: - val = ETHERMTU; - break; + case MAC_PROP_EN_100HDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_100BASE_TX) != 0; + break; - case MAC_PROP_SPEED: - val = 100 * 1000 * 1000; - break; + case MAC_PROP_EN_100T4_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_100BASE_T4) != 0; + break; - case MAC_PROP_STATUS: - val = VR_LINK_STATE_UP; - break; + case MAC_PROP_EN_10FDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_10BASE_T_FD) != 0; + break; - default: - return (ENOTSUP); - } - } else { - switch (pr_num) { - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - val = 0; - *perm = MAC_PROP_PERM_READ; - break; + case MAC_PROP_EN_10HDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_10BASE_T) != 0; + break; + + case MAC_PROP_EN_AUTONEG: + val = vrp->param.an_en == VR_LINK_AUTONEG_ON; + break; + + case MAC_PROP_FLOWCTRL: + val = vrp->chip.link.flowctrl; + break; + + case MAC_PROP_MTU: + val = vrp->param.mtu; + break; - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; + case MAC_PROP_SPEED: + if (vrp->chip.link.speed == + VR_LINK_SPEED_100MBS) + val = 100 * 1000 * 1000; + else if (vrp->chip.link.speed == + VR_LINK_SPEED_10MBS) + val = 10 * 1000 * 1000; + else val = 0; - break; + break; - case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_TX_FD) != 0; - break; + case MAC_PROP_STATUS: + val = vrp->chip.link.state; + break; - case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_TX) != 0; - break; + default: + err = ENOTSUP; + break; + } - case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_T4) != 0; - break; + if (err == 0 && pr_num != MAC_PROP_PRIVATE) { + if (pr_valsize == sizeof (uint64_t)) + *(uint64_t *)pr_val = val; + else if (pr_valsize 
== sizeof (uint32_t)) + *(uint32_t *)pr_val = val; + else if (pr_valsize == sizeof (uint16_t)) + *(uint16_t *)pr_val = val; + else if (pr_valsize == sizeof (uint8_t)) + *(uint8_t *)pr_val = val; + else + err = EINVAL; + } + return (err); +} - case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_10BASE_T_FD) != 0; - break; +void +vr_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + vr_t *vrp = (vr_t *)arg; + uint8_t val, perm; - case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_10BASE_T) != 0; - break; + /* Since we have no private properties */ + _NOTE(ARGUNUSED(pr_name)) - case MAC_PROP_AUTONEG: - *perm = MAC_PROP_PERM_RW; - val = (vrp->chip.mii.control & - MII_CONTROL_ANE) != 0; - break; + switch (pr_num) { + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; - case MAC_PROP_DUPLEX: - /* - * Writability depends on autoneg. 
- */ - if ((vrp->chip.mii.control & - MII_CONTROL_ANE) == 0) - *perm = MAC_PROP_PERM_RW; - else - *perm = MAC_PROP_PERM_READ; - val = vrp->chip.link.duplex; - break; + case MAC_PROP_EN_100FDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASEX_FD) != 0; + break; - case MAC_PROP_EN_100FDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_TX_FD) != 0; - break; + case MAC_PROP_EN_100HDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASEX) != 0; + break; - case MAC_PROP_EN_100HDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_TX) != 0; - break; + case MAC_PROP_EN_100T4_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASE_T4) != 0; + break; - case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_T4) != 0; - break; + case MAC_PROP_EN_10FDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_10_FD) != 0; + break; - case MAC_PROP_EN_10FDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_10BASE_T_FD) != 0; - break; + case MAC_PROP_EN_10HDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_10) != 0; + break; - case MAC_PROP_EN_10HDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_10BASE_T) != 0; - break; + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + val = (vrp->chip.mii.status & + MII_STATUS_CANAUTONEG) != 0; + break; - case MAC_PROP_EN_AUTONEG: - *perm = MAC_PROP_PERM_RW; - val = vrp->param.an_en == VR_LINK_AUTONEG_ON; - break; + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_BI); + return; - case MAC_PROP_FLOWCTRL: - *perm = MAC_PROP_PERM_RW; - val = vrp->chip.link.flowctrl; - break; + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + ETHERMTU, ETHERMTU); + return; - case MAC_PROP_MTU: - *perm = MAC_PROP_PERM_RW; - val = vrp->param.mtu; - break; + case MAC_PROP_DUPLEX: + /* + * Writability depends on autoneg. 
+ */ + perm = ((vrp->chip.mii.control & + MII_CONTROL_ANE) == 0) ? MAC_PROP_PERM_RW : + MAC_PROP_PERM_READ; + mac_prop_info_set_perm(prh, perm); + + if (perm == MAC_PROP_PERM_RW) { + mac_prop_info_set_default_uint8(prh, + VR_LINK_DUPLEX_FULL); + } + return; - case MAC_PROP_SPEED: - /* - * Writability depends on autoneg. - */ - if ((vrp->chip.mii.control & - MII_CONTROL_ANE) == 0) - *perm = MAC_PROP_PERM_RW; - else - *perm = MAC_PROP_PERM_READ; - if (vrp->chip.link.speed == - VR_LINK_SPEED_100MBS) - val = 100 * 1000 * 1000; - else if (vrp->chip.link.speed == - VR_LINK_SPEED_10MBS) - val = 10 * 1000 * 1000; - else - val = 0; - break; + case MAC_PROP_SPEED: + perm = ((vrp->chip.mii.control & + MII_CONTROL_ANE) == 0) ? + MAC_PROP_PERM_RW : MAC_PROP_PERM_READ; + mac_prop_info_set_perm(prh, perm); + + if (perm == MAC_PROP_PERM_RW) { + mac_prop_info_set_default_uint64(prh, + 100 * 1000 * 1000); + } + return; - case MAC_PROP_STATUS: - val = vrp->chip.link.state; - break; + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; - default: - err = ENOTSUP; - break; + default: + return; } - } - if (err == 0 && pr_num != MAC_PROP_PRIVATE) { - if (pr_valsize == sizeof (uint64_t)) - *(uint64_t *)pr_val = val; - else if (pr_valsize == sizeof (uint32_t)) - *(uint32_t *)pr_val = val; - else if (pr_valsize == sizeof (uint16_t)) - *(uint16_t *)pr_val = val; - else if (pr_valsize == sizeof (uint8_t)) - *(uint8_t *)pr_val = val; - else - err = EINVAL; - } - return (err); + + mac_prop_info_set_default_uint8(prh, val); } /* diff --git a/usr/src/uts/common/io/vr/vr.h b/usr/src/uts/common/io/vr/vr.h index f120895b8f..29b0144272 100644 --- a/usr/src/uts/common/io/vr/vr.h +++ b/usr/src/uts/common/io/vr/vr.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -487,11 +487,13 @@ int vr_mac_set_ether_addr(void *vrp, const uint8_t *macaddr); mblk_t *vr_mac_tx_enqueue_list(void *p, mblk_t *mp); int vr_mac_getprop(void *arg, const char *pr_name, - mac_prop_id_t pr_num, uint_t pr_flags, - uint_t pr_valsize, void *pr_val, uint_t *perm); + mac_prop_id_t pr_num, uint_t pr_valsize, + void *pr_val); int vr_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val); +void vr_mac_propinfo(void *arg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh); uint_t vr_intr(caddr_t arg1, caddr_t arg2); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/wpi/wpi.c b/usr/src/uts/common/io/wpi/wpi.c index d25acafb21..1913d7d980 100644 --- a/usr/src/uts/common/io/wpi/wpi.c +++ b/usr/src/uts/common/io/wpi/wpi.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -273,8 +273,9 @@ static void wpi_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int wpi_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int wpi_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_lenth, - void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_lenth, void *wldp_buf); +static void wpi_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static void wpi_destroy_locks(wpi_sc_t *sc); static int wpi_send(ieee80211com_t *ic, mblk_t *mp, uint8_t type); static void wpi_thread(wpi_sc_t *sc); @@ -364,7 +365,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t wpi_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, wpi_m_stat, wpi_m_start, wpi_m_stop, @@ -372,12 +373,14 @@ mac_callbacks_t wpi_m_callbacks = { 
wpi_m_multicst, wpi_m_unicst, wpi_m_tx, + NULL, wpi_m_ioctl, NULL, NULL, NULL, wpi_m_setprop, - wpi_m_getprop + wpi_m_getprop, + wpi_m_propinfo }; #ifdef DEBUG @@ -2324,16 +2327,26 @@ wpi_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) /* ARGSUSED */ static int wpi_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; wpi_sc_t *sc = (wpi_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_name, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } + +static void +wpi_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + wpi_sc_t *sc = (wpi_sc_t *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int wpi_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/xge/drv/xgell.c b/usr/src/uts/common/io/xge/drv/xgell.c index d1b85d74f2..a5d857f05d 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.c +++ b/usr/src/uts/common/io/xge/drv/xgell.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -112,6 +112,7 @@ static mac_callbacks_t xgell_m_callbacks = { xgell_m_multicst, NULL, NULL, + NULL, xgell_m_ioctl, xgell_m_getcapab }; @@ -613,15 +614,13 @@ xgell_rx_hcksum_assoc(mblk_t *mp, char *vaddr, int pkt_length, if (!(ext_info->proto & XGE_HAL_FRAME_PROTO_IP_FRAGMENTED)) { if (ext_info->proto & XGE_HAL_FRAME_PROTO_TCP_OR_UDP) { if (ext_info->l3_cksum == XGE_HAL_L3_CKSUM_OK) { - cksum_flags |= HCK_IPV4_HDRCKSUM; + cksum_flags |= HCK_IPV4_HDRCKSUM_OK; } if (ext_info->l4_cksum == XGE_HAL_L4_CKSUM_OK) { cksum_flags |= HCK_FULLCKSUM_OK; } - if (cksum_flags) { - cksum_flags |= HCK_FULLCKSUM; - (void) hcksum_assoc(mp, NULL, NULL, 0, - 0, 0, 0, cksum_flags, 0); + if (cksum_flags != 0) { + mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags); } } } else if (ext_info->proto & @@ -640,9 +639,8 @@ xgell_rx_hcksum_assoc(mblk_t *mp, char *vaddr, int pkt_length, start = 40; } cksum_flags |= HCK_PARTIALCKSUM; - (void) hcksum_assoc(mp, NULL, NULL, start, 0, - end, ntohs(ext_info->l4_cksum), cksum_flags, - 0); + mac_hcksum_set(mp, start, 0, end, + ntohs(ext_info->l4_cksum), cksum_flags); } } @@ -795,7 +793,8 @@ xgell_rx_1b_callback(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, xgell_rx_hcksum_assoc(mp, (char *)rx_buffer->vaddr + HEADROOM, pkt_length, &ext_info); - ring->received_bytes += pkt_length; + ring->rx_pkts++; + ring->rx_bytes += pkt_length; if (mp_head == NULL) { mp_head = mp; @@ -954,9 +953,11 @@ xgell_ring_tx(void *arg, mblk_t *mp) uint32_t mss; int handle_cnt, frag_cnt, ret, i, copied; boolean_t used_copy; + uint64_t sent_bytes; _begin: handle_cnt = frag_cnt = 0; + sent_bytes = 0; if (!lldev->is_initialized || lldev->in_reset) return (mp); @@ -1041,7 +1042,7 @@ _begin: continue; } - ring->sent_bytes += mblen; + sent_bytes += mblen; /* * Check the message length to decide to DMA or bcopy() data @@ -1159,14 +1160,14 @@ _begin: * If LSO is required, just call xge_hal_fifo_dtr_mss_set(dtr, mss) to * do all necessary work. 
*/ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); if (lsoflags & HW_LSO) { xge_assert((mss != 0) && (mss <= XGE_HAL_DEFAULT_MTU)); xge_hal_fifo_dtr_mss_set(dtr, mss); } - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags); if (hckflags & HCK_IPV4_HDRCKSUM) { xge_hal_fifo_dtr_cksum_set_bits(dtr, XGE_HAL_TXD_TX_CKO_IPV4_EN); @@ -1178,6 +1179,10 @@ _begin: xge_hal_fifo_dtr_post(ring->channelh, dtr); + /* Update per-ring tx statistics */ + atomic_add_64(&ring->tx_pkts, 1); + atomic_add_64(&ring->tx_bytes, sent_bytes); + return (NULL); _exit_cleanup: @@ -1458,6 +1463,7 @@ xgell_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = xgell_rx_ring_start; infop->mri_stop = xgell_rx_ring_stop; infop->mri_poll = xgell_rx_poll; + infop->mri_stat = xgell_rx_ring_stat; mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; @@ -1480,6 +1486,7 @@ xgell_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = xgell_tx_ring_start; infop->mri_stop = xgell_tx_ring_stop; infop->mri_tx = xgell_ring_tx; + infop->mri_stat = xgell_tx_ring_stat; break; } @@ -1618,7 +1625,6 @@ xgell_rx_ring_open(xgell_rx_ring_t *rx_ring) mutex_init(&rx_ring->ring_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(hldev->irqh)); - rx_ring->received_bytes = 0; rx_ring->poll_bytes = -1; rx_ring->polled_bytes = 0; rx_ring->poll_mp = NULL; @@ -1769,7 +1775,6 @@ xgell_tx_ring_open(xgell_tx_ring_t *tx_ring) return (B_FALSE); } - tx_ring->sent_bytes = 0; tx_ring->live = B_TRUE; return (B_TRUE); @@ -2262,6 +2267,56 @@ xgell_m_stat(void *arg, uint_t stat, uint64_t *val) } /* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +xgell_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + xgell_rx_ring_t *rx_ring = (xgell_rx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + 
case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +xgell_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + xgell_tx_ring_t *tx_ring = (xgell_tx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->tx_bytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->tx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* * xgell_device_alloc - Allocate new LL device */ int diff --git a/usr/src/uts/common/io/xge/drv/xgell.h b/usr/src/uts/common/io/xge/drv/xgell.h index 93845bb655..39c6447ebf 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.h +++ b/usr/src/uts/common/io/xge/drv/xgell.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -329,8 +329,8 @@ struct xgell_rx_ring { xgell_multi_mac_t mmac; /* per group multiple addrs */ xgell_rx_buffer_pool_t bf_pool; /* per ring buffer pool */ - int received_bytes; /* total received bytes */ - int intr_bytes; /* interrupt received bytes */ + uint64_t rx_pkts; /* total received packets */ + uint64_t rx_bytes; /* total received bytes */ int poll_bytes; /* bytes to be polled up */ int polled_bytes; /* total polled bytes */ mblk_t *poll_mp; /* polled messages */ @@ -344,7 +344,8 @@ struct xgell_tx_ring { xge_hal_channel_h channelh; /* hardware channel */ xgelldev_t *lldev; /* driver device */ mac_ring_handle_t ring_handle; /* call back ring handle */ - int sent_bytes; /* bytes sent though the ring */ + uint64_t tx_pkts; /* packets sent */ + uint64_t tx_bytes; /* bytes sent though the ring */ boolean_t need_resched; }; @@ -418,8 +419,9 @@ void xge_disable_intrs(xgelldev_t *lldev); void xge_rem_intrs(xgelldev_t *lldev); +int xgell_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val); - +int xgell_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/yge/yge.c b/usr/src/uts/common/io/yge/yge.c index c41dda7b60..33d2fb527a 100644 --- a/usr/src/uts/common/io/yge/yge.c +++ b/usr/src/uts/common/io/yge/yge.c @@ -228,8 +228,9 @@ static mblk_t *yge_m_tx(void *, mblk_t *); static int yge_m_stat(void *, uint_t, uint64_t *); static int yge_m_start(void *); static void yge_m_stop(void *); -static int yge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static int yge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void yge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int yge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static void yge_m_ioctl(void *, queue_t *, mblk_t *); @@ -240,7 +241,7 @@ extern int yge_phys_restart(yge_port_t *, boolean_t); extern int 
yge_phys_init(yge_port_t *, phy_readreg_t, phy_writereg_t); static mac_callbacks_t yge_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, yge_m_stat, yge_m_start, yge_m_stop, @@ -248,12 +249,14 @@ static mac_callbacks_t yge_m_callbacks = { yge_m_multicst, yge_m_unicst, yge_m_tx, + NULL, yge_m_ioctl, NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ yge_m_setprop, yge_m_getprop, + yge_m_propinfo }; static mii_ops_t yge_mii_ops = { @@ -3348,47 +3351,30 @@ err: int yge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { yge_port_t *port = arg; - mac_propval_range_t range; - int err; - err = mii_m_getprop(port->p_mii, pr_name, pr_num, pr_flags, - pr_valsize, pr_val, perm); - if (err != ENOTSUP) { - return (err); - } - - if (pr_valsize == 0) - return (EINVAL); + return (mii_m_getprop(port->p_mii, pr_name, pr_num, pr_valsize, + pr_val)); +} - bzero(pr_val, pr_valsize); - *perm = MAC_PROP_PERM_RW; +static void +yge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + yge_port_t *port = arg; switch (pr_num) { case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) { - err = ENOTSUP; - break; - } - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = ETHERMTU; - range.range_uint32[0].mpur_max = + mac_prop_info_set_range_uint32(prh, ETHERMTU, port->p_flags & PORT_FLAG_NOJUMBO ? 
- ETHERMTU : YGE_JUMBO_MTU; - bcopy(&range, pr_val, sizeof (range)); - err = 0; + ETHERMTU : YGE_JUMBO_MTU); break; - default: - err = ENOTSUP; + mii_m_propinfo(port->p_mii, pr_name, pr_num, prh); break; } - return (err); } void diff --git a/usr/src/uts/common/io/zyd/zyd.c b/usr/src/uts/common/io/zyd/zyd.c index bdd5ef37d1..61c4abf3c6 100644 --- a/usr/src/uts/common/io/zyd/zyd.c +++ b/usr/src/uts/common/io/zyd/zyd.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,8 +54,9 @@ static int zyd_m_promisc(void *arg, boolean_t on); static void zyd_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static mblk_t *zyd_m_tx(void *arg, mblk_t *mp); static int zyd_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void zyd_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static int zyd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); @@ -72,7 +73,7 @@ void *zyd_ssp; * Mac Call Back entries */ static mac_callbacks_t zyd_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, zyd_m_stat, /* Get the value of a statistic */ zyd_m_start, /* Start the device */ zyd_m_stop, /* Stop the device */ @@ -80,12 +81,14 @@ static mac_callbacks_t zyd_m_callbacks = { zyd_m_multicst, /* Enable or disable a multicast addr */ zyd_m_unicst, /* Set the unicast MAC address */ zyd_m_tx, /* Transmit a packet */ + NULL, zyd_m_ioctl, /* Process an unknown ioctl */ NULL, /* mc_getcapab */ NULL, NULL, zyd_m_setprop, - zyd_m_getprop + zyd_m_getprop, + zyd_m_propinfo }; /* @@ -893,7 +896,7 @@ zyd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t 
wldp_pr_num, static int zyd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct zyd_softc *sc = (struct zyd_softc *)arg; int err; @@ -903,11 +906,20 @@ zyd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } err = ieee80211_getprop(&sc->ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +zyd_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct zyd_softc *sc = (struct zyd_softc *)arg; + + ieee80211_propinfo(&sc->ic, pr_name, wldp_pr_num, mph); +} + /* * Transmit a data frame. */ diff --git a/usr/src/uts/common/os/pool.c b/usr/src/uts/common/os/pool.c index 7c3c70de3c..4b4337b3a2 100644 --- a/usr/src/uts/common/os/pool.c +++ b/usr/src/uts/common/os/pool.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ #include <sys/zone.h> #include <sys/policy.h> #include <sys/schedctl.h> +#include <sys/taskq.h> /* * RESOURCE POOLS @@ -153,6 +154,12 @@ static kthread_t *pool_busy_thread; /* thread holding "pool_lock" */ static kmutex_t pool_barrier_lock; /* synch. with pool_barrier_* */ static kcondvar_t pool_barrier_cv; /* synch. with pool_barrier_* */ static int pool_barrier_count; /* synch. with pool_barrier_* */ +static list_t pool_event_cb_list; /* pool event callbacks */ +static boolean_t pool_event_cb_init = B_FALSE; +static kmutex_t pool_event_cb_lock; +static taskq_t *pool_event_cb_taskq = NULL; + +void pool_event_dispatch(pool_event_t, poolid_t); /* * Boot-time pool initialization. 
@@ -373,6 +380,21 @@ pool_lookup_pool_by_id(poolid_t poolid) return (NULL); } +pool_t * +pool_lookup_pool_by_pset(int id) +{ + pool_t *pool = pool_default; + psetid_t psetid = (psetid_t)id; + + ASSERT(pool_lock_held()); + for (pool = list_head(&pool_list); pool != NULL; + pool = list_next(&pool_list, pool)) { + if (pool->pool_pset->pset_id == psetid) + return (pool); + } + return (NULL); +} + /* * Create new pool, associate it with default resource sets, and give * it a temporary name. @@ -545,12 +567,14 @@ pool_status(int status) if (ret != 0) return (ret); pool_state = POOL_ENABLED; + pool_event_dispatch(POOL_E_ENABLE, NULL); break; case POOL_DISABLED: ret = pool_disable(); if (ret != 0) return (ret); pool_state = POOL_DISABLED; + pool_event_dispatch(POOL_E_DISABLE, NULL); break; default: ret = EINVAL; @@ -572,6 +596,8 @@ pool_assoc(poolid_t poolid, int idtype, id_t id) switch (idtype) { case PREC_PSET: ret = pool_pset_assoc(poolid, (psetid_t)id); + if (ret == 0) + pool_event_dispatch(POOL_E_CHANGE, poolid); break; default: ret = EINVAL; @@ -595,6 +621,8 @@ pool_dissoc(poolid_t poolid, int idtype) switch (idtype) { case PREC_PSET: ret = pool_pset_assoc(poolid, PS_NONE); + if (ret == 0) + pool_event_dispatch(POOL_E_CHANGE, poolid); break; default: ret = EINVAL; @@ -612,24 +640,48 @@ int pool_transfer(int type, id_t src, id_t dst, uint64_t qty) { int ret = EINVAL; + return (ret); } +static poolid_t +pool_lookup_id_by_pset(int id) +{ + pool_t *pool = pool_default; + psetid_t psetid = (psetid_t)id; + + ASSERT(pool_lock_held()); + for (pool = list_head(&pool_list); pool != NULL; + pool = list_next(&pool_list, pool)) { + if (pool->pool_pset->pset_id == psetid) + return (pool->pool_id); + } + return (POOL_INVALID); +} + /* * Transfer resources specified by their IDs between resource sets. 
*/ int -pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids) +pool_xtransfer(int type, id_t src_pset, id_t dst_pset, uint_t size, id_t *ids) { int ret; + poolid_t src_pool, dst_pool; ASSERT(pool_lock_held()); if (pool_state == POOL_DISABLED) return (ENOTACTIVE); switch (type) { case PREC_PSET: - ret = pool_pset_xtransfer((psetid_t)src, (psetid_t)dst, - size, ids); + ret = pool_pset_xtransfer((psetid_t)src_pset, + (psetid_t)dst_pset, size, ids); + + if ((src_pool = pool_lookup_id_by_pset(src_pset)) == -1) + return (EINVAL); + if ((dst_pool = pool_lookup_id_by_pset(dst_pset)) == -1) + return (EINVAL); + pool_event_dispatch(POOL_E_CHANGE, src_pool); + pool_event_dispatch(POOL_E_CHANGE, dst_pool); break; default: ret = EINVAL; @@ -643,7 +695,7 @@ pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids) int pool_bind(poolid_t poolid, idtype_t idtype, id_t id) { - pool_t *pool; + pool_t *pool; ASSERT(pool_lock_held()); @@ -1234,6 +1286,17 @@ pool_change_class(proc_t *p, id_t cid) kmem_free(bufs, nlwp * sizeof (void *)); } +void +pool_get_name(pool_t *pool, char **name) +{ + ASSERT(pool_lock_held()); + + (void) nvlist_lookup_string(pool->pool_props, "pool.name", name); + + ASSERT(strlen(*name) != 0); +} + + /* * The meat of the bind operation. 
The steps in pool_do_bind are: * @@ -1658,3 +1721,71 @@ out: switch (idtype) { ASSERT(pool_barrier_count == 0); return (rv); } + +void +pool_event_cb_register(pool_event_cb_t *cb) +{ + ASSERT(!pool_lock_held() || panicstr); + ASSERT(cb->pec_func != NULL); + + mutex_enter(&pool_event_cb_lock); + if (!pool_event_cb_init) { + list_create(&pool_event_cb_list, sizeof (pool_event_cb_t), + offsetof(pool_event_cb_t, pec_list)); + pool_event_cb_init = B_TRUE; + } + list_insert_tail(&pool_event_cb_list, cb); + mutex_exit(&pool_event_cb_lock); +} + +void +pool_event_cb_unregister(pool_event_cb_t *cb) +{ + ASSERT(!pool_lock_held() || panicstr); + + mutex_enter(&pool_event_cb_lock); + list_remove(&pool_event_cb_list, cb); + mutex_exit(&pool_event_cb_lock); +} + +typedef struct { + pool_event_t tqd_what; + poolid_t tqd_id; +} pool_tqd_t; + +void +pool_event_notify(void *arg) +{ + pool_tqd_t *tqd = (pool_tqd_t *)arg; + pool_event_cb_t *cb; + + ASSERT(!pool_lock_held() || panicstr); + + mutex_enter(&pool_event_cb_lock); + for (cb = list_head(&pool_event_cb_list); cb != NULL; + cb = list_next(&pool_event_cb_list, cb)) { + cb->pec_func(tqd->tqd_what, tqd->tqd_id, cb->pec_arg); + } + mutex_exit(&pool_event_cb_lock); + kmem_free(tqd, sizeof (*tqd)); +} + +void +pool_event_dispatch(pool_event_t what, poolid_t id) +{ + pool_tqd_t *tqd = NULL; + + ASSERT(pool_lock_held()); + + if (pool_event_cb_taskq == NULL) { + pool_event_cb_taskq = taskq_create("pool_event_cb_taskq", 1, + -1, 1, 1, TASKQ_PREPOPULATE); + } + + tqd = kmem_alloc(sizeof (*tqd), KM_SLEEP); + tqd->tqd_what = what; + tqd->tqd_id = id; + + (void) taskq_dispatch(pool_event_cb_taskq, pool_event_notify, tqd, + KM_SLEEP); +} diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c index 75bd481d21..c8068ca965 100644 --- a/usr/src/uts/common/os/strsubr.c +++ b/usr/src/uts/common/os/strsubr.c @@ -8559,18 +8559,6 @@ lso_info_cleanup(mblk_t *mp) DB_LSOMSS(mp) = 0; } -void -lso_info_get(mblk_t *mp, uint32_t *mss, 
uint32_t *flags) -{ - ASSERT(DB_TYPE(mp) == M_DATA); - - if (flags != NULL) { - *flags = DB_CKSUMFLAGS(mp) & HW_LSO_FLAGS; - if ((*flags != 0) && (mss != NULL)) - *mss = (uint32_t)DB_LSOMSS(mp); - } -} - /* * Checksum buffer *bp for len bytes with psum partial checksum, * or 0 if none, and return the 16 bit partial checksum. diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index e251fd3222..834725f8f7 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -356,6 +356,7 @@ CHKHDRS= \ mac_impl.h \ mac_provider.h \ mac_soft_ring.h \ + mac_stat.h \ machelf.h \ map.h \ md4.h \ diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index ee0979b798..8363d231cf 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,6 +75,19 @@ typedef struct aggr_pseudo_rx_group_s { uint_t arg_ring_cnt; } aggr_pseudo_rx_group_t; +typedef struct aggr_pseudo_tx_ring_s { + mac_ring_handle_t atr_rh; /* filled in by aggr_fill_ring() */ + struct aggr_port_s *atr_port; + mac_ring_handle_t atr_hw_rh; + uint_t atr_flags; +} aggr_pseudo_tx_ring_t; + +typedef struct aggr_pseudo_tx_group_s { + mac_group_handle_t atg_gh; /* filled in by aggr_fill_group() */ + uint_t atg_ring_cnt; + aggr_pseudo_tx_ring_t atg_rings[MAX_RINGS_PER_GROUP]; +} aggr_pseudo_tx_group_t; + /* * A link aggregation MAC port. 
* Note that lp_next is protected by the lg_lock of the group the @@ -93,9 +106,10 @@ typedef struct aggr_port_s { lp_collector_enabled : 1, lp_promisc_on : 1, lp_no_link_update : 1, - lp_grp_added : 1, + lp_rx_grp_added : 1, + lp_tx_grp_added : 1, lp_closing : 1, - lp_pad_bits : 25; + lp_pad_bits : 24; mac_handle_t lp_mh; mac_client_handle_t lp_mch; const mac_info_t *lp_mip; @@ -116,6 +130,17 @@ typedef struct aggr_port_s { aggr_unicst_addr_t *lp_prom_addr; /* handle of the underlying HW RX group */ mac_group_handle_t lp_hwgh; + int lp_tx_ring_cnt; + /* handles of the underlying HW TX rings */ + mac_ring_handle_t *lp_tx_rings; + /* + * Handles of the pseudo TX rings. Each of them maps to + * corresponding hardware TX ring in lp_tx_rings[]. A + * pseudo TX ring is presented to aggr primary mac + * client even when underlying NIC has no TX ring. + */ + mac_ring_handle_t *lp_pseudo_tx_rings; + void *lp_tx_notify_mh; } aggr_port_t; /* @@ -187,7 +212,16 @@ typedef struct aggr_grp_s { mblk_t *lg_lacp_tail; kthread_t *lg_lacp_rx_thread; boolean_t lg_lacp_done; + aggr_pseudo_rx_group_t lg_rx_group; + aggr_pseudo_tx_group_t lg_tx_group; + + kmutex_t lg_tx_flowctl_lock; + kcondvar_t lg_tx_flowctl_cv; + uint_t lg_tx_blocked_cnt; + mac_ring_handle_t *lg_tx_blocked_rings; + kthread_t *lg_tx_notify_thread; + boolean_t lg_tx_notify_done; /* * The following fields are used by aggr to wait for all the @@ -274,7 +308,8 @@ extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); -extern mblk_t *aggr_m_tx(void *, mblk_t *); +extern void aggr_tx_ring_update(void *, uintptr_t); +extern void aggr_tx_notify_thread(void *); extern void aggr_send_port_enable(aggr_port_t *); extern void aggr_send_port_disable(aggr_port_t *); extern void aggr_send_update_policy(aggr_grp_t *, uint32_t); @@ -302,6 +337,10 @@ extern void aggr_grp_port_wait(aggr_grp_t *); extern int aggr_port_addmac(aggr_port_t *, const uint8_t *); extern 
void aggr_port_remmac(aggr_port_t *, const uint8_t *); +extern mblk_t *aggr_ring_tx(void *, mblk_t *); +extern mblk_t *aggr_find_tx_ring(void *, mblk_t *, + uintptr_t, mac_ring_handle_t *); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index ed80269fbc..fb2a0749d3 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -27,7 +27,7 @@ #define _SYS_DLD_H /* - * Data-Link Driver (public header). + * Data-Link Driver ioctl interfaces. * * Note that the datastructures defined here define an ioctl interface * that is shared betwen user and kernel space. The dld driver thus @@ -280,10 +280,12 @@ typedef struct dld_ioc_usagelog { #define DLDIOC_SETMACPROP DLDIOC(0x1b) #define DLDIOC_GETMACPROP DLDIOC(0x1c) -#define MAC_PROP_VERSION 1 + +/* pr_flags can be set to a combination of the following flags */ +#define DLD_PROP_DEFAULT 0x0001 +#define DLD_PROP_POSSIBLE 0x0002 typedef struct dld_ioc_macprop_s { - int pr_version; uint_t pr_flags; datalink_id_t pr_linkid; mac_prop_id_t pr_num; @@ -308,7 +310,7 @@ typedef struct dld_hwgrpinfo { uint_t dhi_grp_type; uint_t dhi_n_rings; uint_t dhi_n_clnts; - /* XXXX later we should use dhi_n_clnts * MAXNAMELEN for dhi_clnts */ + uint_t dhi_rings[MAX_RINGS_PER_GROUP]; char dhi_clnts[MAXCLIENTNAMELEN]; } dld_hwgrpinfo_t; diff --git a/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h b/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h index ca255b1643..61e0fb1a44 100644 --- a/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h +++ b/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h @@ -286,7 +286,10 @@ void oce_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); int oce_m_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t 
size, const void *val); int oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, - uint_t flags, uint_t size, void *val, uint_t *perm); + uint_t size, void *val); +void oce_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh); + int oce_m_stat(void *arg, uint_t stat, uint64_t *val); /* Hardware start/stop functions */ diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 7a8fc3293d..ab04126708 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -92,7 +92,7 @@ typedef enum { } link_tagmode_t; /* - * Defines range of uint32 values + * Defines range of uint32_t values */ typedef struct mac_propval_uint32_range_s { uint32_t mpur_min; @@ -100,10 +100,12 @@ typedef struct mac_propval_uint32_range_s { } mac_propval_uint32_range_t; /* - * Data type of the value + * Data type of property values. */ typedef enum { - MAC_PROPVAL_UINT32 = 0x1 + MAC_PROPVAL_UINT8, + MAC_PROPVAL_UINT32, + MAC_PROPVAL_STR } mac_propval_type_t; /* @@ -111,8 +113,6 @@ typedef enum { * range of values (int32, int64, uint32, uint64, et al) or collection/ * enumeration of values (strings). * Can be used as a value-result parameter. - * - * See PSARC 2009/235 for more information. */ typedef struct mac_propval_range_s { uint_t mpr_count; /* count of ranges */ @@ -122,7 +122,7 @@ typedef struct mac_propval_range_s { } u; } mac_propval_range_t; -#define range_uint32 u.mpr_uint32 +#define mpr_range_uint32 u.mpr_uint32 /* * Maximum MAC address length @@ -134,26 +134,15 @@ typedef enum { MAC_LOGTYPE_FLOW } mac_logtype_t; -/* - * Encodings for public properties. - * A most significant bit value of 1 indicates private property, intended - * to allow private property implementations to use internal encodings - * if desired. - * - * Note that there are 2 sets of parameters: the *_EN_* - * values are those that the Administrator configures for autonegotiation. 
- * The _ADV_* values are those that are currently exposed over the wire. - */ -#define MAXLINKPROPNAME 256 -#define MAC_PROP_DEFAULT 0x0001 /* default property value */ +#define MAXLINKPROPNAME 256 /* max property name len */ /* - * Indicates the linkprop framework is interested in knowing the list of - * possible property values. When used to obtain possible values for a - * property, one may have to change all the drivers. See PSARC 2009/235. + * Public properties. + * + * Note that there are 2 sets of parameters: the *_EN_* values are + * those that the Administrator configures for autonegotiation. The + * _ADV_* values are those that are currently exposed over the wire. */ -#define MAC_PROP_POSSIBLE 0x0002 /* possible property values */ - typedef enum { MAC_PROP_DUPLEX = 0x00000001, MAC_PROP_SPEED, @@ -202,16 +191,20 @@ typedef enum { MAC_PROP_WL_DELKEY, MAC_PROP_WL_KEY, MAC_PROP_WL_MLME, - MAC_PROP_MAXBW, - MAC_PROP_PRIO, - MAC_PROP_BIND_CPU, MAC_PROP_TAGMODE, MAC_PROP_ADV_10GFDX_CAP, MAC_PROP_EN_10GFDX_CAP, MAC_PROP_PVID, MAC_PROP_LLIMIT, MAC_PROP_LDECAY, - MAC_PROP_PROTECT, + MAC_PROP_RESOURCE, + MAC_PROP_RESOURCE_EFF, + MAC_PROP_RXRINGSRANGE, + MAC_PROP_TXRINGSRANGE, + MAC_PROP_MAX_TX_RINGS_AVAIL, + MAC_PROP_MAX_RX_RINGS_AVAIL, + MAC_PROP_MAX_RXHWCLNT_AVAIL, + MAC_PROP_MAX_TXHWCLNT_AVAIL, MAC_PROP_PRIVATE = -1 } mac_prop_id_t; @@ -248,7 +241,8 @@ enum mac_mod_stat { MAC_STAT_LINK_STATE, MAC_STAT_LINK_UP, MAC_STAT_PROMISC, - MAC_STAT_LOWLINK_STATE + MAC_STAT_LOWLINK_STATE, + MAC_STAT_HDROPS }; /* @@ -328,9 +322,13 @@ typedef struct mac_capab_vnic_s { } mac_capab_vnic_t; typedef void (*mac_rename_fn_t)(const char *, void *); +typedef mblk_t *(*mac_tx_ring_fn_t)(void *, mblk_t *, uintptr_t, + mac_ring_handle_t *); typedef struct mac_capab_aggr_s { mac_rename_fn_t mca_rename_fn; int (*mca_unicst)(void *, const uint8_t *); + mac_tx_ring_fn_t mca_find_tx_ring_fn; + void *mca_arg; } mac_capab_aggr_t; /* Bridge transmit and receive function signatures */ @@ -373,6 
+371,8 @@ typedef struct mac_intr_s { mac_intr_handle_t mi_handle; mac_intr_enable_t mi_enable; mac_intr_disable_t mi_disable; + ddi_intr_handle_t mi_ddi_handle; + boolean_t mi_ddi_shared; } mac_intr_t; typedef struct mac_rx_fifo_s { @@ -571,12 +571,6 @@ typedef struct mactype_register_s { size_t mtr_mappingcount; } mactype_register_t; -typedef struct mac_prop_s { - mac_prop_id_t mp_id; - char *mp_name; - uint_t mp_flags; -} mac_prop_t; - /* * Driver interface functions. */ @@ -617,6 +611,7 @@ extern int mac_start_logusage(mac_logtype_t, uint_t); extern void mac_stop_logusage(mac_logtype_t); extern mac_handle_t mac_get_lower_mac_handle(mac_handle_t); +extern boolean_t mac_is_vnic_primary(mac_handle_t); /* * Packet hashing for distribution to multiple ports and rings. diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index ad3f30aa63..40cd15a1b8 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,6 +68,18 @@ typedef enum { MAC_DIAG_MACNO_HWRINGS } mac_diag_t; +/* + * These are used when MAC clients what to specify tx and rx rings + * properties. MAC_RXRINGS_NONE/MAC_TXRINGS_NONE mean that we should + * not reserve any rings while MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE + * mean that the system can decide if it wants to reserve rings or + * not. 
+ */ +#define MAC_RXRINGS_NONE 0 +#define MAC_TXRINGS_NONE MAC_RXRINGS_NONE +#define MAC_RXRINGS_DONTCARE -1 +#define MAC_TXRINGS_DONTCARE MAC_RXRINGS_DONTCARE + typedef enum { MAC_CLIENT_PROMISC_ALL, MAC_CLIENT_PROMISC_FILTERED, @@ -87,11 +99,10 @@ typedef enum { #define MAC_OPEN_FLAGS_IS_VNIC 0x0001 #define MAC_OPEN_FLAGS_EXCLUSIVE 0x0002 #define MAC_OPEN_FLAGS_IS_AGGR_PORT 0x0004 -#define MAC_OPEN_FLAGS_NO_HWRINGS 0x0008 -#define MAC_OPEN_FLAGS_SHARES_DESIRED 0x0010 -#define MAC_OPEN_FLAGS_USE_DATALINK_NAME 0x0020 -#define MAC_OPEN_FLAGS_REQ_HWRINGS 0x0040 -#define MAC_OPEN_FLAGS_MULTI_PRIMARY 0x0080 +#define MAC_OPEN_FLAGS_SHARES_DESIRED 0x0008 +#define MAC_OPEN_FLAGS_USE_DATALINK_NAME 0x0010 +#define MAC_OPEN_FLAGS_MULTI_PRIMARY 0x0020 +#define MAC_OPEN_FLAGS_NO_UNICAST_ADDR 0x0040 /* flags passed to mac_client_close */ #define MAC_CLOSE_FLAGS_IS_VNIC 0x0001 @@ -161,12 +172,12 @@ extern uint_t mac_addr_factory_num(mac_handle_t); extern mac_tx_notify_handle_t mac_client_tx_notify(mac_client_handle_t, mac_tx_notify_t, void *); -extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); -extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); extern int mac_client_set_resources(mac_client_handle_t, mac_resource_props_t *); extern void mac_client_get_resources(mac_client_handle_t, mac_resource_props_t *); +extern void mac_client_get_eff_resources(mac_client_handle_t, + mac_resource_props_t *); /* bridging-related interfaces */ extern int mac_set_pvid(mac_handle_t, uint16_t); @@ -180,15 +191,7 @@ extern void mac_share_unbind(mac_client_handle_t); extern int mac_set_mtu(mac_handle_t, uint_t, uint_t *); -extern uint_t mac_hwgrp_num(mac_handle_t); -extern void mac_get_hwgrp_info(mac_handle_t, int, uint_t *, uint_t *, - uint_t *, uint_t *, char *); - -extern uint32_t mac_no_notification(mac_handle_t); -extern int mac_set_prop(mac_handle_t, mac_prop_t *, void *, uint_t); -extern int mac_get_prop(mac_handle_t, mac_prop_t *, void *, uint_t, uint_t 
*); - -extern boolean_t mac_is_vnic(mac_handle_t); +extern void mac_client_set_rings(mac_client_handle_t, int, int); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index bcdeb1da46..ae25df6a0d 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,6 +31,7 @@ #include <sys/mac_provider.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_stat.h> #include <net/if.h> #include <sys/mac_flow_impl.h> @@ -153,16 +154,7 @@ struct mac_client_impl_s { /* Protected by */ uintptr_t mci_tx_notify_id; /* per MAC client stats */ /* None */ - uint64_t mci_stat_multircv; - uint64_t mci_stat_brdcstrcv; - uint64_t mci_stat_multixmt; - uint64_t mci_stat_brdcstxmt; - uint64_t mci_stat_obytes; - uint64_t mci_stat_opackets; - uint64_t mci_stat_oerrors; - uint64_t mci_stat_ibytes; - uint64_t mci_stat_ipackets; - uint64_t mci_stat_ierrors; + mac_misc_stats_t mci_misc_stat; flow_tab_t *mci_subflow_tab; /* Rx quiescence */ @@ -182,6 +174,20 @@ struct mac_client_impl_s { /* Protected by */ struct mac_mcast_addrs_s *mci_mcast_addrs; /* mi_rw_lock */ /* + * Mac protection related fields + */ + kmutex_t mci_protect_lock; + uint32_t mci_protect_flags; /* SL */ + in6_addr_t mci_v6_local_addr; /* SL */ + avl_tree_t mci_v4_pending_txn; /* mci_protect_lock */ + avl_tree_t mci_v4_completed_txn; /* mci_protect_lock */ + avl_tree_t mci_v4_dyn_ip; /* mci_protect_lock */ + avl_tree_t mci_v6_pending_txn; /* mci_protect_lock */ + avl_tree_t mci_v6_cid; /* mci_protect_lock */ + avl_tree_t mci_v6_dyn_ip; /* mci_protect_lock */ + timeout_id_t mci_txn_cleanup_tid; /* mci_protect_lock */ + + /* * Protected by mci_tx_pcpu[0].pcpu_tx_lock */ uint_t mci_tx_flag; @@ -287,12 
+293,15 @@ extern int mac_tx_percpu_cnt; #define MCIS_CLIENT_POLL_CAPABLE 0x0020 #define MCIS_DESC_LOGGED 0x0040 #define MCIS_SHARE_BOUND 0x0080 -#define MCIS_NO_HWRINGS 0x0100 -#define MCIS_DISABLE_TX_VID_CHECK 0x0200 -#define MCIS_USE_DATALINK_NAME 0x0400 -#define MCIS_UNICAST_HW 0x0800 -#define MCIS_REQ_HWRINGS 0x1000 -#define MCIS_RX_BYPASS_DISABLE 0x2000 +#define MCIS_DISABLE_TX_VID_CHECK 0x0100 +#define MCIS_USE_DATALINK_NAME 0x0200 +#define MCIS_UNICAST_HW 0x0400 +#define MCIS_IS_AGGR 0x0800 +#define MCIS_RX_BYPASS_DISABLE 0x1000 +#define MCIS_NO_UNICAST_ADDR 0x2000 + +/* Mac protection flags */ +#define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001 /* in mac_client.c */ extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *); @@ -301,7 +310,7 @@ extern void mac_client_fini(void); extern void mac_promisc_dispatch(mac_impl_t *, mblk_t *, mac_client_impl_t *); -extern int mac_validate_props(mac_resource_props_t *); +extern int mac_validate_props(mac_impl_t *, mac_resource_props_t *); extern mac_client_impl_t *mac_vnic_lower(mac_impl_t *); extern mac_client_impl_t *mac_primary_client_handle(mac_impl_t *); @@ -316,6 +325,10 @@ boolean_t mac_client_check_flow_vid(mac_client_impl_t *, uint16_t); extern boolean_t mac_is_primary_client(mac_client_impl_t *); +extern int mac_client_set_rings_prop(mac_client_impl_t *, + mac_resource_props_t *, mac_resource_props_t *); +extern void mac_set_prim_vlan_rings(mac_impl_t *, mac_resource_props_t *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 78421a3b80..0ddc1f074d 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -117,35 +117,71 @@ extern int mac_link_flow_walk(datalink_id_t, int (*)(mac_flowinfo_t *, void *), void *); extern int mac_link_flow_info(char *, mac_flowinfo_t *); -extern void *mac_tx_hold(mac_client_handle_t); -extern void mac_tx_rele(mac_client_handle_t, void *); -extern void mac_rx_client_quiesce(mac_client_handle_t); -extern void mac_rx_client_restart(mac_client_handle_t); -extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); -extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, - mac_ring_handle_t *, mac_ring_type_t); -extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t); -extern void mac_hwring_teardown(mac_ring_handle_t); -extern int mac_hwring_disable_intr(mac_ring_handle_t); -extern int mac_hwring_enable_intr(mac_ring_handle_t); -extern int mac_hwring_start(mac_ring_handle_t); -extern void mac_hwring_stop(mac_ring_handle_t); -extern mblk_t *mac_hwring_poll(mac_ring_handle_t, int); -#define MAC_HWRING_POLL(ring, bytes) \ - (((ring)->mr_info.mri_poll) \ - ((ring)->mr_info.mri_driver, (bytes))) - -extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); -extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); - -extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t); +extern void mac_rx_client_quiesce(mac_client_handle_t); +extern void mac_rx_client_restart(mac_client_handle_t); +extern void mac_tx_client_quiesce(mac_client_handle_t); +extern void mac_tx_client_condemn(mac_client_handle_t); +extern void mac_tx_client_restart(mac_client_handle_t); +extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); +extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, + mac_ring_handle_t *, mac_ring_type_t); +extern uint_t mac_hwring_getinfo(mac_ring_handle_t); +extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t, + mac_ring_handle_t); +extern void mac_hwring_teardown(mac_ring_handle_t); +extern int 
mac_hwring_disable_intr(mac_ring_handle_t); +extern int mac_hwring_enable_intr(mac_ring_handle_t); +extern int mac_hwring_start(mac_ring_handle_t); +extern void mac_hwring_stop(mac_ring_handle_t); +extern mblk_t *mac_hwring_poll(mac_ring_handle_t, int); +extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); +extern int mac_hwring_getstat(mac_ring_handle_t, uint_t, uint64_t *); +extern mblk_t *mac_hwring_send_priv(mac_client_handle_t, + mac_ring_handle_t, mblk_t *); + +#define MAC_HWRING_POLL(ring, bytes) \ + (((ring)->mr_info.mri_poll) \ + ((ring)->mr_info.mri_driver, (bytes))) + +extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); +extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); + +extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t, + mac_resource_props_t *); extern int mac_mark_exclusive(mac_handle_t); extern void mac_unmark_exclusive(mac_handle_t); -extern int32_t mac_client_intr_cpu(mac_client_handle_t); -extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); -extern void *mac_get_devinfo(mac_handle_t); +extern uint_t mac_hwgrp_num(mac_handle_t, int); +extern void mac_get_hwrxgrp_info(mac_handle_t, int, uint_t *, uint_t *, + uint_t *, uint_t *, uint_t *, char *); +extern void mac_get_hwtxgrp_info(mac_handle_t, int, uint_t *, uint_t *, + uint_t *, uint_t *, uint_t *, char *); + +extern uint_t mac_txavail_get(mac_handle_t); +extern uint_t mac_rxavail_get(mac_handle_t); +extern uint_t mac_txrsvd_get(mac_handle_t); +extern uint_t mac_rxrsvd_get(mac_handle_t); +extern uint_t mac_rxhwlnksavail_get(mac_handle_t); +extern uint_t mac_rxhwlnksrsvd_get(mac_handle_t); +extern uint_t mac_txhwlnksavail_get(mac_handle_t); +extern uint_t mac_txhwlnksrsvd_get(mac_handle_t); + +extern int32_t mac_client_intr_cpu(mac_client_handle_t); +extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); +extern void *mac_get_devinfo(mac_handle_t); + +extern boolean_t mac_is_vnic(mac_handle_t); +extern 
uint32_t mac_no_notification(mac_handle_t); + +extern int mac_set_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); +extern int mac_get_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); +extern int mac_prop_info(mac_handle_t, mac_prop_id_t, char *, void *, + uint_t, mac_propval_range_t *, uint_t *); +extern boolean_t mac_prop_check_size(mac_prop_id_t, uint_t, boolean_t); + +extern uint64_t mac_pseudo_rx_ring_stat_get(mac_ring_handle_t, uint_t); +extern uint64_t mac_pseudo_tx_ring_stat_get(mac_ring_handle_t, uint_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_flow.h b/usr/src/uts/common/sys/mac_flow.h index 08c7a211a3..9f9902fc29 100644 --- a/usr/src/uts/common/sys/mac_flow.h +++ b/usr/src/uts/common/sys/mac_flow.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,8 @@ extern "C" { #include <netinet/in.h> /* for IPPROTO_* constants */ #include <sys/ethernet.h> +#define MAX_RINGS_PER_GROUP 128 + /* * MAXFLOWNAMELEN defines the longest possible permitted flow name, * including the terminating NUL. @@ -93,29 +95,45 @@ typedef struct flow_desc_s { /* * In MCM_CPUS mode, cpu bindings is user specified. In MCM_FANOUT mode, * user only specifies a fanout count. - * mc_fanout_cnt gives the number of CPUs used for fanout soft rings. - * mc_fanout_cpus[] array stores the CPUs used for fanout soft rings. + * mc_rx_fanout_cnt gives the number of CPUs used for fanout soft rings. + * mc_rx_fanout_cpus[] array stores the CPUs used for fanout soft rings. */ typedef enum { MCM_FANOUT = 1, MCM_CPUS } mac_cpu_mode_t; +/* + * Structure to store the value of the CPUs to be used to re-target + * Tx interrupt. 
+ */ +typedef struct mac_tx_intr_cpus_s { + /* cpu value to re-target intr to */ + int32_t mtc_intr_cpu[MRP_NCPUS]; + /* re-targeted CPU or -1 if failed */ + int32_t mtc_retargeted_cpu[MRP_NCPUS]; +} mac_tx_intr_cpu_t; + typedef struct mac_cpus_props_s { uint32_t mc_ncpus; /* num of cpus */ uint32_t mc_cpus[MRP_NCPUS]; /* cpu list */ - uint32_t mc_fanout_cnt; /* soft ring cpu cnt */ - uint32_t mc_fanout_cpus[MRP_NCPUS]; /* SR cpu list */ - uint32_t mc_pollid; /* poll thr binding */ - uint32_t mc_workerid; /* worker thr binding */ + uint32_t mc_rx_fanout_cnt; /* soft ring cpu cnt */ + uint32_t mc_rx_fanout_cpus[MRP_NCPUS]; /* SR cpu list */ + uint32_t mc_rx_pollid; /* poll thr binding */ + uint32_t mc_rx_workerid; /* worker thr binding */ /* * interrupt cpu: mrp_intr_cpu less than 0 implies platform limitation * in retargetting the interrupt assignment. */ - int32_t mc_intr_cpu; + int32_t mc_rx_intr_cpu; + int32_t mc_tx_fanout_cpus[MRP_NCPUS]; + mac_tx_intr_cpu_t mc_tx_intr_cpus; mac_cpu_mode_t mc_fanout_mode; /* fanout mode */ } mac_cpus_t; +#define mc_tx_intr_cpu mc_tx_intr_cpus.mtc_intr_cpu +#define mc_tx_retargeted_cpu mc_tx_intr_cpus.mtc_retargeted_cpu + /* Priority values */ typedef enum { MPL_LOW, @@ -126,19 +144,41 @@ typedef enum { /* Protection types */ #define MPT_MACNOSPOOF 0x00000001 -#define MPT_IPNOSPOOF 0x00000002 -#define MPT_RESTRICTED 0x00000004 -#define MPT_ALL (MPT_MACNOSPOOF|MPT_IPNOSPOOF|MPT_RESTRICTED) +#define MPT_RESTRICTED 0x00000002 +#define MPT_IPNOSPOOF 0x00000004 +#define MPT_DHCPNOSPOOF 0x00000008 +#define MPT_ALL 0x0000000f #define MPT_RESET 0xffffffff -#define MPT_MAXIPADDR 32 +#define MPT_MAXCNT 32 +#define MPT_MAXIPADDR MPT_MAXCNT +#define MPT_MAXCID MPT_MAXCNT +#define MPT_MAXCIDLEN 256 + +typedef struct mac_ipaddr_s { + uint32_t ip_version; + in6_addr_t ip_addr; +} mac_ipaddr_t; + +typedef enum { + CIDFORM_TYPED = 1, + CIDFORM_HEX, + CIDFORM_STR +} mac_dhcpcid_form_t; + +typedef struct mac_dhcpcid_s { + uchar_t 
dc_id[MPT_MAXCIDLEN]; + uint32_t dc_len; + mac_dhcpcid_form_t dc_form; +} mac_dhcpcid_t; typedef struct mac_protect_s { uint32_t mp_types; uint32_t mp_ipaddrcnt; - ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; + mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; + uint32_t mp_cidcnt; + mac_dhcpcid_t mp_cids[MPT_MAXCID]; } mac_protect_t; - /* The default priority for links */ #define MPL_LINK_DEFAULT MPL_HIGH @@ -150,6 +190,12 @@ typedef struct mac_protect_s { #define MRP_CPUS_USERSPEC 0x00000004 /* CPU/fanout from user */ #define MRP_PRIORITY 0x00000008 /* Priority set */ #define MRP_PROTECT 0x00000010 /* Protection set */ +#define MRP_RX_RINGS 0x00000020 /* Rx rings */ +#define MRP_TX_RINGS 0x00000040 /* Tx rings */ +#define MRP_RXRINGS_UNSPEC 0x00000080 /* unspecified rings */ +#define MRP_TXRINGS_UNSPEC 0x00000100 /* unspecified rings */ +#define MRP_RINGS_RESET 0x00000200 /* resetting rings */ +#define MRP_POOL 0x00000400 /* CPU pool */ #define MRP_THROTTLE MRP_MAXBW @@ -174,21 +220,24 @@ typedef struct mac_resource_props_s { mac_priority_level_t mrp_priority; /* relative flow priority */ mac_cpus_t mrp_cpus; mac_protect_t mrp_protect; + uint32_t mrp_nrxrings; + uint32_t mrp_ntxrings; + char mrp_pool[MAXPATHLEN]; /* CPU pool */ } mac_resource_props_t; -#define mrp_ncpus mrp_cpus.mc_ncpus -#define mrp_cpu mrp_cpus.mc_cpus -#define mrp_fanout_cnt mrp_cpus.mc_fanout_cnt -#define mrp_fanout_cpu mrp_cpus.mc_fanout_cpus -#define mrp_pollid mrp_cpus.mc_pollid -#define mrp_workerid mrp_cpus.mc_workerid -#define mrp_intr_cpu mrp_cpus.mc_intr_cpu -#define mrp_fanout_mode mrp_cpus.mc_fanout_mode +#define mrp_ncpus mrp_cpus.mc_ncpus +#define mrp_cpu mrp_cpus.mc_cpus +#define mrp_rx_fanout_cnt mrp_cpus.mc_rx_fanout_cnt +#define mrp_rx_pollid mrp_cpus.mc_rx_pollid +#define mrp_rx_workerid mrp_cpus.mc_rx_workerid +#define mrp_rx_intr_cpu mrp_cpus.mc_rx_intr_cpu +#define mrp_fanout_mode mrp_cpus.mc_fanout_mode #define MAC_COPY_CPUS(mrp, fmrp) { \ int ncpus; \ (fmrp)->mrp_ncpus = (mrp)->mrp_ncpus; 
\ - (fmrp)->mrp_intr_cpu = (mrp)->mrp_intr_cpu; \ + (fmrp)->mrp_rx_fanout_cnt = (mrp)->mrp_rx_fanout_cnt; \ + (fmrp)->mrp_rx_intr_cpu = (mrp)->mrp_rx_intr_cpu; \ (fmrp)->mrp_fanout_mode = (mrp)->mrp_fanout_mode; \ if ((mrp)->mrp_ncpus == 0) { \ (fmrp)->mrp_mask &= ~MRP_CPUS; \ @@ -202,24 +251,6 @@ typedef struct mac_resource_props_s { } \ } -typedef struct flow_stats_s { - uint64_t fs_rbytes; - uint64_t fs_ipackets; - uint64_t fs_ierrors; - uint64_t fs_obytes; - uint64_t fs_opackets; - uint64_t fs_oerrors; -} flow_stats_t; - -typedef enum { - FLOW_STAT_RBYTES, - FLOW_STAT_IPACKETS, - FLOW_STAT_IERRORS, - FLOW_STAT_OBYTES, - FLOW_STAT_OPACKETS, - FLOW_STAT_OERRORS -} flow_stat_t; - #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 #pragma pack() #endif diff --git a/usr/src/uts/common/sys/mac_flow_impl.h b/usr/src/uts/common/sys/mac_flow_impl.h index f01d9d486c..307e06c1bf 100644 --- a/usr/src/uts/common/sys/mac_flow_impl.h +++ b/usr/src/uts/common/sys/mac_flow_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -280,7 +280,9 @@ struct flow_entry_s { /* Protected by */ void *fe_rx_ring_group; /* SL */ void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */ int fe_rx_srs_cnt; /* fe_lock */ + void *fe_tx_ring_group; void *fe_tx_srs; /* WO */ + int fe_tx_ring_cnt; /* * This is a unicast flow, and is a mac_client_impl_t @@ -317,7 +319,8 @@ struct flow_entry_s { /* Protected by */ flow_tab_t *fe_flow_tab; kstat_t *fe_ksp; - flow_stats_t fe_flowstats; + kstat_t *fe_misc_stat_ksp; + boolean_t fe_desc_logged; uint64_t fe_nic_speed; }; @@ -465,23 +468,36 @@ typedef struct flow_tab_info_s { #define FLOW_TAB_EMPTY(ft) ((ft) == NULL || (ft)->ft_flow_count == 0) -/* - * This is used by mac_tx_send. 
- */ -typedef struct mac_tx_stats_s { - uint_t ts_opackets; - uint_t ts_obytes; - uint_t ts_oerrors; -} mac_tx_stats_t; - -#define FLOW_STAT_UPDATE(f, s, c) { \ - ((flow_entry_t *)(f))->fe_flowstats.fs_##s += ((uint64_t)(c)); \ + +#define MCIP_STAT_UPDATE(m, s, c) { \ + ((mac_client_impl_t *)(m))->mci_misc_stat.mms_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_RX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_set_t *)(m))->srs_rx.sr_stat.mrs_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_TX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_set_t *)(m))->srs_tx.st_stat.mts_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_TX_STATS_UPDATE(m, s) { \ + SRS_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets); \ + SRS_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes); \ + SRS_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors); \ +} + +#define SOFTRING_TX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_t *)(m))->s_st_stat.mts_##s += ((uint64_t)(c)); \ } -#define FLOW_TX_STATS_UPDATE(f, s) { \ - FLOW_STAT_UPDATE((f), opackets, (s)->ts_opackets); \ - FLOW_STAT_UPDATE((f), obytes, (s)->ts_obytes); \ - FLOW_STAT_UPDATE((f), oerrors, (s)->ts_oerrors); \ +#define SOFTRING_TX_STATS_UPDATE(m, s) { \ + SOFTRING_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets); \ + SOFTRING_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes); \ + SOFTRING_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors); \ } extern void mac_flow_init(); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 760e2a4a18..ff4eeb1221 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -26,10 +26,12 @@ #ifndef _SYS_MAC_IMPL_H #define _SYS_MAC_IMPL_H +#include <sys/cpupart.h> #include <sys/modhash.h> #include <sys/mac_client.h> #include <sys/mac_provider.h> #include <sys/note.h> +#include <sys/avl.h> #include <net/if.h> #include <sys/mac_flow_impl.h> #include <netinet/ip6.h> @@ -85,6 +87,8 @@ typedef struct mac_chain_s { #define MCB_NOTIFY_CB_T 0x2 #define MCB_TX_NOTIFY_CB_T 0x4 
+extern boolean_t mac_tx_serialize; + typedef struct mac_cb_s { struct mac_cb_s *mcb_nextp; /* Linked list of callbacks */ void *mcb_objp; /* Ptr to enclosing object */ @@ -189,6 +193,8 @@ typedef enum { #define MR_CONDEMNED 0x2 #define MR_QUIESCE 0x4 +typedef struct mac_impl_s mac_impl_t; + struct mac_ring_s { int mr_index; /* index in the original list */ mac_ring_type_t mr_type; /* ring type */ @@ -196,11 +202,15 @@ struct mac_ring_s { mac_group_handle_t mr_gh; /* reference to group */ mac_classify_type_t mr_classify_type; /* HW vs SW */ - struct mac_soft_ring_set_s *mr_srs; /* associated SRS */ - uint_t mr_refcnt; /* Ring references */ + struct mac_soft_ring_set_s *mr_srs; /* associated SRS */ + mac_ring_handle_t mr_prh; /* associated pseudo ring hdl */ + uint_t mr_refcnt; /* Ring references */ /* ring generation no. to guard against drivers using stale rings */ uint64_t mr_gen_num; + kstat_t *mr_ksp; /* ring kstats */ + mac_impl_t *mr_mip; /* pointer to primary's mip */ + kmutex_t mr_lock; kcondvar_t mr_cv; /* mr_lock */ mac_ring_state_t mr_state; /* mr_lock */ @@ -211,6 +221,7 @@ struct mac_ring_s { #define mr_driver mr_info.mri_driver #define mr_start mr_info.mri_start #define mr_stop mr_info.mri_stop +#define mr_stat mr_info.mri_stat #define MAC_RING_MARK(mr, flag) \ (mr)->mr_flag |= flag; @@ -245,9 +256,9 @@ typedef struct mac_grp_client { struct mac_client_impl_s *mgc_client; } mac_grp_client_t; -#define MAC_RX_GROUP_NO_CLIENT(g) ((g)->mrg_clients == NULL) +#define MAC_GROUP_NO_CLIENT(g) ((g)->mrg_clients == NULL) -#define MAC_RX_GROUP_ONLY_CLIENT(g) \ +#define MAC_GROUP_ONLY_CLIENT(g) \ ((((g)->mrg_clients != NULL) && \ ((g)->mrg_clients->mgc_next == NULL)) ? 
\ (g)->mrg_clients->mgc_client : NULL) @@ -267,7 +278,6 @@ struct mac_group_s { mac_grp_client_t *mrg_clients; /* clients list */ - struct mac_client_impl_s *mrg_tx_client; /* TX client pointer */ mac_group_info_t mrg_info; /* driver supplied info */ }; @@ -279,8 +289,6 @@ struct mac_group_s { #define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable #define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable -#define MAC_DEFAULT_GROUP(mh) (((mac_impl_t *)mh)->mi_rx_groups) - #define MAC_RING_TX(mhp, rh, mp, rest) { \ mac_ring_handle_t mrh = rh; \ mac_impl_t *mimpl = (mac_impl_t *)mhp; \ @@ -304,7 +312,8 @@ struct mac_group_s { * rh nulled out if the bridge chooses to send output on a different * link due to forwarding. */ -#define MAC_TX(mip, rh, mp, share_bound) { \ +#define MAC_TX(mip, rh, mp, src_mcip) { \ + mac_ring_handle_t rhandle = (rh); \ /* \ * If there is a bound Hybrid I/O share, send packets through \ * the default tx ring. (When there's a bound Hybrid I/O share, \ @@ -312,17 +321,19 @@ struct mac_group_s { * and not accessible from here.) \ */ \ _NOTE(CONSTANTCONDITION) \ - if (share_bound) \ - rh = NULL; \ + if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \ + rhandle = (mip)->mi_default_tx_ring; \ + if (mip->mi_promisc_list != NULL) \ + mac_promisc_dispatch(mip, mp, src_mcip); \ /* \ * Grab the proper transmit pointer and handle. 
Special \ * optimization: we can test mi_bridge_link itself atomically, \ * and if that indicates no bridge send packets through tx ring.\ */ \ if (mip->mi_bridge_link == NULL) { \ - MAC_RING_TX(mip, rh, mp, mp); \ + MAC_RING_TX(mip, rhandle, mp, mp); \ } else { \ - mp = mac_bridge_tx(mip, rh, mp); \ + mp = mac_bridge_tx(mip, rhandle, mp); \ } \ } @@ -346,8 +357,6 @@ typedef enum { MAC_ADDRESS_TYPE_UNICAST_PROMISC /* promiscuous mode */ } mac_address_type_t; -typedef struct mac_impl_s mac_impl_t; - typedef struct mac_address_s { mac_address_type_t ma_type; /* address type */ int ma_nusers; /* number of users */ @@ -406,7 +415,6 @@ struct mac_impl_s { link_state_t mi_lowlinkstate; /* none */ link_state_t mi_lastlowlinkstate; /* none */ uint_t mi_devpromisc; /* SL */ - kmutex_t mi_lock; uint8_t mi_addr[MAXMACADDRLEN]; /* mi_rw_lock */ uint8_t mi_dstaddr[MAXMACADDRLEN]; /* mi_rw_lock */ boolean_t mi_dstaddr_set; @@ -436,6 +444,11 @@ struct mac_impl_s { mac_group_type_t mi_rx_group_type; /* grouping type */ uint_t mi_rx_group_count; mac_group_t *mi_rx_groups; + mac_group_t *mi_rx_donor_grp; + uint_t mi_rxrings_rsvd; + uint_t mi_rxrings_avail; + uint_t mi_rxhwclnt_avail; + uint_t mi_rxhwclnt_used; mac_capab_rings_t mi_rx_rings_cap; @@ -446,8 +459,11 @@ struct mac_impl_s { uint_t mi_tx_group_count; uint_t mi_tx_group_free; mac_group_t *mi_tx_groups; - mac_capab_rings_t mi_tx_rings_cap; + uint_t mi_txrings_rsvd; + uint_t mi_txrings_avail; + uint_t mi_txhwclnt_avail; + uint_t mi_txhwclnt_used; mac_ring_handle_t mi_default_tx_ring; @@ -516,7 +532,7 @@ struct mac_impl_s { * sorted: the first one has the greatest value. */ mac_margin_req_t *mi_mmrp; - mac_priv_prop_t *mi_priv_prop; + char **mi_priv_prop; uint_t mi_priv_prop_count; /* @@ -541,6 +557,72 @@ struct mac_impl_s { #endif }; +/* + * The default TX group is the last one in the list. 
+ */ +#define MAC_DEFAULT_TX_GROUP(mip) \ + (mip)->mi_tx_groups + (mip)->mi_tx_group_count + +/* + * The default RX group is the first one in the list + */ +#define MAC_DEFAULT_RX_GROUP(mip) (mip)->mi_rx_groups + +/* Reserved RX rings */ +#define MAC_RX_RING_RESERVED(m, cnt) { \ + ASSERT((m)->mi_rxrings_avail >= (cnt)); \ + (m)->mi_rxrings_rsvd += (cnt); \ + (m)->mi_rxrings_avail -= (cnt); \ +} + +/* Released RX rings */ +#define MAC_RX_RING_RELEASED(m, cnt) { \ + ASSERT((m)->mi_rxrings_rsvd >= (cnt)); \ + (m)->mi_rxrings_rsvd -= (cnt); \ + (m)->mi_rxrings_avail += (cnt); \ +} + +/* Reserved a RX group */ +#define MAC_RX_GRP_RESERVED(m) { \ + ASSERT((m)->mi_rxhwclnt_avail > 0); \ + (m)->mi_rxhwclnt_avail--; \ + (m)->mi_rxhwclnt_used++; \ +} + +/* Released a RX group */ +#define MAC_RX_GRP_RELEASED(m) { \ + ASSERT((m)->mi_rxhwclnt_used > 0); \ + (m)->mi_rxhwclnt_avail++; \ + (m)->mi_rxhwclnt_used--; \ +} + +/* Reserved TX rings */ +#define MAC_TX_RING_RESERVED(m, cnt) { \ + ASSERT((m)->mi_txrings_avail >= (cnt)); \ + (m)->mi_txrings_rsvd += (cnt); \ + (m)->mi_txrings_avail -= (cnt); \ +} +/* Released TX rings */ +#define MAC_TX_RING_RELEASED(m, cnt) { \ + ASSERT((m)->mi_txrings_rsvd >= (cnt)); \ + (m)->mi_txrings_rsvd -= (cnt); \ + (m)->mi_txrings_avail += (cnt); \ +} + +/* Reserved a TX group */ +#define MAC_TX_GRP_RESERVED(m) { \ + ASSERT((m)->mi_txhwclnt_avail > 0); \ + (m)->mi_txhwclnt_avail--; \ + (m)->mi_txhwclnt_used++; \ +} + +/* Released a TX group */ +#define MAC_TX_GRP_RELEASED(m) { \ + ASSERT((m)->mi_txhwclnt_used > 0); \ + (m)->mi_txhwclnt_avail++; \ + (m)->mi_txhwclnt_used--; \ +} + /* for mi_state_flags */ #define MIS_DISABLED 0x0001 #define MIS_IS_VNIC 0x0002 @@ -570,12 +652,6 @@ typedef struct mac_notify_task_arg { mac_ring_t *mnt_ring; } mac_notify_task_arg_t; -typedef enum { - MAC_RX_NO_RESERVE, - MAC_RX_RESERVE_DEFAULT, - MAC_RX_RESERVE_NONDEFAULT -} mac_rx_group_reserve_type_t; - /* * XXX All MAC_DBG_PRTs must be replaced with call to dtrace 
probes. For now * it may be easier to have these printfs for easier debugging @@ -599,18 +675,45 @@ extern int mac_dbg; (need_close) = ((uintptr_t)mph & 0x1); \ } +/* + * Type of property information that can be returned by a driver. + * Valid flags of the pr_flags of the mac_prop_info_t data structure. + */ +#define MAC_PROP_INFO_DEFAULT 0x0001 +#define MAC_PROP_INFO_RANGE 0x0002 +#define MAC_PROP_INFO_PERM 0x0004 + +/* + * Property information. pr_flags is a combination of one of the + * MAC_PROP_INFO_* flags, it is reset by the framework before invoking + * the driver's prefix_propinfo() entry point. + * + * Drivers should use MAC_PROP_INFO_SET_*() macros to provide + * information about a property. + */ +typedef struct mac_prop_info_state_s { + uint8_t pr_flags; + uint8_t pr_perm; + void *pr_default; + size_t pr_default_size; + uint8_t pr_default_status; + mac_propval_range_t *pr_range; +} mac_prop_info_state_t; + +#define MAC_PROTECT_ENABLED(mcip, type) \ + (((mcip)->mci_flent-> \ + fe_resource_props.mrp_mask & MRP_PROTECT) != 0 && \ + ((mcip)->mci_flent-> \ + fe_resource_props.mrp_protect.mp_types & (type)) != 0) + typedef struct mac_client_impl_s mac_client_impl_t; extern void mac_init(void); extern int mac_fini(void); -extern void mac_stat_create(mac_impl_t *); -extern void mac_stat_destroy(mac_impl_t *); -extern uint64_t mac_stat_default(mac_impl_t *, uint_t); extern void mac_ndd_ioctl(mac_impl_t *, queue_t *, mblk_t *); -extern void mac_create_soft_ring_kstats(mac_impl_t *, int32_t); -extern boolean_t mac_ip_hdr_length_v6(mblk_t *, ip6_t *, uint16_t *, - uint8_t *, boolean_t *, uint32_t *); +extern boolean_t mac_ip_hdr_length_v6(ip6_t *, uint8_t *, uint16_t *, + uint8_t *, ip6_frag_t **); extern mblk_t *mac_copymsgchain_cksum(mblk_t *); extern mblk_t *mac_fix_cksum(mblk_t *); @@ -649,10 +752,17 @@ extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *, mac_group_t *); extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); extern mblk_t 
*mac_bridge_tx(mac_impl_t *, mac_ring_handle_t, mblk_t *); +extern mac_group_t *mac_reserve_rx_group(mac_client_impl_t *, uint8_t *, + boolean_t); +extern void mac_release_rx_group(mac_client_impl_t *, mac_group_t *); +extern int mac_rx_switch_group(mac_client_impl_t *, mac_group_t *, + mac_group_t *); extern mac_ring_t *mac_reserve_tx_ring(mac_impl_t *, mac_ring_t *); -extern void mac_release_tx_ring(mac_ring_handle_t); -extern mac_group_t *mac_reserve_tx_group(mac_impl_t *, mac_share_handle_t); -extern void mac_release_tx_group(mac_impl_t *, mac_group_t *); +extern mac_group_t *mac_reserve_tx_group(mac_client_impl_t *, boolean_t); +extern void mac_release_tx_group(mac_client_impl_t *, mac_group_t *); +extern void mac_tx_switch_group(mac_client_impl_t *, mac_group_t *, + mac_group_t *); +extern void mac_rx_switch_grp_to_sw(mac_group_t *); /* * MAC address functions are used internally by MAC layer. @@ -676,7 +786,7 @@ extern void mac_link_flow_clean(mac_client_handle_t, flow_entry_t *); * Fanout update routines called when the link speed of the NIC changes * or when a MAC client's share is unbound. 
*/ -extern void mac_fanout_recompute_client(mac_client_impl_t *); +extern void mac_fanout_recompute_client(mac_client_impl_t *, cpupart_t *); extern void mac_fanout_recompute(mac_impl_t *); /* @@ -687,14 +797,15 @@ extern void mac_fanout_recompute(mac_impl_t *); extern int mac_datapath_setup(mac_client_impl_t *, flow_entry_t *, uint32_t); extern void mac_datapath_teardown(mac_client_impl_t *, flow_entry_t *, uint32_t); -extern void mac_srs_group_setup(mac_client_impl_t *, flow_entry_t *, - mac_group_t *, uint32_t); -extern void mac_srs_group_teardown(mac_client_impl_t *, flow_entry_t *, +extern void mac_rx_srs_group_setup(mac_client_impl_t *, flow_entry_t *, + uint32_t); +extern void mac_tx_srs_group_setup(mac_client_impl_t *, flow_entry_t *, + uint32_t); +extern void mac_rx_srs_group_teardown(flow_entry_t *, boolean_t); +extern void mac_tx_srs_group_teardown(mac_client_impl_t *, flow_entry_t *, uint32_t); extern int mac_rx_classify_flow_quiesce(flow_entry_t *, void *); extern int mac_rx_classify_flow_restart(flow_entry_t *, void *); -extern void mac_tx_client_quiesce(mac_client_impl_t *, uint_t); -extern void mac_tx_client_restart(mac_client_impl_t *); extern void mac_client_quiesce(mac_client_impl_t *); extern void mac_client_restart(mac_client_impl_t *); @@ -725,15 +836,17 @@ extern void mac_rx_group_unmark(mac_group_t *, uint_t); extern void mac_tx_client_flush(mac_client_impl_t *); extern void mac_tx_client_block(mac_client_impl_t *); extern void mac_tx_client_unblock(mac_client_impl_t *); +extern void mac_tx_invoke_callbacks(mac_client_impl_t *, mac_tx_cookie_t); extern int i_mac_promisc_set(mac_impl_t *, boolean_t); extern void i_mac_promisc_walker_cleanup(mac_impl_t *); extern mactype_t *mactype_getplugin(const char *); extern void mac_addr_factory_init(mac_impl_t *); extern void mac_addr_factory_fini(mac_impl_t *); -extern void mac_register_priv_prop(mac_impl_t *, mac_priv_prop_t *, uint_t); +extern void mac_register_priv_prop(mac_impl_t *, char **); 
extern void mac_unregister_priv_prop(mac_impl_t *); extern int mac_init_rings(mac_impl_t *, mac_ring_type_t); extern void mac_free_rings(mac_impl_t *, mac_ring_type_t); +extern void mac_compare_ddi_handle(mac_group_t *, uint_t, mac_ring_t *); extern int mac_start_group(mac_group_t *); extern void mac_stop_group(mac_group_t *); @@ -742,20 +855,41 @@ extern void mac_stop_ring(mac_ring_t *); extern int mac_add_macaddr(mac_impl_t *, mac_group_t *, uint8_t *, boolean_t); extern int mac_remove_macaddr(mac_address_t *); -extern void mac_set_rx_group_state(mac_group_t *, mac_group_state_t); -extern void mac_rx_group_add_client(mac_group_t *, mac_client_impl_t *); -extern void mac_rx_group_remove_client(mac_group_t *, mac_client_impl_t *) -; +extern void mac_set_group_state(mac_group_t *, mac_group_state_t); +extern void mac_group_add_client(mac_group_t *, mac_client_impl_t *); +extern void mac_group_remove_client(mac_group_t *, mac_client_impl_t *); + extern int i_mac_group_add_ring(mac_group_t *, mac_ring_t *, int); extern void i_mac_group_rem_ring(mac_group_t *, mac_ring_t *, boolean_t); - +extern int mac_group_ring_modify(mac_client_impl_t *, mac_group_t *, + mac_group_t *); extern void mac_poll_state_change(mac_handle_t, boolean_t); +extern mac_group_state_t mac_group_next_state(mac_group_t *, + mac_client_impl_t **, mac_group_t *, boolean_t); + extern mblk_t *mac_protect_check(mac_client_handle_t, mblk_t *); extern int mac_protect_set(mac_client_handle_t, mac_resource_props_t *); extern boolean_t mac_protect_enabled(mac_client_handle_t, uint32_t); extern int mac_protect_validate(mac_resource_props_t *); extern void mac_protect_update(mac_resource_props_t *, mac_resource_props_t *); +extern void mac_protect_update_v6_local_addr(mac_client_impl_t *); +extern void mac_protect_intercept_dhcp(mac_client_impl_t *, mblk_t *); +extern void mac_protect_flush_dhcp(mac_client_impl_t *); +extern void mac_protect_cancel_timer(mac_client_impl_t *); +extern void 
mac_protect_init(mac_client_impl_t *); +extern void mac_protect_fini(mac_client_impl_t *); + +extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *); + +extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *); +extern void mac_set_pool_effective(boolean_t, cpupart_t *, + mac_resource_props_t *, mac_resource_props_t *); +extern void mac_set_rings_effective(mac_client_impl_t *); +extern mac_client_impl_t *mac_check_primary_relocation(mac_client_impl_t *, + boolean_t); /* Global callbacks into the bridging module (when loaded) */ extern mac_bridge_tx_t mac_bridge_tx_cb; @@ -763,6 +897,7 @@ extern mac_bridge_rx_t mac_bridge_rx_cb; extern mac_bridge_ref_t mac_bridge_ref_cb; extern mac_bridge_ls_t mac_bridge_ls_cb; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 988f723f67..c96d07b594 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,7 +32,6 @@ #include <sys/sunddi.h> #include <sys/stream.h> #include <sys/mkdev.h> -#include <sys/mac_flow.h> #include <sys/mac.h> /* @@ -44,18 +43,16 @@ extern "C" { #endif /* - * MAC version identifier. This is used by mac_alloc() mac_register() to + * MAC version identifiers. Drivers compiled against the stable V1 version + * of the API should register with MAC_VERSION_V1. ON drivers should use + * MAC_VERSION. This is used by mac_alloc() mac_register() to * verify that incompatible drivers don't register. 
*/ -#define MAC_VERSION 0x2 +#define MAC_VERSION_V1 0x1 +#define MAC_VERSION MAC_VERSION_V1 /* - * Opaque handle types - */ -typedef struct __mac_rule_handle *mac_rule_handle_t; - -/* - * Statistics + * Possible values for ETHER_STAT_XCVR_INUSE statistic. */ #define XCVR_UNDEFINED 0 @@ -82,28 +79,35 @@ typedef struct __mac_rule_handle *mac_rule_handle_t; */ typedef enum { /* - * Capabilities reserved for internal use only + * Public Capabilities (MAC_VERSION_V1) */ - MAC_CAPAB_VNIC = 0x0001, /* data is mac_capab_vnic_t */ - MAC_CAPAB_ANCHOR_VNIC = 0x0002, /* boolean only, no data */ - MAC_CAPAB_AGGR = 0x0004, /* data is mac_capab_aggr_t */ - MAC_CAPAB_NO_NATIVEVLAN = 0x0008, /* boolean only, no data */ - MAC_CAPAB_NO_ZCOPY = 0x0010, /* boolean only, no data */ - MAC_CAPAB_LEGACY = 0x0020, /* data is mac_capab_legacy_t */ - MAC_CAPAB_VRRP = 0x0040, /* data is mac_capab_vrrp_t */ + MAC_CAPAB_HCKSUM = 0x00000001, /* data is a uint32_t */ + MAC_CAPAB_LSO = 0x00000008, /* data is mac_capab_lso_t */ /* - * Public Capabilities + * Reserved capabilities, do not use */ - MAC_CAPAB_HCKSUM = 0x0100, /* data is a uint32_t */ - MAC_CAPAB_LSO = 0x0200, /* data is mac_capab_lso_t */ - MAC_CAPAB_RINGS = 0x0400, /* data is mac_capab_rings_t */ - MAC_CAPAB_MULTIFACTADDR = 0x0800, /* mac_data_multifactaddr_t */ - MAC_CAPAB_SHARES = 0x1000 /* data is mac_capab_share_t */ + MAC_CAPAB_RESERVED1 = 0x00000002, + MAC_CAPAB_RESERVED2 = 0x00000004, - /* add new capabilities here */ -} mac_capab_t; + /* + * Private driver capabilities + */ + MAC_CAPAB_RINGS = 0x00000010, /* data is mac_capab_rings_t */ + MAC_CAPAB_SHARES = 0x00000020, /* data is mac_capab_share_t */ + MAC_CAPAB_MULTIFACTADDR = 0x00000040, /* mac_data_multifactaddr_t */ + /* + * Private driver capabilities for use by the GLDv3 framework only + */ + MAC_CAPAB_VNIC = 0x00010000, /* data is mac_capab_vnic_t */ + MAC_CAPAB_ANCHOR_VNIC = 0x00020000, /* boolean only, no data */ + MAC_CAPAB_AGGR = 0x00040000, /* data is 
mac_capab_aggr_t */ + MAC_CAPAB_NO_NATIVEVLAN = 0x00080000, /* boolean only, no data */ + MAC_CAPAB_NO_ZCOPY = 0x00100000, /* boolean only, no data */ + MAC_CAPAB_LEGACY = 0x00200000, /* data is mac_capab_legacy_t */ + MAC_CAPAB_VRRP = 0x00400000 /* data is mac_capab_vrrp_t */ +} mac_capab_t; /* * LSO capability @@ -164,6 +168,8 @@ typedef struct mac_capab_legacy_s { void (*ml_fastpath_enable)(void *); } mac_capab_legacy_t; +typedef struct __mac_prop_info_handle *mac_prop_info_handle_t; + /* * MAC driver entry point types. */ @@ -182,12 +188,15 @@ typedef void (*mac_close_t)(void *); typedef int (*mac_set_prop_t)(void *, const char *, mac_prop_id_t, uint_t, const void *); typedef int (*mac_get_prop_t)(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +typedef void (*mac_prop_info_t)(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* - * Drivers must set all of these callbacks except for mc_resources, - * mc_ioctl, and mc_getcapab, which are optional. If any of these optional - * callbacks are set, their appropriate flags must be set in mc_callbacks. + * Driver callbacks. The following capabilities are optional, and if + * implemented by the driver, must have a corresponding MC_ flag set + * in the mc_callbacks field. + * * Any future additions to this list must also be accompanied by an * associated mc_callbacks flag so that the framework can grow without * affecting the binary compatibility of the interface. 
@@ -201,18 +210,31 @@ typedef struct mac_callbacks_s { mac_multicst_t mc_multicst; /* Enable or disable a multicast addr */ mac_unicst_t mc_unicst; /* Set the unicast MAC address */ mac_tx_t mc_tx; /* Transmit a packet */ + void *mc_reserved; /* Reserved, do not use */ mac_ioctl_t mc_ioctl; /* Process an unknown ioctl */ mac_getcapab_t mc_getcapab; /* Get capability information */ mac_open_t mc_open; /* Open the device */ mac_close_t mc_close; /* Close the device */ mac_set_prop_t mc_setprop; mac_get_prop_t mc_getprop; + mac_prop_info_t mc_propinfo; } mac_callbacks_t; -typedef struct mac_priv_prop_s { - char mpp_name[MAXLINKPROPNAME]; - uint_t mpp_flags; -} mac_priv_prop_t; +/* + * Flags for mc_callbacks. Requiring drivers to set the flags associated + * with optional callbacks initialized in the structure allows the mac + * module to add optional callbacks in the future without requiring drivers + * to recompile. + */ +#define MC_RESERVED 0x0001 +#define MC_IOCTL 0x0002 +#define MC_GETCAPAB 0x0004 +#define MC_OPEN 0x0008 +#define MC_CLOSE 0x0010 +#define MC_SETPROP 0x0020 +#define MC_GETPROP 0x0040 +#define MC_PROPINFO 0x0080 +#define MC_PROPERTIES (MC_SETPROP | MC_GETPROP | MC_PROPINFO) /* * Virtualization Capabilities @@ -245,24 +267,16 @@ typedef void (*mac_rx_func_t)(void *, mac_resource_handle_t, mblk_t *, * * MAC_VIRT_HIO: Hybrid I/O capable MAC. Require the support * of the MAC_CAPAB_SHARES capability. - * - * MAC_VIRT_SERIALIZE: Temporary flag *ONLY* for nxge. Mac layer - * uses this to enable mac Tx serializer on - * outbound traffic and to always enqueue - * incoming traffic on Rx soft rings in mac. 
*/ #define MAC_VIRT_NONE 0x0 #define MAC_VIRT_LEVEL1 0x1 #define MAC_VIRT_HIO 0x2 -#define MAC_VIRT_SERIALIZE 0x4 typedef enum { MAC_RING_TYPE_RX = 1, /* Receive ring */ MAC_RING_TYPE_TX /* Transmit ring */ } mac_ring_type_t; -#define MAX_RINGS_PER_GROUP 128 - /* * Grouping type of a ring group * @@ -313,6 +327,8 @@ typedef void (*mac_ring_stop_t)(mac_ring_driver_t); typedef mblk_t *(*mac_ring_send_t)(void *, mblk_t *); typedef mblk_t *(*mac_ring_poll_t)(void *, int); +typedef int (*mac_ring_stat_t)(mac_ring_driver_t, uint_t, uint64_t *); + typedef struct mac_ring_info_s { mac_ring_driver_t mri_driver; mac_ring_start_t mri_start; @@ -322,11 +338,27 @@ typedef struct mac_ring_info_s { mac_ring_send_t send; mac_ring_poll_t poll; } mrfunion; + mac_ring_stat_t mri_stat; + /* + * mri_flags will have some bits set to indicate some special + * property/feature of a ring like serialization needed for a + * Tx ring or packets should always need enqueuing on Rx side, + * etc. + */ + uint_t mri_flags; } mac_ring_info_s; #define mri_tx mrfunion.send #define mri_poll mrfunion.poll +/* + * #defines for mri_flags. The flags are temporary flags that are provided + * only to workaround issues in specific drivers, and they will be + * removed in the future. + */ +#define MAC_RING_TX_SERIALIZE 0x1 +#define MAC_RING_RX_ENQUEUE 0x2 + typedef int (*mac_group_start_t)(mac_group_driver_t); typedef void (*mac_group_stop_t)(mac_group_driver_t); typedef int (*mac_add_mac_addr_t)(void *, const uint8_t *); @@ -415,26 +447,12 @@ typedef struct mac_register_s { uint_t m_max_sdu; void *m_pdata; size_t m_pdata_size; + char **m_priv_props; uint32_t m_margin; - mac_priv_prop_t *m_priv_props; - size_t m_priv_prop_count; uint32_t m_v12n; /* Virtualization level */ } mac_register_t; /* - * Flags for mc_callbacks. 
Requiring drivers to set the flags associated - * with optional callbacks initialized in the structure allows the mac - * module to add optional callbacks in the future without requiring drivers - * to recompile. - */ -#define MC_IOCTL 0x001 -#define MC_GETCAPAB 0x002 -#define MC_OPEN 0x004 -#define MC_CLOSE 0x008 -#define MC_SETPROP 0x010 -#define MC_GETPROP 0x020 - -/* * Driver interface functions. */ extern void mac_sdu_get(mac_handle_t, uint_t *, uint_t *); @@ -476,6 +494,9 @@ extern void mac_init_ops(struct dev_ops *, const char *); extern void mac_fini_ops(struct dev_ops *); extern int mac_devt_to_instance(dev_t); extern minor_t mac_private_minor(void); +extern void mac_ring_intr_set(mac_ring_handle_t, + ddi_intr_handle_t); + extern mactype_register_t *mactype_alloc(uint_t); extern void mactype_free(mactype_register_t *); @@ -488,6 +509,31 @@ extern boolean_t mac_unicst_verify(mac_handle_t, extern int mac_group_add_ring(mac_group_handle_t, int); extern void mac_group_rem_ring(mac_group_handle_t, mac_ring_handle_t); +extern mac_ring_handle_t mac_find_ring(mac_group_handle_t, int); + +extern void mac_prop_info_set_default_uint8( + mac_prop_info_handle_t, uint8_t); +extern void mac_prop_info_set_default_str( + mac_prop_info_handle_t, const char *); +extern void mac_prop_info_set_default_uint64( + mac_prop_info_handle_t, uint64_t); +extern void mac_prop_info_set_default_uint32( + mac_prop_info_handle_t, uint32_t); +extern void mac_prop_info_set_default_link_flowctrl( + mac_prop_info_handle_t, link_flowctrl_t); +extern void mac_prop_info_set_range_uint32( + mac_prop_info_handle_t, + uint32_t, uint32_t); +extern void mac_prop_info_set_perm(mac_prop_info_handle_t, + uint8_t); + +extern void mac_hcksum_get(mblk_t *, uint32_t *, + uint32_t *, uint32_t *, uint32_t *, + uint32_t *); +extern void mac_hcksum_set(mblk_t *, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +extern void mac_lso_get(mblk_t *, uint32_t *, uint32_t *); #endif /* _KERNEL */ diff --git 
a/usr/src/uts/common/sys/mac_soft_ring.h b/usr/src/uts/common/sys/mac_soft_ring.h index ed4c47954d..88f1aa7249 100644 --- a/usr/src/uts/common/sys/mac_soft_ring.h +++ b/usr/src/uts/common/sys/mac_soft_ring.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,11 +33,13 @@ extern "C" { #include <sys/types.h> #include <sys/cpuvar.h> +#include <sys/cpupart.h> #include <sys/processor.h> #include <sys/stream.h> #include <sys/squeue.h> #include <sys/dlpi.h> #include <sys/mac_impl.h> +#include <sys/mac_stat.h> #define S_RING_NAMELEN 64 @@ -85,8 +87,6 @@ struct mac_soft_ring_s { /* # of mblocks after which to relieve flow control */ int s_ring_tx_lowat; boolean_t s_ring_tx_woken_up; - uint32_t s_ring_blocked_cnt; /* times blocked for Tx descs */ - uint32_t s_ring_unblocked_cnt; /* unblock calls from driver */ uint32_t s_ring_hiwat_cnt; /* times blocked for Tx descs */ void *s_ring_tx_arg1; @@ -107,9 +107,9 @@ struct mac_soft_ring_s { kthread_t *s_ring_worker; /* kernel thread id */ char s_ring_name[S_RING_NAMELEN + 1]; uint32_t s_ring_total_inpkt; + uint32_t s_ring_total_rbytes; uint32_t s_ring_drops; struct mac_client_impl_s *s_ring_mcip; - void *s_ring_flent; kstat_t *s_ring_ksp; /* Teardown, poll disable control ops */ @@ -119,6 +119,8 @@ struct mac_soft_ring_s { mac_soft_ring_t *s_ring_next; mac_soft_ring_t *s_ring_prev; mac_soft_ring_drain_func_t s_ring_drain_func; + + mac_tx_stats_t s_st_stat; }; typedef void (*mac_srs_drain_proc_t)(mac_soft_ring_set_t *, uint_t); @@ -131,9 +133,6 @@ typedef struct mac_srs_tx_s { void *st_arg1; void *st_arg2; mac_group_t *st_group; /* TX group for share */ - uint32_t st_ring_count; /* no. 
of tx rings */ - mac_ring_handle_t *st_rings; - boolean_t st_woken_up; /* @@ -156,18 +155,19 @@ typedef struct mac_srs_tx_s { */ uint32_t st_hiwat; /* mblk cnt to apply flow control */ uint32_t st_lowat; /* mblk cnt to relieve flow control */ - uint32_t st_drop_count; + uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ + mac_tx_stats_t st_stat; + mac_capab_aggr_t st_capab_aggr; /* - * Number of times the srs gets blocked due to lack of Tx - * desc is noted down. Corresponding wakeup from driver - * to unblock is also noted down. They should match in a - * correctly working setup. If there is less unblocks - * than blocks, then Tx side waits forever for a wakeup - * from below. The following protected by srs_lock. + * st_soft_rings is used as an array to store aggr Tx soft + * rings. When aggr_find_tx_ring() returns a pseudo ring, + * the associated soft ring has to be found. st_soft_rings + * array stores the soft ring associated with a pseudo Tx + * ring and it can be accessed using the pseudo ring + * index (mr_index). Note that the ring index is unique + * for each ring in a group. 
*/ - uint32_t st_blocked_cnt; /* times blocked for Tx descs */ - uint32_t st_unblocked_cnt; /* unblock calls from driver */ - uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ + mac_soft_ring_t **st_soft_rings; } mac_srs_tx_t; /* Receive side Soft Ring Set */ @@ -191,9 +191,7 @@ typedef struct mac_srs_rx_s { uint32_t sr_hiwat; /* mblk cnt to relieve flow control */ uint32_t sr_lowat; - uint32_t sr_poll_count; - uint32_t sr_intr_count; - uint32_t sr_drop_count; + mac_rx_stats_t sr_stat; /* Times polling was enabled */ uint32_t sr_poll_on; @@ -246,13 +244,6 @@ typedef struct mac_srs_rx_s { uint32_t sr_drain_finish_intr; /* Polling thread needs to schedule worker wakeup */ uint32_t sr_poll_worker_wakeup; - - /* Chains less than 10 pkts */ - uint32_t sr_chain_cnt_undr10; - /* Chains between 10 & 50 pkts */ - uint32_t sr_chain_cnt_10to50; - /* Chains over 50 pkts */ - uint32_t sr_chain_cnt_over50; } mac_srs_rx_t; /* @@ -334,12 +325,14 @@ struct mac_soft_ring_set_s { int srs_tcp_ring_count; mac_soft_ring_t **srs_udp_soft_rings; int srs_udp_ring_count; + mac_soft_ring_t **srs_oth_soft_rings; + int srs_oth_ring_count; /* - * srs_oth_soft_rings is also used by tx_srs in + * srs_tx_soft_rings is used by tx_srs * when operating in multi tx ring mode. */ - mac_soft_ring_t **srs_oth_soft_rings; - int srs_oth_ring_count; + mac_soft_ring_t **srs_tx_soft_rings; + int srs_tx_ring_count; /* * Bandwidth control related members.
@@ -386,6 +379,7 @@ struct mac_soft_ring_set_s { mac_srs_rx_t srs_rx; mac_srs_tx_t srs_tx; + kstat_t *srs_ksp; }; /* @@ -507,7 +501,9 @@ typedef enum { SRS_TX_SERIALIZE, SRS_TX_FANOUT, SRS_TX_BW, - SRS_TX_BW_FANOUT + SRS_TX_BW_FANOUT, + SRS_TX_AGGR, + SRS_TX_BW_AGGR } mac_tx_srs_mode_t; /* @@ -626,9 +622,7 @@ extern struct dls_kstats dls_kstat; (srs)->srs_bw->mac_bw_used += (sz); \ } -#define TX_MULTI_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_FANOUT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) +#define MAC_TX_SOFT_RINGS(mac_srs) ((mac_srs)->srs_tx_ring_count >= 1) /* Soft ring flags for teardown */ #define SRS_POLL_THR_OWNER (SRS_PROC | SRS_POLLING | SRS_GET_PKTS) @@ -639,7 +633,8 @@ extern struct dls_kstats dls_kstat; extern void mac_soft_ring_init(void); extern void mac_soft_ring_finish(void); extern void mac_fanout_setup(mac_client_impl_t *, flow_entry_t *, - mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t); + mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t, + cpupart_t *); extern void mac_soft_ring_worker_wakeup(mac_soft_ring_t *); extern void mac_soft_ring_blank(void *, time_t, uint_t, int); @@ -654,6 +649,8 @@ extern mac_soft_ring_set_t *mac_srs_create(struct mac_client_impl_s *, extern void mac_srs_free(mac_soft_ring_set_t *); extern void mac_srs_signal(mac_soft_ring_set_t *, uint_t); extern cpu_t *mac_srs_bind(mac_soft_ring_set_t *, processorid_t); +extern void mac_rx_srs_retarget_intr(mac_soft_ring_set_t *, processorid_t); +extern void mac_tx_srs_retarget_intr(mac_soft_ring_set_t *); extern void mac_srs_change_upcall(void *, mac_direct_rx_t, void *); extern void mac_srs_quiesce_initiate(mac_soft_ring_set_t *); @@ -673,12 +670,13 @@ extern void mac_tx_srs_quiesce(mac_soft_ring_set_t *, uint_t); /* Tx SRS, Tx softring */ extern void mac_tx_srs_wakeup(mac_soft_ring_set_t *, mac_ring_handle_t); -extern void mac_tx_srs_setup(struct mac_client_impl_s *, - flow_entry_t *, uint32_t); +extern 
void mac_tx_srs_setup(struct mac_client_impl_s *, flow_entry_t *); extern mac_tx_func_t mac_tx_get_func(uint32_t); extern mblk_t *mac_tx_send(mac_client_handle_t, mac_ring_handle_t, mblk_t *, mac_tx_stats_t *); extern boolean_t mac_tx_srs_ring_present(mac_soft_ring_set_t *, mac_ring_t *); +extern mac_soft_ring_t *mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *, + mac_ring_t *); extern void mac_tx_srs_add_ring(mac_soft_ring_set_t *, mac_ring_t *); extern void mac_tx_srs_del_ring(mac_soft_ring_set_t *, mac_ring_t *); extern mac_tx_cookie_t mac_tx_srs_no_desc(mac_soft_ring_set_t *, mblk_t *, @@ -695,12 +693,12 @@ extern void mac_client_update_classifier(mac_client_impl_t *, boolean_t); extern void mac_soft_ring_intr_enable(void *); extern boolean_t mac_soft_ring_intr_disable(void *); -extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, void *, uint16_t, +extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, uint16_t, pri_t, mac_client_impl_t *, mac_soft_ring_set_t *, processorid_t, mac_direct_rx_t, void *, mac_resource_handle_t); extern cpu_t *mac_soft_ring_bind(mac_soft_ring_t *, processorid_t); extern void mac_soft_ring_unbind(mac_soft_ring_t *); -extern void mac_soft_ring_free(mac_soft_ring_t *, boolean_t); +extern void mac_soft_ring_free(mac_soft_ring_t *); extern void mac_soft_ring_signal(mac_soft_ring_t *, uint_t); extern void mac_rx_soft_ring_process(mac_client_impl_t *, mac_soft_ring_t *, mblk_t *, mblk_t *, int, size_t); diff --git a/usr/src/uts/common/sys/mac_stat.h b/usr/src/uts/common/sys/mac_stat.h new file mode 100644 index 0000000000..2d2989cd76 --- /dev/null +++ b/usr/src/uts/common/sys/mac_stat.h @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MAC_STAT_H +#define _MAC_STAT_H + +#include <sys/mac_flow_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + +struct mac_soft_ring_set_s; +struct mac_soft_ring_s; + +typedef struct mac_rx_stats_s { + uint64_t mrs_lclbytes; + uint64_t mrs_lclcnt; + uint64_t mrs_pollcnt; + uint64_t mrs_pollbytes; + uint64_t mrs_intrcnt; + uint64_t mrs_intrbytes; + uint64_t mrs_sdrops; + uint64_t mrs_chaincntundr10; + uint64_t mrs_chaincnt10to50; + uint64_t mrs_chaincntover50; + uint64_t mrs_ierrors; +} mac_rx_stats_t; + +typedef struct mac_tx_stats_s { + uint64_t mts_obytes; + uint64_t mts_opackets; + uint64_t mts_oerrors; + /* + * Number of times the srs gets blocked due to lack of Tx + * desc is noted down. Corresponding wakeup from driver + * to unblock is also noted down. They should match in a + * correctly working setup. If there is less unblocks + * than blocks, then Tx side waits forever for a wakeup + * from below. The following protected by srs_lock. 
+ */ + uint64_t mts_blockcnt; /* times blocked for Tx descs */ + uint64_t mts_unblockcnt; /* unblock calls from driver */ + uint64_t mts_sdrops; +} mac_tx_stats_t; + +typedef struct mac_misc_stats_s { + uint64_t mms_multircv; + uint64_t mms_brdcstrcv; + uint64_t mms_multixmt; + uint64_t mms_brdcstxmt; + uint64_t mms_multircvbytes; + uint64_t mms_brdcstrcvbytes; + uint64_t mms_multixmtbytes; + uint64_t mms_brdcstxmtbytes; + uint64_t mms_txerrors; /* vid_check, tag needed errors */ + + /* + * When a ring is taken away from a mac client, before destroying + * corresponding SRS (for rx ring) or soft ring (for tx ring), add stats + * recorded by that SRS or soft ring to defunct lane stats. + */ + mac_rx_stats_t mms_defunctrxlanestats; + mac_tx_stats_t mms_defuncttxlanestats; + + /* link protection stats */ + uint64_t mms_macspoofed; + uint64_t mms_ipspoofed; + uint64_t mms_dhcpspoofed; + uint64_t mms_restricted; + uint64_t mms_dhcpdropped; +} mac_misc_stats_t; + +extern void mac_misc_stat_create(flow_entry_t *); +extern void mac_misc_stat_delete(flow_entry_t *); + +extern void mac_ring_stat_create(mac_ring_t *); +extern void mac_ring_stat_delete(mac_ring_t *); + +extern void mac_srs_stat_create(struct mac_soft_ring_set_s *); +extern void mac_srs_stat_delete(struct mac_soft_ring_set_s *); +extern void mac_tx_srs_stat_recreate(struct mac_soft_ring_set_s *, + boolean_t); + +extern void mac_soft_ring_stat_create(struct mac_soft_ring_s *); +extern void mac_soft_ring_stat_delete(struct mac_soft_ring_s *); + +extern void mac_stat_rename(mac_client_impl_t *); +extern void mac_pseudo_ring_stat_rename(mac_impl_t *); + +extern void mac_driver_stat_create(mac_impl_t *); +extern void mac_driver_stat_delete(mac_impl_t *); +extern uint64_t mac_driver_stat_default(mac_impl_t *, uint_t); + +extern uint64_t mac_rx_ring_stat_get(void *, uint_t); +extern uint64_t mac_tx_ring_stat_get(void *, uint_t); + +#endif /* _MAC_STAT_H */ diff --git a/usr/src/uts/common/sys/mii.h 
b/usr/src/uts/common/sys/mii.h index 6a060c8100..c3941affce 100644 --- a/usr/src/uts/common/sys/mii.h +++ b/usr/src/uts/common/sys/mii.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -428,11 +428,8 @@ boolean_t mii_m_loop_ioctl(mii_handle_t mii, queue_t *wq, mblk_t *msg); * mii MII handle. * name Property name. * id Property ID. - * flags Property flags (MAC_PROP_DEFAULT). * sz Size of property in bytes. * val Location to receive property value. - * perm Location to receive property permissions (either - * MAC_PROP_PERM_READ or MAC_PROP_PERM_RW). * * Returns * @@ -441,7 +438,7 @@ boolean_t mii_m_loop_ioctl(mii_handle_t mii, queue_t *wq, mblk_t *msg); * ENOTSUP if the property is not supported by MII or the PHY. */ int mii_m_getprop(mii_handle_t mii, const char *name, mac_prop_id_t id, - uint_t flags, uint_t sz, void *val, uint_t *perm); + uint_t sz, void *val); /* * mii_m_setprop @@ -471,6 +468,25 @@ int mii_m_setprop(mii_handle_t mii, const char *name, mac_prop_id_t id, uint_t sz, const void *val); /* + * mii_m_propinfo + * + * Used to support the driver's mc_propinfo() mac callback, + * and only to be called from that function (and without any + * locks held). + * + * Arguments + * + * mii MII handle. + * name Property name. + * id Property ID. + * prh Property info handle. + * + */ +void mii_m_propinfo(mii_handle_t mii, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t prh); + + +/* * mii_m_getstat * * Used to support the driver's mc_getstat() mac callback for diff --git a/usr/src/uts/common/sys/net80211.h b/usr/src/uts/common/sys/net80211.h index a8d45c6174..313b335afa 100644 --- a/usr/src/uts/common/sys/net80211.h +++ b/usr/src/uts/common/sys/net80211.h @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc.
All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,7 @@ #define _SYS_NET80211_H #include <sys/mac.h> +#include <sys/mac_provider.h> #include <sys/ethernet.h> #include <sys/net80211_proto.h> #include <sys/net80211_crypto.h> @@ -731,8 +732,10 @@ void *ieee80211_malloc(size_t); void ieee80211_free(void *); int ieee80211_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int ieee80211_getprop(void *, const char *, mac_prop_id_t, uint_t, uint_t, - void *, uint_t *); +int ieee80211_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void ieee80211_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); + struct ieee80211_channel *ieee80211_find_channel(ieee80211com_t *, int, int); const struct ieee80211_rateset *ieee80211_get_suprates(ieee80211com_t *, diff --git a/usr/src/uts/common/sys/nxge/nxge.h b/usr/src/uts/common/sys/nxge/nxge.h index 14801131ce..042590e3e0 100644 --- a/usr/src/uts/common/sys/nxge/nxge.h +++ b/usr/src/uts/common/sys/nxge/nxge.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -601,6 +601,8 @@ typedef struct _nxge_ring_handle_t { p_nxge_t nxgep; int index; /* port-wise */ mac_ring_handle_t ring_handle; + uint64_t ring_gen_num; /* For RX Ring Start */ + uint32_t channel; } nxge_ring_handle_t, *p_nxge_ring_handle_t; /* @@ -791,8 +793,6 @@ struct _nxge_t { nxge_grp_set_t tx_set; boolean_t tdc_is_shared[NXGE_MAX_TDCS]; - boolean_t rx_channel_started[NXGE_MAX_RDCS]; - /* Ring Handles */ nxge_ring_handle_t tx_ring_handles[NXGE_MAX_TDCS]; nxge_ring_handle_t rx_ring_handles[NXGE_MAX_RDCS]; diff --git a/usr/src/uts/common/sys/nxge/nxge_hio.h b/usr/src/uts/common/sys/nxge/nxge_hio.h index 2f809e557f..492da24d55 100644 --- a/usr/src/uts/common/sys/nxge/nxge_hio.h +++ b/usr/src/uts/common/sys/nxge/nxge_hio.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -367,6 +367,8 @@ extern int nxge_hio_rxdma_bind_intr(nxge_t *, rx_rcr_ring_t *, int); /* nxge_hio_guest.c */ extern void nxge_hio_unregister(nxge_t *); +extern int nxge_hio_get_dc_htable_idx(nxge_t *nxge, vpc_type_t type, + uint32_t channel); extern int nxge_guest_regs_map(nxge_t *); extern void nxge_guest_regs_map_free(nxge_t *); @@ -392,7 +394,7 @@ extern nxge_status_t nxge_hio_intr_remove(nxge_t *, vpc_type_t, int); extern nxge_status_t nxge_hio_intr_add(nxge_t *, vpc_type_t, int); extern nxge_status_t nxge_hio_intr_rem(nxge_t *, int); -extern hv_rv_t nxge_hio_ldsv_add(nxge_t *, nxge_hio_dc_t *); +extern int nxge_hio_ldsv_add(nxge_t *, nxge_hio_dc_t *); extern void nxge_hio_ldsv_im(nxge_t *, nxge_ldg_t *, pio_ld_op_t, uint64_t *); extern void nxge_hio_ldgimgn(nxge_t *, nxge_ldg_t *); diff --git a/usr/src/uts/common/sys/nxge/nxge_impl.h b/usr/src/uts/common/sys/nxge/nxge_impl.h index dc6b29be68..1221e542dc 100644 --- a/usr/src/uts/common/sys/nxge/nxge_impl.h +++ b/usr/src/uts/common/sys/nxge/nxge_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * 
Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -641,6 +641,7 @@ struct _nxge_ldg_t { p_nxge_ldv_t ldvp; nxge_sys_intr_t sys_intr_handler; p_nxge_t nxgep; + uint32_t htable_idx; }; struct _nxge_ldv_t { @@ -885,6 +886,8 @@ int nxge_port_kstat_update(kstat_t *, int); void nxge_save_cntrs(p_nxge_t); int nxge_m_stat(void *arg, uint_t, uint64_t *); +int nxge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int nxge_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* nxge_hw.c */ void diff --git a/usr/src/uts/common/sys/nxge/nxge_rxdma.h b/usr/src/uts/common/sys/nxge/nxge_rxdma.h index ab0d0cde60..885f051cef 100644 --- a/usr/src/uts/common/sys/nxge/nxge_rxdma.h +++ b/usr/src/uts/common/sys/nxge/nxge_rxdma.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -209,7 +209,7 @@ typedef struct _rx_rcr_ring_t { p_nxge_rx_ring_stats_t rdc_stats; - int poll_flag; /* 1 if polling mode */ + boolean_t poll_flag; /* B_TRUE, if polling mode */ rcrcfig_a_t rcr_cfga; rcrcfig_b_t rcr_cfgb; @@ -244,6 +244,7 @@ typedef struct _rx_rcr_ring_t { uint32_t rcvd_pkt_bytes; /* Received bytes of a packet */ p_nxge_ldv_t ldvp; p_nxge_ldg_t ldgp; + boolean_t started; } rx_rcr_ring_t, *p_rx_rcr_ring_t; diff --git a/usr/src/uts/common/sys/pattr.h b/usr/src/uts/common/sys/pattr.h index 4d3dc29753..1269aeca10 100644 --- a/usr/src/uts/common/sys/pattr.h +++ b/usr/src/uts/common/sys/pattr.h @@ -67,20 +67,24 @@ typedef struct pattr_hcksum_s { */ #define HCK_IPV4_HDRCKSUM 0x01 /* On Transmit: Compute IP header */ /* checksum in hardware. */ - /* On Receive: IP header checksum */ + +#define HCK_IPV4_HDRCKSUM_OK 0x01 /* On Receive: IP header checksum */ /* was verified by h/w and is */ /* correct. 
*/ + #define HCK_PARTIALCKSUM 0x02 /* On Transmit: Compute partial 1's */ /* complement checksum based on */ /* start, stuff and end offsets. */ /* On Receive : Partial checksum */ /* computed and attached. */ + #define HCK_FULLCKSUM 0x04 /* On Transmit: Compute full(in case */ /* of TCP/UDP, full is pseudo-header */ /* + header + payload) checksum for */ /* this packet. */ /* On Receive : Full checksum */ /* computed in h/w and is attached */ + #define HCK_FULLCKSUM_OK 0x08 /* On Transmit: N/A */ /* On Receive: Full checksum status */ /* If set, implies full checksum */ diff --git a/usr/src/uts/common/sys/pool.h b/usr/src/uts/common/sys/pool.h index 679ca05a86..c4cc9fc22a 100644 --- a/usr/src/uts/common/sys/pool.h +++ b/usr/src/uts/common/sys/pool.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_POOL_H #define _SYS_POOL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/time.h> #include <sys/nvpair.h> @@ -41,6 +38,7 @@ extern "C" { #define POOL_DEFAULT 0 /* default pool's ID */ #define POOL_MAXID 999999 /* maximum possible pool ID */ +#define POOL_INVALID -1 /* pools states */ #define POOL_DISABLED 0 /* pools enabled */ @@ -81,6 +79,7 @@ extern size_t pool_bufsz; /* size of pool_buf */ */ extern pool_t *pool_lookup_pool_by_id(poolid_t); extern pool_t *pool_lookup_pool_by_name(char *); +extern pool_t *pool_lookup_pool_by_pset(int); /* * Configuration routines @@ -102,6 +101,7 @@ extern int pool_propput(int, int, id_t, nvpair_t *); extern int pool_proprm(int, int, id_t, char *); extern int pool_propget(char *, int, int, id_t, nvlist_t **); extern int pool_commit(int); +extern void pool_get_name(pool_t *, char **); /* * Synchronization routines @@ -113,6 +113,25 @@ extern void pool_unlock(void); extern void pool_barrier_enter(void); extern void pool_barrier_exit(void); +typedef enum { + POOL_E_ENABLE, + POOL_E_DISABLE, + POOL_E_CHANGE, +} pool_event_t; + +typedef void pool_event_cb_func_t(pool_event_t, poolid_t, void *); + +typedef struct pool_event_cb { + pool_event_cb_func_t *pec_func; + void *pec_arg; + list_node_t pec_list; +} pool_event_cb_t; + +/* + * Routines used to register interest in changes in cpu pools. 
+ */ +extern void pool_event_cb_register(pool_event_cb_t *); +extern void pool_event_cb_unregister(pool_event_cb_t *); #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index fd5db10058..7168f50362 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -1243,7 +1243,6 @@ extern int hcksum_assoc(mblk_t *, struct multidata_s *, struct pdesc_s *, extern void hcksum_retrieve(mblk_t *, struct multidata_s *, struct pdesc_s *, uint32_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *); extern void lso_info_set(mblk_t *, uint32_t, uint32_t); -extern void lso_info_get(mblk_t *, uint32_t *, uint32_t *); extern void lso_info_cleanup(mblk_t *); extern unsigned int bcksum(uchar_t *, int, unsigned int); extern boolean_t is_vmloaned_mblk(mblk_t *, struct multidata_s *, diff --git a/usr/src/uts/common/sys/vnic.h b/usr/src/uts/common/sys/vnic.h index 7e2aeac90a..3a6f5279ee 100644 --- a/usr/src/uts/common/sys/vnic.h +++ b/usr/src/uts/common/sys/vnic.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -111,9 +111,6 @@ typedef enum { */ #define VNIC_IOC_CREATE_FORCE 0x00000004 -/* Allocate a hardware ring to the vnic */ -#define VNIC_IOC_CREATE_REQ_HWRINGS 0x00000008 - typedef struct vnic_ioc_create { datalink_id_t vc_vnic_id; datalink_id_t vc_link_id; diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index b91f128fef..2bb48a60c6 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -63,16 +63,6 @@ typedef struct vnic_s { uint32_t vn_hcksum_txflags; } vnic_t; -#define vn_madd_naddr vn_mma_capab.maddr_naddr -#define vn_maddr_naddrfree vn_mma_capab.maddr_naddrfree -#define vn_maddr_flag vn_mma_capab.maddr_flag -#define vn_maddr_handle vn_mma_capab.maddr_handle -#define vn_maddr_reserve vn_mma_capab.maddr_reserve -#define vn_maddr_add vn_mma_capab.maddr_add -#define vn_maddr_remove vn_mma_capab.maddr_remove -#define vn_maddr_modify vn_mma_capab.maddr_modify -#define vn_maddr_get vn_mma_capab.maddr_get - extern int vnic_dev_create(datalink_id_t, datalink_id_t, vnic_mac_addr_type_t *, int *, uchar_t *, int *, uint_t, uint16_t, vrid_t, int, mac_resource_props_t *, uint32_t, vnic_ioc_diag_t *, cred_t *); diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c index 9ddae7aa30..7c22ff8e52 100644 --- a/usr/src/uts/common/xen/io/xnb.c +++ b/usr/src/uts/common/xen/io/xnb.c @@ -250,8 +250,7 @@ xnb_software_csum(xnb_t *xnbp, mblk_t *mp) * XXPV dme: shouldn't rely on mac_fix_cksum(), not least * because it doesn't cover all of the interesting cases :-( */ - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); return (mac_fix_cksum(mp)); } @@ -342,9 +341,7 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) */ *stuffp = 0; - (void) hcksum_assoc(mp, NULL, NULL, - 0, 0, 0, 0, - HCK_FULLCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); xnbp->xnb_stat_csum_hardware++; @@ -375,9 +372,8 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) *stuffp = (uint16_t)(cksum ? 
cksum : ~cksum); } - (void) hcksum_assoc(mp, NULL, NULL, - start, stuff, length, 0, - HCK_PARTIALCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, start, stuff, length, 0, + HCK_PARTIALCKSUM); xnbp->xnb_stat_csum_hardware++; @@ -911,13 +907,13 @@ replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) mblk_t *new_mp; new_mp = copyb(mp); - if (new_mp == NULL) + if (new_mp == NULL) { cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" "for %p, len %lu", (void *) mp, len); + } - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); - (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, - flags, KM_NOSLEEP); + mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); + mac_hcksum_set(new_mp, start, stuff, end, value, flags); new_mp->b_next = mp->b_next; new_mp->b_prev = mp->b_prev; diff --git a/usr/src/uts/common/xen/io/xnbo.c b/usr/src/uts/common/xen/io/xnbo.c index 78c6ba02e7..9a51328896 100644 --- a/usr/src/uts/common/xen/io/xnbo.c +++ b/usr/src/uts/common/xen/io/xnbo.c @@ -173,8 +173,7 @@ xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) * gain some advantage. */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, &csum, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, &csum, &pflags); /* * If the MAC driver has asserted that the checksum is diff --git a/usr/src/uts/common/xen/io/xnbu.c b/usr/src/uts/common/xen/io/xnbu.c index cf192365cf..c41a584e8b 100644 --- a/usr/src/uts/common/xen/io/xnbu.c +++ b/usr/src/uts/common/xen/io/xnbu.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ static mac_callbacks_t xnbu_callbacks = { xnbu_m_set_mac_addr, xnbu_m_send, NULL, + NULL, xnbu_m_getcapab }; @@ -130,16 +131,8 @@ xnbu_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags) if ((flags & NETTXF_data_validated) != 0) { /* * The checksum is asserted valid. 
- * - * The hardware checksum offload specification says - * that we must provide the actual checksum as well as - * an assertion that it is valid, but the protocol - * stack doesn't actually use it so we don't bother. - * If it was necessary we could grovel in the packet - * to find it. */ - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | HCK_FULLCKSUM_OK, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); } return (mp); @@ -152,8 +145,7 @@ xnbu_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) uint16_t r = 0; uint32_t pflags; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); /* * If the protocol stack has requested checksum diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c index b6d4cad439..534b3f0904 100644 --- a/usr/src/uts/common/xen/io/xnf.c +++ b/usr/src/uts/common/xen/io/xnf.c @@ -257,6 +257,7 @@ static mac_callbacks_t xnf_callbacks = { xnf_set_mac_addr, xnf_send, NULL, + NULL, xnf_getcapab }; @@ -1619,8 +1620,7 @@ xnf_send(void *arg, mblk_t *mp) txp->tx_txreq.size = length; txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; txp->tx_txreq.flags = 0; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, - &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); if (pflags != 0) { /* * If the local protocol stack requests checksum @@ -2104,21 +2104,9 @@ xnf_rx_collect(xnf_t *xnfp) * blank" flag, and hence could have a * packet here that we are asserting * is good with a blank checksum. - * - * The hardware checksum offload - * specification says that we must - * provide the actual checksum as well - * as an assertion that it is valid, - * but the protocol stack doesn't - * actually use it and some other - * drivers don't bother, so we don't. - * If it was necessary we could grovel - * in the packet to find it. 
*/ - (void) hcksum_assoc(mp, NULL, - NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | - HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, + HCK_FULLCKSUM_OK); xnfp->xnf_stat_rx_cksum_no_need++; } if (head == NULL) { diff --git a/usr/src/uts/intel/hxge/Makefile b/usr/src/uts/intel/hxge/Makefile index 40f6b64bcb..836f7c0924 100644 --- a/usr/src/uts/intel/hxge/Makefile +++ b/usr/src/uts/intel/hxge/Makefile @@ -20,12 +20,9 @@ # # uts/intel/hxge/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# -# ident "%Z%%M% %I% %E% SMI" -# # This makefile drives the production of the Sun # 10G hxge Ethernet leaf driver kernel module. # @@ -71,7 +68,6 @@ CFLAGS += -dalign # # Include hxge specific header files # -INC_PATH += -I$(UTSBASE)/common INC_PATH += -I$(UTSBASE)/common/io/hxge # # diff --git a/usr/src/uts/intel/io/amd8111s/amd8111s_main.c b/usr/src/uts/intel/io/amd8111s/amd8111s_main.c index 1664ee7543..317e55b22a 100644 --- a/usr/src/uts/intel/io/amd8111s/amd8111s_main.c +++ b/usr/src/uts/intel/io/amd8111s/amd8111s_main.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -195,6 +195,7 @@ static mac_callbacks_t amd8111s_m_callbacks = { amd8111s_m_multicst, amd8111s_m_unicst, amd8111s_m_tx, + NULL, amd8111s_m_ioctl }; diff --git a/usr/src/uts/intel/io/dnet/dnet.c b/usr/src/uts/intel/io/dnet/dnet.c index 2c045e893c..a6badb9b4b 100644 --- a/usr/src/uts/intel/io/dnet/dnet.c +++ b/usr/src/uts/intel/io/dnet/dnet.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -314,6 +314,7 @@ static mac_callbacks_t dnet_m_callbacks = { dnet_m_multicst, /* mc_multicst */ dnet_m_unicst, /* mc_unicst */ dnet_m_tx, /* mc_tx */ + NULL, NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ diff --git a/usr/src/uts/intel/mii/Makefile b/usr/src/uts/intel/mii/Makefile index 8f3b7b6772..28ac502177 100644 --- a/usr/src/uts/intel/mii/Makefile +++ b/usr/src/uts/intel/mii/Makefile @@ -21,7 +21,7 @@ # # uts/intel/mii/Makefile # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the mii support module. @@ -55,7 +55,7 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy +LDFLAGS += -dy -N misc/mac # # Default build targets. diff --git a/usr/src/uts/intel/qlge/Makefile b/usr/src/uts/intel/qlge/Makefile index 52f1c143a2..82f64ac215 100644 --- a/usr/src/uts/intel/qlge/Makefile +++ b/usr/src/uts/intel/qlge/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the qlge driver kernel module. @@ -57,8 +57,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # MODSTUBS_DIR = $(OBJS_DIR) -INC_PATH += -I$(ROOT)/usr/include -INC_PATH += -I$(UTSBASE)/common/sys INC_PATH += -I$(UTSBASE)/common/sys/fibre-channel/fca/qlge LDFLAGS += -dy -Nmisc/mac -Ndrv/ip diff --git a/usr/src/uts/sparc/hxge/Makefile b/usr/src/uts/sparc/hxge/Makefile index 79b504d443..a04957a00f 100644 --- a/usr/src/uts/sparc/hxge/Makefile +++ b/usr/src/uts/sparc/hxge/Makefile @@ -20,7 +20,7 @@ # # uts/sparc/hxge/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # This makefile drives the production of the Sun @@ -68,7 +68,6 @@ CFLAGS += -dalign # # Include hxge specific header files # -INC_PATH += -I$(UTSBASE)/common INC_PATH += -I$(UTSBASE)/common/io/hxge # # diff --git a/usr/src/uts/sparc/mii/Makefile b/usr/src/uts/sparc/mii/Makefile index 52726241b9..b1e80d5bd6 100644 --- a/usr/src/uts/sparc/mii/Makefile +++ b/usr/src/uts/sparc/mii/Makefile @@ -21,7 +21,7 @@ # # uts/sparc/mii/Makefile # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the mii support module. @@ -55,7 +55,7 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy +LDFLAGS += -dy -N misc/mac # # Default build targets. diff --git a/usr/src/uts/sparc/qlge/Makefile b/usr/src/uts/sparc/qlge/Makefile index 52f1c143a2..82f64ac215 100644 --- a/usr/src/uts/sparc/qlge/Makefile +++ b/usr/src/uts/sparc/qlge/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the qlge driver kernel module. @@ -57,8 +57,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # MODSTUBS_DIR = $(OBJS_DIR) -INC_PATH += -I$(ROOT)/usr/include -INC_PATH += -I$(UTSBASE)/common/sys INC_PATH += -I$(UTSBASE)/common/sys/fibre-channel/fca/qlge LDFLAGS += -dy -Nmisc/mac -Ndrv/ip diff --git a/usr/src/uts/sun/io/eri/eri.c b/usr/src/uts/sun/io/eri/eri.c index ab08fafc39..a4ac10cdd3 100644 --- a/usr/src/uts/sun/io/eri/eri.c +++ b/usr/src/uts/sun/io/eri/eri.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -200,6 +200,7 @@ static mac_callbacks_t eri_m_callbacks = { eri_m_multicst, eri_m_unicst, eri_m_tx, + NULL, eri_m_ioctl, eri_m_getcapab }; @@ -264,9 +265,9 @@ static mac_callbacks_t eri_m_callbacks = { if (type == ETHERTYPE_IP || type == ETHERTYPE_IPV6) { \ start_offset = 0; \ end_offset = MBLKL(bp) - ETHERHEADER_SIZE; \ - (void) hcksum_assoc(bp, NULL, NULL, \ + mac_hcksum_set(bp, \ start_offset, 0, end_offset, sum, \ - HCK_PARTIALCKSUM, 0); \ + HCK_PARTIALCKSUM); \ } else { \ /* \ * Strip the PADS for 802.3 \ @@ -3469,8 +3470,7 @@ eri_send_msg(struct eri *erip, mblk_t *mp) } #ifdef ERI_HWCSUM - hcksum_retrieve(mp, NULL, NULL, &start_offset, &stuff_offset, - NULL, NULL, &flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, NULL, NULL, &flags); if (flags & HCK_PARTIALCKSUM) { if (get_ether_type(mp->b_rptr) == ETHERTYPE_VLAN) { diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c index f25860b251..1f857dbe13 100644 --- a/usr/src/uts/sun4v/io/vnet.c +++ b/usr/src/uts/sun4v/io/vnet.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <sys/errno.h> #include <sys/param.h> +#include <sys/callb.h> #include <sys/stream.h> #include <sys/kmem.h> #include <sys/conf.h> @@ -84,8 +85,12 @@ static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, mac_group_info_t *infop, mac_group_handle_t handle); static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, + uint64_t *val); static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, + uint64_t *val); static int vnet_ring_enable_intr(void *arg); static int vnet_ring_disable_intr(void *arg); static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); @@ -107,7 +112,6 @@ static void 
vnet_unbind_rings(vnet_res_t *vresp); static int vnet_hio_stat(void *, uint_t, uint64_t *); static int vnet_hio_start(void *); static void vnet_hio_stop(void *); -static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type); mblk_t *vnet_hio_tx(void *, mblk_t *); /* Forwarding database (FDB) routines */ @@ -129,6 +133,7 @@ static void vnet_res_start_task(void *arg); static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); +static void vnet_tx_notify_thread(void *); /* Exported to vnet_gen */ int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); @@ -168,8 +173,7 @@ extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); extern void vdds_cleanup_hybrid_res(void *arg); extern void vdds_cleanup_hio(vnet_t *vnetp); -/* Externs imported from mac_impl */ -extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); +extern pri_t minclsyspri; #define DRV_NAME "vnet" #define VNET_FDBE_REFHOLD(p) \ @@ -199,6 +203,7 @@ static mac_callbacks_t vnet_m_callbacks = { vnet_m_multicst, NULL, /* m_unicst entry must be NULL while rx rings are exposed */ NULL, /* m_tx entry must be NULL while tx rings are exposed */ + NULL, vnet_m_ioctl, vnet_m_capab, NULL @@ -232,6 +237,8 @@ uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ /* Configure tx serialization in mac layer for the vnet device */ boolean_t vnet_mac_tx_serialize = B_TRUE; +/* Configure enqueing at Rx soft rings in mac layer for the vnet device */ +boolean_t vnet_mac_rx_queuing = B_TRUE; /* * Set this to non-zero to enable additional internal receive buffer pools @@ -785,6 +792,7 @@ mblk_t * vnet_tx_ring_send(void *arg, mblk_t *mp) { vnet_pseudo_tx_ring_t *tx_ringp; + vnet_tx_ring_stats_t *statsp; vnet_t *vnetp; vnet_res_t *vresp; mblk_t *next; @@ -795,8 +803,10 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) boolean_t is_pvid; /* non-default pvid ? 
*/ boolean_t hres; /* Hybrid resource ? */ void *tx_arg; + size_t size; tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + statsp = &tx_ringp->tx_ring_stats; vnetp = (vnet_t *)tx_ringp->vnetp; DBG1(vnetp, "enter\n"); ASSERT(mp != NULL); @@ -808,6 +818,9 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) next = mp->b_next; mp->b_next = NULL; + /* update stats */ + size = msgsize(mp); + /* * Find fdb entry for the destination * and hold a reference to it. @@ -911,6 +924,8 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) } } + statsp->obytes += size; + statsp->opackets++; mp = next; } @@ -971,6 +986,10 @@ vnet_ring_grp_init(vnet_t *vnetp) } tx_grp->rings = tx_ringp; tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; + mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL); + tx_grp->flowctl_thread = thread_create(NULL, 0, + vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri); rx_grp = &vnetp->rx_grp[0]; rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; @@ -1005,8 +1024,21 @@ vnet_ring_grp_uninit(vnet_t *vnetp) { vnet_pseudo_rx_group_t *rx_grp; vnet_pseudo_tx_group_t *tx_grp; + kt_did_t tid = 0; tx_grp = &vnetp->tx_grp[0]; + + /* Inform tx_notify_thread to exit */ + mutex_enter(&tx_grp->flowctl_lock); + if (tx_grp->flowctl_thread != NULL) { + tid = tx_grp->flowctl_thread->t_did; + tx_grp->flowctl_done = B_TRUE; + cv_signal(&tx_grp->flowctl_cv); + } + mutex_exit(&tx_grp->flowctl_lock); + if (tid != 0) + thread_join(tid); + if (tx_grp->rings != NULL) { ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * @@ -1090,14 +1122,7 @@ vnet_mac_register(vnet_t *vnetp) macp->m_max_sdu = vnetp->mtu; macp->m_margin = VLAN_TAGSZ; - /* - * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to - * workaround tx lock contention issues in nxge. 
- */ macp->m_v12n = MAC_VIRT_LEVEL1; - if (vnet_mac_tx_serialize == B_TRUE) { - macp->m_v12n |= MAC_VIRT_SERIALIZE; - } /* * Finally, we're ready to register ourselves with the MAC layer @@ -1404,6 +1429,73 @@ vnet_tx_update(vio_net_handle_t vrh) } /* + * vnet_tx_notify_thread: + * + * vnet_tx_ring_update() callback function wakes up this thread when + * it gets called. This thread will call mac_tx_ring_update() to + * notify upper mac of flow control getting relieved. Note that + * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly + * because vnet_tx_ring_update() is called from lower mac with + * mi_rw_lock held and mac_tx_ring_update() would also try to grab + * the same lock. + */ +static void +vnet_tx_notify_thread(void *arg) +{ + callb_cpr_t cprinfo; + vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg; + vnet_pseudo_tx_ring_t *tx_ringp; + vnet_t *vnetp; + int i; + + CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr, + "vnet_tx_notify_thread"); + + mutex_enter(&tx_grp->flowctl_lock); + while (!tx_grp->flowctl_done) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock); + CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock); + + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->woken_up) { + tx_ringp->woken_up = B_FALSE; + vnetp = tx_ringp->vnetp; + mac_tx_ring_update(vnetp->mh, tx_ringp->handle); + } + } + } + /* + * The tx_grp is being destroyed, exit the thread. 
+ */ + tx_grp->flowctl_thread = NULL; + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +void +vnet_tx_ring_update(void *arg1, uintptr_t arg2) +{ + vnet_t *vnetp = (vnet_t *)arg1; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) { + mutex_enter(&tx_grp->flowctl_lock); + tx_ringp->woken_up = B_TRUE; + cv_signal(&tx_grp->flowctl_cv); + mutex_exit(&tx_grp->flowctl_lock); + break; + } + } +} + +/* * Update the new mtu of vnet into the mac layer. First check if the device has * been plumbed and if so fail the mtu update. Returns 0 on success. */ @@ -2053,6 +2145,22 @@ vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) * we unmap ring->hw_rh. For rings mapped to LDC resources, we * stop the rx callbacks (in vgen) before we remove ring->hw_rh * (vio_net_resource_unreg()). + * Also, we access ring->hw_rh in vnet_rx_ring_stat(). + * Note that for rings mapped to Hybrid resource, though the + * rings are statically registered with the mac layer, its + * hardware ring mapping (ringp->hw_rh) can be torn down in + * vnet_unbind_hwrings() while the kstat operation is in + * progress. To protect against this, we hold a reference to + * the resource in FDB; this ensures that the thread in + * vio_net_resource_unreg() waits for the reference to be + * dropped before unbinding the ring. + * + * We don't need to do this for rings mapped to LDC resources. + * These rings are registered/unregistered dynamically with + * the mac layer and so any attempt to unregister the ring + * while kstat operation is in progress will block in + * mac_group_rem_ring(). Thus implicitly protects the + * resource (ringp->hw_rh) from disappearing. 
*/ if (cap_rings->mr_type == MAC_RING_TYPE_RX) { @@ -2148,10 +2256,22 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, infop->mri_driver = (mac_ring_driver_t)rx_ringp; infop->mri_start = vnet_rx_ring_start; infop->mri_stop = vnet_rx_ring_stop; + infop->mri_stat = vnet_rx_ring_stat; /* Set the poll function, as this is an rx ring */ infop->mri_poll = vnet_rx_poll; - + /* + * MAC_RING_RX_ENQUEUE bit needed to be set for nxge + * which was not sending packet chains in interrupt + * context. For such drivers, packets are queued in + * Rx soft rings so that we get a chance to switch + * into a polling mode under backlog. This bug (not + * sending packet chains) has now been fixed. Once + * the performance impact is measured, this change + * will be removed. + */ + infop->mri_flags = (vnet_mac_rx_queuing ? + MAC_RING_RX_ENQUEUE : 0); break; } @@ -2178,10 +2298,17 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, infop->mri_driver = (mac_ring_driver_t)tx_ringp; infop->mri_start = vnet_tx_ring_start; infop->mri_stop = vnet_tx_ring_stop; + infop->mri_stat = vnet_tx_ring_stat; /* Set the transmit function, as this is a tx ring */ infop->mri_tx = vnet_tx_ring_send; - + /* + * MAC_RING_TX_SERIALIZE bit needs to be set while + * hybridIO is enabled to workaround tx lock + * contention issues in nxge. + */ + infop->mri_flags = (vnet_mac_tx_serialize ? + MAC_RING_TX_SERIALIZE : 0); break; } @@ -2325,6 +2452,44 @@ vnet_rx_ring_stop(mac_ring_driver_t arg) rx_ringp->state &= ~VNET_RXRING_STARTED; } +static int +vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver; + vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp; + vnet_res_t *vresp; + mac_register_t *macp; + mac_callbacks_t *cbp; + + /* + * Refer to vnet_m_capab() function for detailed comments on ring + * synchronization. 
+ */ + if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) { + READ_ENTER(&vnetp->vsw_fp_rw); + if (vnetp->hio_fp == NULL) { + RW_EXIT(&vnetp->vsw_fp_rw); + return (0); + } + + VNET_FDBE_REFHOLD(vnetp->hio_fp); + RW_EXIT(&vnetp->vsw_fp_rw); + mac_hwring_getstat(rx_ringp->hw_rh, stat, val); + VNET_FDBE_REFRELE(vnetp->hio_fp); + return (0); + } + + ASSERT((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0); + vresp = (vnet_res_t *)rx_ringp->hw_rh; + macp = &vresp->macreg; + cbp = macp->m_callbacks; + + cbp->mc_getstat(macp->m_driver, stat, val); + + return (0); +} + /* ARGSUSED */ static int vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) @@ -2343,6 +2508,31 @@ vnet_tx_ring_stop(mac_ring_driver_t arg) tx_ringp->state &= ~VNET_TXRING_STARTED; } +static int +vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver; + vnet_tx_ring_stats_t *statsp; + + statsp = &tx_ringp->tx_ring_stats; + + switch (stat) { + case MAC_STAT_OPACKETS: + *val = statsp->opackets; + break; + + case MAC_STAT_OBYTES: + *val = statsp->obytes; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + /* * Disable polling for a ring and enable its interrupt. 
*/ @@ -2569,10 +2759,6 @@ vnet_hio_mac_init(vnet_t *vnetp, char *ifname) /* add the recv callback */ mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); - /* add the notify callback - only tx updates for now */ - vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb, - vnetp); - return (0); fail: @@ -2584,11 +2770,6 @@ fail: void vnet_hio_mac_cleanup(vnet_t *vnetp) { - if (vnetp->hio_mnh != NULL) { - (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE); - vnetp->hio_mnh = NULL; - } - if (vnetp->hio_vhp != NULL) { vio_net_resource_unreg(vnetp->hio_vhp); vnetp->hio_vhp = NULL; @@ -2666,7 +2847,7 @@ vnet_bind_hwrings(vnet_t *vnetp) /* Bind the pseudo ring to the underlying hwring */ mac_hwring_setup(rx_ringp->hw_rh, - (mac_resource_handle_t)rx_ringp); + (mac_resource_handle_t)rx_ringp, NULL); /* Start the hwring if needed */ if (rx_ringp->state & VNET_RXRING_STARTED) { @@ -2703,6 +2884,8 @@ vnet_bind_hwrings(vnet_t *vnetp) tx_ringp->hw_rh = hw_rh[i]; tx_ringp->state |= VNET_TXRING_HYBRID; } + tx_grp->tx_notify_handle = + mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp); mac_perim_exit(mph1); return (0); @@ -2734,6 +2917,8 @@ vnet_unbind_hwrings(vnet_t *vnetp) tx_ringp->hw_rh = NULL; } } + (void) mac_client_tx_notify(vnetp->hio_mch, NULL, + tx_grp->tx_notify_handle); rx_grp = &vnetp->rx_grp[0]; for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { @@ -2980,24 +3165,6 @@ vnet_hio_tx(void *arg, mblk_t *mp) return (mp); } -static void -vnet_hio_notify_cb(void *arg, mac_notify_type_t type) -{ - vnet_t *vnetp = (vnet_t *)arg; - mac_perim_handle_t mph; - - mac_perim_enter_by_mh(vnetp->hio_mh, &mph); - switch (type) { - case MAC_NOTE_TX: - vnet_tx_update(vnetp->hio_vhp); - break; - - default: - break; - } - mac_perim_exit(mph); -} - #ifdef VNET_IOC_DEBUG /* diff --git a/usr/src/uts/sun4v/io/vnet_gen.c b/usr/src/uts/sun4v/io/vnet_gen.c index 6bf674fd85..875c8dd93f 100644 --- a/usr/src/uts/sun4v/io/vnet_gen.c +++ b/usr/src/uts/sun4v/io/vnet_gen.c @@ -461,6 +461,7 
@@ static mac_callbacks_t vgen_m_callbacks = { vgen_multicst, vgen_unicst, vgen_tx, + NULL, vgen_ioctl, NULL, NULL diff --git a/usr/src/uts/sun4v/io/vsw.c b/usr/src/uts/sun4v/io/vsw.c index f53adb5af5..a061321e86 100644 --- a/usr/src/uts/sun4v/io/vsw.c +++ b/usr/src/uts/sun4v/io/vsw.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -295,10 +295,7 @@ static mac_callbacks_t vsw_m_callbacks = { vsw_m_promisc, vsw_m_multicst, vsw_m_unicst, - vsw_m_tx, - NULL, - NULL, - NULL + vsw_m_tx }; static struct cb_ops vsw_cb_ops = { diff --git a/usr/src/uts/sun4v/io/vsw_phys.c b/usr/src/uts/sun4v/io/vsw_phys.c index f5fc90b929..c725e8bb5f 100644 --- a/usr/src/uts/sun4v/io/vsw_phys.c +++ b/usr/src/uts/sun4v/io/vsw_phys.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -497,17 +497,12 @@ vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type) * Open a MAC client for a port or an interface. * The flags and their purpose as below: * - * MAC_OPEN_FLAGS_NO_HWRINGS -- This flag is used by default - * for all ports/interface so that they are associated with - * default group & resources. It will not be used for the - * ports that have HybridIO is enabled so that the h/w resources - * assigned to it. - * * MAC_OPEN_FLAGS_SHARES_DESIRED -- This flag is used to indicate * that a port desires a Share. This will be the case with the * the ports that have hybrid mode enabled. This will only cause * MAC layer to allocate a share and corresponding resources - * ahead of time. + * ahead of time. Ports that are not HybridIO enabled are + * associated with default group & resources. * * MAC_UNICAST_TAG_DISABLE -- This flag is used for VLAN * support. 
It will cause MAC to not add any tags, but expect @@ -525,7 +520,7 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) char mac_cl_name[MAXNAMELEN]; const char *dev_name; mac_client_handle_t *mchp; - uint64_t flags = MAC_OPEN_FLAGS_NO_HWRINGS; + uint64_t flags = 0; ASSERT(MUTEX_HELD(&vswp->mac_lock)); if (vswp->mh == NULL) { @@ -545,10 +540,8 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) dev_name = ddi_driver_name(vswp->dip); instance = ddi_get_instance(vswp->dip); if (type == VSW_VNETPORT) { - if (port->p_hio_enabled == B_TRUE) { - flags &= ~MAC_OPEN_FLAGS_NO_HWRINGS; + if (port->p_hio_enabled) flags |= MAC_OPEN_FLAGS_SHARES_DESIRED; - } (void) snprintf(mac_cl_name, MAXNAMELEN, "%s%d%s%d", dev_name, instance, "_port", port->p_instance); } else { @@ -561,6 +554,10 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) cmn_err(CE_NOTE, "!vsw%d:%s mac_client_open() failed\n", vswp->instance, mac_cl_name); } + + if (type != VSW_VNETPORT || !port->p_hio_enabled) + mac_client_set_rings(*mchp, MAC_RXRINGS_NONE, MAC_TXRINGS_NONE); + return (rv); } @@ -1389,7 +1386,7 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) { int rv = 0; uint64_t *bw; - mac_resource_props_t mrp; + mac_resource_props_t *mrp; mac_client_handle_t mch; ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); @@ -1409,15 +1406,15 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) } if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); if (maxbw == 0) { - mrp.mrp_maxbw = MRP_MAXBW_RESETVAL; + mrp->mrp_maxbw = MRP_MAXBW_RESETVAL; } else { - mrp.mrp_maxbw = maxbw; + mrp->mrp_maxbw = maxbw; } - mrp.mrp_mask |= MRP_MAXBW; + mrp->mrp_mask |= MRP_MAXBW; - rv = mac_client_set_resources(mch, &mrp); + rv = mac_client_set_resources(mch, mrp); if (rv != 0) { if (type == VSW_VNETPORT) { cmn_err(CE_NOTE, "!port%d: cannot set " @@ -1434,5 
+1431,6 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) */ *bw = maxbw; } + kmem_free(mrp, sizeof (*mrp)); } } diff --git a/usr/src/uts/sun4v/sys/vnet.h b/usr/src/uts/sun4v/sys/vnet.h index 21fb92852b..e80324110e 100644 --- a/usr/src/uts/sun4v/sys/vnet.h +++ b/usr/src/uts/sun4v/sys/vnet.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -106,6 +106,11 @@ typedef struct vnet_hio_kstats { kstat_named_t noxmtbuf; /* MIB - ifOutDiscards */ } vnet_hio_kstats_t; +typedef struct vnet_tx_ring_stats { + uint64_t opackets; /* # tx packets */ + uint64_t obytes; /* # bytes transmitted */ +} vnet_tx_ring_stats_t; + /* * A vnet resource structure. */ @@ -214,6 +219,8 @@ typedef struct vnet_pseudo_tx_ring { /* ring handle. Hybrid res: ring hdl */ /* of hardware rx ring; LDC res: hdl */ /* to the res itself (vnet_res_t) */ + boolean_t woken_up; + vnet_tx_ring_stats_t tx_ring_stats; /* ring statistics */ } vnet_pseudo_tx_ring_t; /* @@ -241,6 +248,11 @@ typedef struct vnet_pseudo_tx_group { mac_group_handle_t handle; /* grp handle in mac layer */ uint_t ring_cnt; /* total # of rings in grp */ vnet_pseudo_tx_ring_t *rings; /* array of rings */ + kmutex_t flowctl_lock; /* flow control lock */ + kcondvar_t flowctl_cv; + kthread_t *flowctl_thread; + boolean_t flowctl_done; + void *tx_notify_handle; /* Tx ring notification */ } vnet_pseudo_tx_group_t; /* @@ -298,7 +310,6 @@ typedef struct vnet { mac_handle_t hio_mh; /* HIO mac hdl */ mac_client_handle_t hio_mch; /* HIO mac client hdl */ mac_unicast_handle_t hio_muh; /* HIO mac unicst hdl */ - mac_notify_handle_t hio_mnh; /* HIO notify cb hdl */ mac_group_handle_t rx_hwgh; /* HIO rx ring-group hdl */ mac_group_handle_t tx_hwgh; /* HIO tx ring-group hdl */ } vnet_t; |