diff options
214 files changed, 24250 insertions, 7110 deletions
diff --git a/exception_lists/packaging b/exception_lists/packaging index 7f3da27d0e..500926f56f 100644 --- a/exception_lists/packaging +++ b/exception_lists/packaging @@ -90,8 +90,8 @@ usr/include/sys/mac_client_impl.h usr/include/sys/mac_client.h usr/include/sys/mac_flow_impl.h usr/include/sys/mac_impl.h -usr/include/sys/mac_provider.h usr/include/sys/mac_soft_ring.h +usr/include/sys/mac_stat.h # # Private GLDv3 userland libraries and headers # diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 36b830bfbf..2d0ce5e6d5 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -126,6 +126,7 @@ COMMON_SUBDIRS= \ diskmgtd \ dispadmin \ dladm \ + dlstat \ dmesg \ dodatadm \ dtrace \ @@ -157,6 +158,7 @@ COMMON_SUBDIRS= \ filebench \ find \ flowadm \ + flowstat \ fm \ fmli \ fmt \ @@ -563,6 +565,7 @@ MSGSUBDIRS= \ diff \ diffmk \ dladm \ + dlstat \ du \ dumpcs \ ed \ @@ -577,6 +580,7 @@ MSGSUBDIRS= \ filesync \ find \ flowadm \ + flowstat \ fm \ fold \ fs.d \ diff --git a/usr/src/cmd/acctadm/main.c b/usr/src/cmd/acctadm/main.c index 2c610bdc10..8176214b9e 100644 --- a/usr/src/cmd/acctadm/main.c +++ b/usr/src/cmd/acctadm/main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -375,30 +375,7 @@ main(int argc, char *argv[]) } } str2buf(buf, disabled, AC_OFF, type); - } - if (enabled) { - /* - * Lets us get network logging started. - */ - if (type & AC_NET) { - /* - * Default logging interval for AC_NET is - * ACCTADM_NET_LOG_INTERVAL. - */ - (void) priv_set(PRIV_ON, PRIV_EFFECTIVE, - PRIV_SYS_DL_CONFIG, NULL); - err = dladm_start_usagelog(dld_handle, - strcmp(enabled, "basic") == 0 ? 
- DLADM_LOGTYPE_LINK : DLADM_LOGTYPE_FLOW, - ACCTADM_NET_LOG_INTERVAL); - (void) priv_set(PRIV_OFF, PRIV_EFFECTIVE, - PRIV_SYS_DL_CONFIG, NULL); - if (err != DLADM_STATUS_OK) { - die(gettext("failed to start logging " - "network information, error %d\n"), - errno); - } - } + } else if (enabled) { str2buf(buf, enabled, AC_ON, type); } (void) priv_set(PRIV_ON, PRIV_EFFECTIVE, PRIV_SYS_ACCT, NULL); @@ -408,7 +385,6 @@ main(int argc, char *argv[]) "resources\n"), ac_type_name(type)); } (void) priv_set(PRIV_OFF, PRIV_EFFECTIVE, PRIV_SYS_ACCT, NULL); - tracked = buf2str(buf, AC_BUFSIZE, AC_ON, type); untracked = buf2str(buf, AC_BUFSIZE, AC_OFF, type); if (aconf_set_string(AC_PROP_TRACKED, tracked) == -1) @@ -448,6 +424,31 @@ main(int argc, char *argv[]) modified++; } + /* + * Let's get network logging started. We do this after turning on + * accounting and opening the file so that we can start writing + * immediately. + */ + if (enabled && (type & AC_NET)) { + /* + * Default logging interval for AC_NET is + * ACCTADM_NET_LOG_INTERVAL. + */ + (void) priv_set(PRIV_ON, PRIV_EFFECTIVE, + PRIV_SYS_DL_CONFIG, NULL); + err = dladm_start_usagelog(dld_handle, + strcmp(enabled, "basic") == 0 ? 
+ DLADM_LOGTYPE_LINK : DLADM_LOGTYPE_FLOW, + ACCTADM_NET_LOG_INTERVAL); + (void) priv_set(PRIV_OFF, PRIV_EFFECTIVE, + PRIV_SYS_DL_CONFIG, NULL); + if (err != DLADM_STATUS_OK) { + die(gettext("failed to start logging " + "network information, error %d\n"), + errno); + } + } + if (Dflg) { /* * Disable accounting diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c index a55fa79735..713920767c 100644 --- a/usr/src/cmd/dladm/dladm.c +++ b/usr/src/cmd/dladm/dladm.c @@ -320,7 +320,7 @@ static cmd_t cmds[] = { " create-vnic [-t] -l <link> [-m <value> | auto |\n" "\t\t {factory [-n <slot-id>]} | {random [-r <prefix>]} |\n" "\t\t {vrrp -V <vrid> -A {inet | inet6}} [-v <vid> [-f]]\n" - "\t\t [-H] [-p <prop>=<value>[,...]] <vnic-link>" }, + "\t\t [-p <prop>=<value>[,...]] <vnic-link>" }, { "delete-vnic", do_delete_vnic, " delete-vnic [-t] <vnic-link>" }, { "show-vnic", do_show_vnic, @@ -810,18 +810,18 @@ static const ofmt_field_t phys_m_fields[] = { typedef enum { PHYS_H_LINK, - PHYS_H_GROUP, - PHYS_H_GRPTYPE, + PHYS_H_RINGTYPE, PHYS_H_RINGS, PHYS_H_CLIENTS } phys_h_field_index_t; +#define RINGSTRLEN 21 + static const ofmt_field_t phys_h_fields[] = { { "LINK", 13, PHYS_H_LINK, print_phys_one_hwgrp_cb}, -{ "GROUP", 9, PHYS_H_GROUP, print_phys_one_hwgrp_cb}, -{ "GROUPTYPE", 7, PHYS_H_GRPTYPE, print_phys_one_hwgrp_cb}, -{ "RINGS", 17, PHYS_H_RINGS, print_phys_one_hwgrp_cb}, -{ "CLIENTS", 21, PHYS_H_CLIENTS, print_phys_one_hwgrp_cb}, +{ "RINGTYPE", 9, PHYS_H_RINGTYPE, print_phys_one_hwgrp_cb}, +{ "RINGS", RINGSTRLEN, PHYS_H_RINGS, print_phys_one_hwgrp_cb}, +{ "CLIENTS", 24, PHYS_H_CLIENTS, print_phys_one_hwgrp_cb}, { NULL, 0, 0, NULL}} ; @@ -3694,6 +3694,13 @@ typedef struct { static boolean_t print_phys_one_hwgrp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) { + int i; + boolean_t first = B_TRUE; + int start = -1; + int end = -1; + char ringstr[RINGSTRLEN]; + char ringsubstr[RINGSTRLEN]; + print_phys_hwgrp_state_t *hg_state = ofarg->ofmt_cbarg; 
dladm_hwgrp_attr_t *attr = hg_state->hs_grp_attr; @@ -3701,15 +3708,78 @@ print_phys_one_hwgrp_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) case PHYS_H_LINK: (void) snprintf(buf, bufsize, "%s", attr->hg_link_name); break; - case PHYS_H_GROUP: - (void) snprintf(buf, bufsize, "%d", attr->hg_grp_num); - break; - case PHYS_H_GRPTYPE: + case PHYS_H_RINGTYPE: (void) snprintf(buf, bufsize, "%s", attr->hg_grp_type == DLADM_HWGRP_TYPE_RX ? "RX" : "TX"); break; case PHYS_H_RINGS: - (void) snprintf(buf, bufsize, "%d", attr->hg_n_rings); + ringstr[0] = '\0'; + for (i = 0; i < attr->hg_n_rings; i++) { + uint_t index = attr->hg_rings[i]; + + if (start == -1) { + start = index; + end = index; + } else if (index == end + 1) { + end = index; + } else { + if (start == end) { + if (first) { + (void) snprintf( + ringsubstr, + RINGSTRLEN, "%d", + start); + first = B_FALSE; + } else { + (void) snprintf( + ringsubstr, + RINGSTRLEN, ",%d", + start); + } + } else { + if (first) { + (void) snprintf( + ringsubstr, + RINGSTRLEN, + "%d-%d", + start, end); + first = B_FALSE; + } else { + (void) snprintf( + ringsubstr, + RINGSTRLEN, + ",%d-%d", + start, end); + } + } + (void) strlcat(ringstr, ringsubstr, + RINGSTRLEN); + start = index; + end = index; + } + } + /* The last one */ + if (start != -1) { + if (first) { + if (start == end) { + (void) snprintf(buf, bufsize, "%d", + start); + } else { + (void) snprintf(buf, bufsize, "%d-%d", + start, end); + } + } else { + if (start == end) { + (void) snprintf(ringsubstr, RINGSTRLEN, + ",%d", start); + } else { + (void) snprintf(ringsubstr, RINGSTRLEN, + ",%d-%d", start, end); + } + (void) strlcat(ringstr, ringsubstr, RINGSTRLEN); + (void) snprintf(buf, bufsize, "%s", ringstr); + } + } break; case PHYS_H_CLIENTS: if (attr->hg_client_names[0] == '\0') { @@ -4232,8 +4302,7 @@ do_show_phys(int argc, char *argv[], const char *use) "link,media,state,speed,duplex,device"; char *all_inactive_fields = "link,device,media,flags"; char *all_mac_fields = 
"link,slot,address,inuse,client"; - char *all_hwgrp_fields = - "link,group,grouptype,rings,clients"; + char *all_hwgrp_fields = "link,ringtype,rings,clients"; const ofmt_field_t *pf; ofmt_handle_t ofmt; ofmt_status_t oferr; @@ -4534,9 +4603,6 @@ do_create_vnic(int argc, char *argv[], const char *use) case 'f': flags |= DLADM_OPT_FORCE; break; - case 'H': - flags |= DLADM_OPT_HWRINGS; - break; default: die_opterr(optopt, option, use); } @@ -8722,7 +8788,7 @@ warn(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); } /* PRINTFLIKE2 */ @@ -8779,7 +8845,7 @@ die(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); /* close dladm handle if it was opened */ if (handle != NULL) diff --git a/usr/src/cmd/dlstat/Makefile b/usr/src/cmd/dlstat/Makefile new file mode 100644 index 0000000000..8885dbc5a0 --- /dev/null +++ b/usr/src/cmd/dlstat/Makefile @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# + +PROG= dlstat + +ROOTFS_PROG= $(PROG) + +include ../Makefile.cmd + +XGETFLAGS += -a -x $(PROG).xcl +LDLIBS += -L$(ROOT)/lib +LDLIBS += -ldladm -linetutil + +.KEEP_STATE: + +all: $(ROOTFS_PROG) + +install: all $(ROOTSBINPROG) + $(RM) $(ROOTUSRSBINPROG) + -$(SYMLINK) ../../sbin/$(PROG) $(ROOTUSRSBINPROG) + +clean: + +lint: lint_PROG + +include ../Makefile.targ diff --git a/usr/src/cmd/dlstat/dlstat.c b/usr/src/cmd/dlstat/dlstat.c new file mode 100644 index 0000000000..a931ba82ff --- /dev/null +++ b/usr/src/cmd/dlstat/dlstat.c @@ -0,0 +1,2457 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <stdio.h> +#include <ctype.h> +#include <locale.h> +#include <signal.h> +#include <stdarg.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <stropts.h> +#include <sys/stat.h> +#include <errno.h> +#include <strings.h> +#include <getopt.h> +#include <unistd.h> +#include <priv.h> +#include <termios.h> +#include <pwd.h> +#include <auth_attr.h> +#include <auth_list.h> +#include <libintl.h> +#include <libdevinfo.h> +#include <libdlpi.h> +#include <libdladm.h> +#include <libdllink.h> +#include <libdlstat.h> +#include <libdlaggr.h> +#include <libinetutil.h> +#include <bsm/adt.h> +#include <bsm/adt_event.h> +#include <stddef.h> +#include <ofmt.h> + +typedef struct link_chain_s { + datalink_id_t lc_linkid; + boolean_t lc_visited; + dladm_stat_chain_t *lc_statchain[DLADM_STAT_NUM_STATS]; + struct link_chain_s *lc_next; +} link_chain_t; + +typedef void * (*stats2str_t)(const char *, void *, + char, boolean_t); + +typedef struct show_state { + link_chain_t *ls_linkchain; + boolean_t ls_stattype[DLADM_STAT_NUM_STATS]; + stats2str_t ls_stats2str[DLADM_STAT_NUM_STATS]; + ofmt_handle_t ls_ofmt; + char ls_unit; + boolean_t ls_parsable; +} show_state_t; + +typedef struct show_history_state_s { + boolean_t hs_plot; + boolean_t hs_parsable; + boolean_t hs_printheader; + boolean_t hs_first; + boolean_t hs_showall; + ofmt_handle_t hs_ofmt; +} show_history_state_t; + +/* + * callback functions for printing output and error diagnostics. 
+ */ +static ofmt_cb_t print_default_cb; + +static void dlstat_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t); + +typedef void cmdfunc_t(int, char **, const char *); + +static cmdfunc_t do_show, do_show_history, do_show_phys, do_show_link; +static cmdfunc_t do_show_aggr; + +static void die(const char *, ...); +static void die_optdup(int); +static void die_opterr(int, int, const char *); +static void die_dlerr(dladm_status_t, const char *, ...); +static void warn(const char *, ...); + +typedef struct cmd { + char *c_name; + cmdfunc_t *c_fn; + const char *c_usage; +} cmd_t; + +static cmd_t cmds[] = { + { "", do_show, + "dlstat [-r | -t] [-i <interval>] [link]\n" + " dlstat [-a | -A] [-i <interval>] [-p] [ -o field[,...]]\n" + " [-u R|K|M|G|T|P] [link]"}, + { "show-phys", do_show_phys, + "dlstat show-phys [-r | -t] [-i interval] [-a]\n" + " [-p] [ -o field[,...]] [-u R|K|M|G|T|P] " + "[link]"}, + { "show-link", do_show_link, + "dlstat show-link [-r [-F] | -t] [-i interval] [-a]\n" + " [-p] [ -o field[,...]] [-u R|K|M|G|T|P] " + "[link]\n" + " dlstat show-link -h [-a] [-d] [-F <format>]\n" + " [-s <DD/MM/YYYY,HH:MM:SS>] " + "[-e <DD/MM/YYYY,HH:MM:SS>]\n" + " -f <logfile> [<link>]" }, + { "show-aggr", do_show_aggr, + "dlstat show-aggr [-r | -t] [-i interval] [-p]\n" + " [ -o field[,...]] [-u R|K|M|G|T|P] " + " [link]" } +}; + +#define MAXSTATLEN 15 + +/* + * dlstat : total stat fields + */ +typedef struct total_fields_buf_s { + char t_linkname[MAXLINKNAMELEN]; + char t_ipackets[MAXSTATLEN]; + char t_rbytes[MAXSTATLEN]; + char t_opackets[MAXSTATLEN]; + char t_obytes[MAXSTATLEN]; +} total_fields_buf_t; + +static ofmt_field_t total_s_fields[] = { +{ "LINK", 15, + offsetof(total_fields_buf_t, t_linkname), print_default_cb}, +{ "IPKTS", 8, + offsetof(total_fields_buf_t, t_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(total_fields_buf_t, t_rbytes), print_default_cb}, +{ "OPKTS", 8, + offsetof(total_fields_buf_t, t_opackets), print_default_cb}, +{ "OBYTES", 8, 
+ offsetof(total_fields_buf_t, t_obytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-phys: both Rx and Tx stat fields + */ +typedef struct ring_fields_buf_s { + char r_linkname[MAXLINKNAMELEN]; + char r_type[MAXSTATLEN]; + char r_id[MAXSTATLEN]; + char r_index[MAXSTATLEN]; + char r_packets[MAXSTATLEN]; + char r_bytes[MAXSTATLEN]; +} ring_fields_buf_t; + +static ofmt_field_t ring_s_fields[] = { +{ "LINK", 15, + offsetof(ring_fields_buf_t, r_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(ring_fields_buf_t, r_type), print_default_cb}, +{ "ID", 7, + offsetof(ring_fields_buf_t, r_id), print_default_cb}, +{ "INDEX", 6, + offsetof(ring_fields_buf_t, r_index), print_default_cb}, +{ "PKTS", 8, + offsetof(ring_fields_buf_t, r_packets), print_default_cb}, +{ "BYTES", 8, + offsetof(ring_fields_buf_t, r_bytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-phys -r: Rx Ring stat fields + */ +typedef struct rx_ring_fields_buf_s { + char rr_linkname[MAXLINKNAMELEN]; + char rr_type[MAXSTATLEN]; + char rr_id[MAXSTATLEN]; + char rr_index[MAXSTATLEN]; + char rr_ipackets[MAXSTATLEN]; + char rr_rbytes[MAXSTATLEN]; +} rx_ring_fields_buf_t; + +static ofmt_field_t rx_ring_s_fields[] = { +{ "LINK", 15, + offsetof(rx_ring_fields_buf_t, rr_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(rx_ring_fields_buf_t, rr_type), print_default_cb}, +{ "ID", 7, + offsetof(rx_ring_fields_buf_t, rr_id), print_default_cb}, +{ "INDEX", 6, + offsetof(rx_ring_fields_buf_t, rr_index), print_default_cb}, +{ "IPKTS", 8, + offsetof(rx_ring_fields_buf_t, rr_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(rx_ring_fields_buf_t, rr_rbytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-phys -t: Tx Ring stat fields + */ +typedef struct tx_ring_fields_buf_s { + char tr_linkname[MAXLINKNAMELEN]; + char tr_type[MAXSTATLEN]; + char tr_id[MAXSTATLEN]; + char tr_index[MAXSTATLEN]; + char tr_opackets[MAXSTATLEN]; + char tr_obytes[MAXSTATLEN]; +} 
tx_ring_fields_buf_t; + +static ofmt_field_t tx_ring_s_fields[] = { +{ "LINK", 15, + offsetof(tx_ring_fields_buf_t, tr_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(tx_ring_fields_buf_t, tr_type), print_default_cb}, +{ "ID", 7, + offsetof(tx_ring_fields_buf_t, tr_id), print_default_cb}, +{ "INDEX", 6, + offsetof(tx_ring_fields_buf_t, tr_index), print_default_cb}, +{ "OPKTS", 8, + offsetof(tx_ring_fields_buf_t, tr_opackets), print_default_cb}, +{ "OBYTES", 8, + offsetof(tx_ring_fields_buf_t, tr_obytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link: both Rx and Tx lane fields + */ +typedef struct lane_fields_buf_s { + char l_linkname[MAXLINKNAMELEN]; + char l_type[MAXSTATLEN]; + char l_id[MAXSTATLEN]; + char l_index[MAXSTATLEN]; + char l_packets[MAXSTATLEN]; + char l_bytes[MAXSTATLEN]; +} lane_fields_buf_t; + +static ofmt_field_t lane_s_fields[] = { +{ "LINK", 15, + offsetof(lane_fields_buf_t, l_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(lane_fields_buf_t, l_type), print_default_cb}, +{ "ID", 7, + offsetof(lane_fields_buf_t, l_id), print_default_cb}, +{ "INDEX", 6, + offsetof(lane_fields_buf_t, l_index), print_default_cb}, +{ "PKTS", 8, + offsetof(lane_fields_buf_t, l_packets), print_default_cb}, +{ "BYTES", 8, + offsetof(lane_fields_buf_t, l_bytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link -r, dlstat -r: Rx Lane stat fields + */ +typedef struct rx_lane_fields_buf_s { + char rl_linkname[MAXLINKNAMELEN]; + char rl_type[MAXSTATLEN]; + char rl_id[MAXSTATLEN]; + char rl_index[MAXSTATLEN]; + char rl_ipackets[MAXSTATLEN]; + char rl_rbytes[MAXSTATLEN]; + char rl_intrs[MAXSTATLEN]; + char rl_polls[MAXSTATLEN]; + char rl_sdrops[MAXSTATLEN]; + char rl_chl10[MAXSTATLEN]; + char rl_ch10_50[MAXSTATLEN]; + char rl_chg50[MAXSTATLEN]; +} rx_lane_fields_buf_t; + +static ofmt_field_t rx_lane_s_fields[] = { +{ "LINK", 10, + offsetof(rx_lane_fields_buf_t, rl_linkname), print_default_cb}, +{ "TYPE", 5, + 
offsetof(rx_lane_fields_buf_t, rl_type), print_default_cb}, +{ "ID", 7, + offsetof(rx_lane_fields_buf_t, rl_id), print_default_cb}, +{ "INDEX", 6, + offsetof(rx_lane_fields_buf_t, rl_index), print_default_cb}, +{ "IPKTS", 8, + offsetof(rx_lane_fields_buf_t, rl_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(rx_lane_fields_buf_t, rl_rbytes), print_default_cb}, +{ "INTRS", 8, + offsetof(rx_lane_fields_buf_t, rl_intrs), print_default_cb}, +{ "POLLS", 8, + offsetof(rx_lane_fields_buf_t, rl_polls), print_default_cb}, +{ "SDROPS", 8, + offsetof(rx_lane_fields_buf_t, rl_sdrops), print_default_cb}, +{ "CH<10", 8, + offsetof(rx_lane_fields_buf_t, rl_chl10), print_default_cb}, +{ "CH10-50", 8, + offsetof(rx_lane_fields_buf_t, rl_ch10_50), print_default_cb}, +{ "CH>50", 8, + offsetof(rx_lane_fields_buf_t, rl_chg50), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link -r -F: Rx fanout stat fields + */ +typedef struct rx_fanout_lane_fields_buf_s { + char rfl_linkname[MAXLINKNAMELEN]; + char rfl_type[MAXSTATLEN]; + char rfl_id[MAXSTATLEN]; + char rfl_index[MAXSTATLEN]; + char rfl_fout[MAXSTATLEN]; + char rfl_ipackets[MAXSTATLEN]; + char rfl_rbytes[MAXSTATLEN]; +} rx_fanout_lane_fields_buf_t; + +static ofmt_field_t rx_fanout_lane_s_fields[] = { +{ "LINK", 15, + offsetof(rx_fanout_lane_fields_buf_t, rfl_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(rx_fanout_lane_fields_buf_t, rfl_type), print_default_cb}, +{ "ID", 7, + offsetof(rx_fanout_lane_fields_buf_t, rfl_id), print_default_cb}, +{ "INDEX", 6, + offsetof(rx_fanout_lane_fields_buf_t, rfl_index), print_default_cb}, +{ "FOUT", 6, + offsetof(rx_fanout_lane_fields_buf_t, rfl_fout), print_default_cb}, +{ "IPKTS", 8, + offsetof(rx_fanout_lane_fields_buf_t, rfl_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(rx_fanout_lane_fields_buf_t, rfl_rbytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-link -t: Tx Lane stat fields + */ +typedef struct tx_lane_fields_buf_s { + 
char tl_linkname[MAXLINKNAMELEN]; + char tl_index[MAXSTATLEN]; + char tl_type[MAXSTATLEN]; + char tl_id[MAXSTATLEN]; + char tl_opackets[MAXSTATLEN]; + char tl_obytes[MAXSTATLEN]; + char tl_blockcnt[MAXSTATLEN]; + char tl_unblockcnt[MAXSTATLEN]; + char tl_sdrops[MAXSTATLEN]; +} tx_lane_fields_buf_t; + +static ofmt_field_t tx_lane_s_fields[] = { +{ "LINK", 15, + offsetof(tx_lane_fields_buf_t, tl_linkname), print_default_cb}, +{ "TYPE", 5, + offsetof(tx_lane_fields_buf_t, tl_type), print_default_cb}, +{ "ID", 7, + offsetof(tx_lane_fields_buf_t, tl_id), print_default_cb}, +{ "INDEX", 6, + offsetof(tx_lane_fields_buf_t, tl_index), print_default_cb}, +{ "OPKTS", 8, + offsetof(tx_lane_fields_buf_t, tl_opackets), print_default_cb}, +{ "OBYTES", 8, + offsetof(tx_lane_fields_buf_t, tl_obytes), print_default_cb}, +{ "BLKCNT", 8, + offsetof(tx_lane_fields_buf_t, tl_blockcnt), print_default_cb}, +{ "UBLKCNT", 8, + offsetof(tx_lane_fields_buf_t, tl_unblockcnt), print_default_cb}, +{ "SDROPS", 8, + offsetof(tx_lane_fields_buf_t, tl_sdrops), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * dlstat show-aggr: aggr port stat fields + */ +typedef struct aggr_port_fields_buf_s { + char ap_linkname[MAXLINKNAMELEN]; + char ap_portname[MAXLINKNAMELEN]; + char ap_ipackets[MAXSTATLEN]; + char ap_rbytes[MAXSTATLEN]; + char ap_opackets[MAXSTATLEN]; + char ap_obytes[MAXSTATLEN]; +} aggr_port_fields_buf_t; + +static ofmt_field_t aggr_port_s_fields[] = { +{ "LINK", 15, + offsetof(aggr_port_fields_buf_t, ap_linkname), print_default_cb}, +{ "PORT", 15, + offsetof(aggr_port_fields_buf_t, ap_portname), print_default_cb}, +{ "IPKTS", 8, + offsetof(aggr_port_fields_buf_t, ap_ipackets), print_default_cb}, +{ "RBYTES", 8, + offsetof(aggr_port_fields_buf_t, ap_rbytes), print_default_cb}, +{ "OPKTS", 8, + offsetof(aggr_port_fields_buf_t, ap_opackets), print_default_cb}, +{ "OBYTES", 8, + offsetof(aggr_port_fields_buf_t, ap_obytes), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * structures for 
'dlstat show-link -h' + */ +typedef struct history_fields_buf_s { + char h_link[12]; + char h_duration[10]; + char h_ipackets[9]; + char h_rbytes[10]; + char h_opackets[9]; + char h_obytes[10]; + char h_bandwidth[14]; +} history_fields_buf_t; + +static ofmt_field_t history_fields[] = { +{ "LINK", 13, + offsetof(history_fields_buf_t, h_link), print_default_cb}, +{ "DURATION", 11, + offsetof(history_fields_buf_t, h_duration), print_default_cb}, +{ "IPKTS", 10, + offsetof(history_fields_buf_t, h_ipackets), print_default_cb}, +{ "RBYTES", 11, + offsetof(history_fields_buf_t, h_rbytes), print_default_cb}, +{ "OPKTS", 10, + offsetof(history_fields_buf_t, h_opackets), print_default_cb}, +{ "OBYTES", 11, + offsetof(history_fields_buf_t, h_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_fields_buf_t, h_bandwidth), print_default_cb}, +{ NULL, 0, 0, NULL}}; + +/* + * structures for 'dlstat show-link -h link' + */ +typedef struct history_l_fields_buf_s { + char hl_link[12]; + char hl_stime[13]; + char hl_etime[13]; + char hl_rbytes[8]; + char hl_obytes[8]; + char hl_bandwidth[14]; +} history_l_fields_buf_t; + +static ofmt_field_t history_l_fields[] = { +/* name, field width, offset */ +{ "LINK", 13, + offsetof(history_l_fields_buf_t, hl_link), print_default_cb}, +{ "START", 14, + offsetof(history_l_fields_buf_t, hl_stime), print_default_cb}, +{ "END", 14, + offsetof(history_l_fields_buf_t, hl_etime), print_default_cb}, +{ "RBYTES", 9, + offsetof(history_l_fields_buf_t, hl_rbytes), print_default_cb}, +{ "OBYTES", 9, + offsetof(history_l_fields_buf_t, hl_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_l_fields_buf_t, hl_bandwidth), print_default_cb}, +{ NULL, 0, 0, NULL}} +; + +static char *progname; + +/* + * Handle to libdladm. Opened in main() before the sub-command + * specific function is called. 
+ */ +static dladm_handle_t handle = NULL; + +static void +usage(void) +{ + int i; + cmd_t *cmdp; + + (void) fprintf(stderr, gettext("usage: ")); + for (i = 0; i < sizeof (cmds) / sizeof (cmds[0]); i++) { + cmdp = &cmds[i]; + if (cmdp->c_usage != NULL) + (void) fprintf(stderr, "%s\n", gettext(cmdp->c_usage)); + } + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(1); +} + +int +main(int argc, char *argv[]) +{ + int i; + cmd_t *cmdp; + dladm_status_t status; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + progname = argv[0]; + + /* Open the libdladm handle */ + if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) + die_dlerr(status, "could not open /dev/dld"); + + if (argc == 1) { + do_show(argc - 1, NULL, cmds[0].c_usage); + goto done; + } + + for (i = 0; i < sizeof (cmds) / sizeof (cmds[0]); i++) { + cmdp = &cmds[i]; + if (strcmp(argv[1], cmdp->c_name) == 0) { + cmdp->c_fn(argc - 1, &argv[1], cmdp->c_usage); + goto done; + } + } + + do_show(argc, &argv[0], cmds[0].c_usage); + +done: + dladm_close(handle); + return (0); +} + +/*ARGSUSED*/ +static int +show_history_date(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = arg; + time_t stime; + char timebuf[20]; + dladm_status_t status; + uint32_t flags; + + /* + * Only show history information for existing links unless '-a' + * is specified. 
+ */ + if (!state->hs_showall) { + if ((status = dladm_name2info(handle, history->du_name, + NULL, &flags, NULL, NULL)) != DLADM_STATUS_OK) { + return (status); + } + if ((flags & DLADM_OPT_ACTIVE) == 0) + return (DLADM_STATUS_LINKINVAL); + } + + stime = history->du_stime; + (void) strftime(timebuf, sizeof (timebuf), "%m/%d/%Y", + localtime(&stime)); + (void) printf("%s\n", timebuf); + + return (DLADM_STATUS_OK); +} + +static int +show_history_time(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = arg; + char buf[DLADM_STRSIZE]; + history_l_fields_buf_t ubuf; + time_t time; + double bw; + dladm_status_t status; + uint32_t flags; + + /* + * Only show history information for existing links unless '-a' + * is specified. + */ + if (!state->hs_showall) { + if ((status = dladm_name2info(handle, history->du_name, + NULL, &flags, NULL, NULL)) != DLADM_STATUS_OK) { + return (status); + } + if ((flags & DLADM_OPT_ACTIVE) == 0) + return (DLADM_STATUS_LINKINVAL); + } + + if (state->hs_plot) { + if (!state->hs_printheader) { + if (state->hs_first) { + (void) printf("# Time"); + state->hs_first = B_FALSE; + } + (void) printf(" %s", history->du_name); + if (history->du_last) { + (void) printf("\n"); + state->hs_first = B_TRUE; + state->hs_printheader = B_TRUE; + } + } else { + if (state->hs_first) { + time = history->du_etime; + (void) strftime(buf, sizeof (buf), "%T", + localtime(&time)); + state->hs_first = B_FALSE; + (void) printf("%s", buf); + } + bw = (double)history->du_bandwidth/1000; + (void) printf(" %.2f", bw); + if (history->du_last) { + (void) printf("\n"); + state->hs_first = B_TRUE; + } + } + return (DLADM_STATUS_OK); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.hl_link, sizeof (ubuf.hl_link), "%s", + history->du_name); + time = history->du_stime; + (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.hl_stime, sizeof (ubuf.hl_stime), "%s", + buf); + time = history->du_etime; + (void) strftime(buf, 
sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.hl_etime, sizeof (ubuf.hl_etime), "%s", + buf); + (void) snprintf(ubuf.hl_rbytes, sizeof (ubuf.hl_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.hl_obytes, sizeof (ubuf.hl_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.hl_bandwidth, sizeof (ubuf.hl_bandwidth), + "%s Mbps", dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->hs_ofmt, &ubuf); + return (DLADM_STATUS_OK); +} + +static int +show_history_res(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = arg; + char buf[DLADM_STRSIZE]; + history_fields_buf_t ubuf; + dladm_status_t status; + uint32_t flags; + + /* + * Only show history information for existing links unless '-a' + * is specified. + */ + if (!state->hs_showall) { + if ((status = dladm_name2info(handle, history->du_name, + NULL, &flags, NULL, NULL)) != DLADM_STATUS_OK) { + return (status); + } + if ((flags & DLADM_OPT_ACTIVE) == 0) + return (DLADM_STATUS_LINKINVAL); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.h_link, sizeof (ubuf.h_link), "%s", + history->du_name); + (void) snprintf(ubuf.h_duration, sizeof (ubuf.h_duration), + "%llu", history->du_duration); + (void) snprintf(ubuf.h_ipackets, sizeof (ubuf.h_ipackets), + "%llu", history->du_ipackets); + (void) snprintf(ubuf.h_rbytes, sizeof (ubuf.h_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.h_opackets, sizeof (ubuf.h_opackets), + "%llu", history->du_opackets); + (void) snprintf(ubuf.h_obytes, sizeof (ubuf.h_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.h_bandwidth, sizeof (ubuf.h_bandwidth), + "%s Mbps", dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->hs_ofmt, &ubuf); + + return (DLADM_STATUS_OK); +} + +static boolean_t +valid_formatspec(char *formatspec_str) +{ + return (strcmp(formatspec_str, "gnuplot") == 0); +} + +/*ARGSUSED*/ +static void +do_show_history(int argc, char *argv[], const char *use) +{ + char 
*file = NULL; + int opt; + dladm_status_t status; + boolean_t d_arg = B_FALSE; + char *stime = NULL; + char *etime = NULL; + char *resource = NULL; + show_history_state_t state; + boolean_t o_arg = B_FALSE; + boolean_t F_arg = B_FALSE; + char *fields_str = NULL; + char *formatspec_str = NULL; + char *all_l_fields = + "link,start,end,rbytes,obytes,bandwidth"; + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = 0; + + bzero(&state, sizeof (show_history_state_t)); + state.hs_parsable = B_FALSE; + state.hs_printheader = B_FALSE; + state.hs_plot = B_FALSE; + state.hs_first = B_TRUE; + + while ((opt = getopt(argc, argv, "das:e:o:f:F:")) != -1) { + switch (opt) { + case 'd': + d_arg = B_TRUE; + break; + case 'a': + state.hs_showall = B_TRUE; + break; + case 'f': + file = optarg; + break; + case 's': + stime = optarg; + break; + case 'e': + etime = optarg; + break; + case 'o': + o_arg = B_TRUE; + fields_str = optarg; + break; + case 'F': + state.hs_plot = F_arg = B_TRUE; + formatspec_str = optarg; + break; + default: + die_opterr(optopt, opt, use); + break; + } + } + + if (file == NULL) + die("show-link -h requires a file"); + + if (optind == (argc-1)) { + uint32_t flags; + + resource = argv[optind]; + if (!state.hs_showall && + (((status = dladm_name2info(handle, resource, NULL, &flags, + NULL, NULL)) != DLADM_STATUS_OK) || + ((flags & DLADM_OPT_ACTIVE) == 0))) { + die("invalid link: '%s'", resource); + } + } + + if (F_arg && d_arg) + die("incompatible -d and -F options"); + + if (F_arg && !valid_formatspec(formatspec_str)) + die("Format specifier %s not supported", formatspec_str); + + if (state.hs_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (resource == NULL && stime == NULL && etime == NULL) { + oferr = ofmt_open(fields_str, history_fields, ofmtflags, 0, + &ofmt); + } else { + if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) + fields_str = all_l_fields; + oferr = ofmt_open(fields_str, history_l_fields, ofmtflags, 0, + &ofmt); + + } + 
dlstat_ofmt_check(oferr, state.hs_parsable, ofmt); + state.hs_ofmt = ofmt; + + if (d_arg) { + /* Print log dates */ + status = dladm_usage_dates(show_history_date, + DLADM_LOGTYPE_LINK, file, resource, &state); + } else if (resource == NULL && stime == NULL && etime == NULL && + !F_arg) { + /* Print summary */ + status = dladm_usage_summary(show_history_res, + DLADM_LOGTYPE_LINK, file, &state); + } else if (resource != NULL) { + /* Print log entries for named resource */ + status = dladm_walk_usage_res(show_history_time, + DLADM_LOGTYPE_LINK, file, resource, stime, etime, &state); + } else { + /* Print time and information for each link */ + status = dladm_walk_usage_time(show_history_time, + DLADM_LOGTYPE_LINK, file, stime, etime, &state); + } + + if (status != DLADM_STATUS_OK) + die_dlerr(status, "show-link -h"); + ofmt_close(ofmt); +} + +boolean_t +dlstat_unit(char *oarg, char *unit) +{ + if ((strcmp(oarg, "R") == 0) || (strcmp(oarg, "K") == 0) || + (strcmp(oarg, "M") == 0) || (strcmp(oarg, "G") == 0) || + (strcmp(oarg, "T") == 0) || (strcmp(oarg, "P") == 0)) { + *unit = oarg[0]; + return (B_TRUE); + } + + return (B_FALSE); +} + +void +map_to_units(char *buf, uint_t bufsize, double num, char unit, + boolean_t parsable) +{ + if (parsable) { + (void) snprintf(buf, bufsize, "%.0lf", num); + return; + } + + if (unit == '\0') { + int index; + + for (index = 0; (int)(num/1000) != 0; index++, num /= 1000) + ; + + switch (index) { + case 0: + unit = '\0'; + break; + case 1: + unit = 'K'; + break; + case 2: + unit = 'M'; + break; + case 3: + unit = 'G'; + break; + case 4: + unit = 'T'; + break; + case 5: + /* Largest unit supported */ + default: + unit = 'P'; + break; + } + } else { + switch (unit) { + case 'R': + /* Already raw numbers */ + unit = '\0'; + break; + case 'K': + num /= 1000; + break; + case 'M': + num /= (1000*1000); + break; + case 'G': + num /= (1000*1000*1000); + break; + case 'T': + num /= (1000.0*1000.0*1000.0*1000.0); + break; + case 'P': + /* 
Largest unit supported */ + default: + num /= (1000.0*1000.0*1000.0*1000.0*1000.0); + break; + } + } + + if (unit == '\0') + (void) snprintf(buf, bufsize, " %7.0lf%c", num, unit); + else + (void) snprintf(buf, bufsize, " %6.2lf%c", num, unit); +} + +/* + * Return the bookkeeping node for linkid on state->ls_linkchain (arg is + * the show_state_t). If no node exists yet, a new zeroed one is allocated + * and inserted at the head of the list. Returns NULL only if that + * allocation fails. + */ +link_chain_t * +get_link_prev_stat(datalink_id_t linkid, void *arg) +{ + show_state_t *state = (show_state_t *)arg; + link_chain_t *link_curr = NULL; + + /* Scan prev linkid list and look for entry matching this entry */ + for (link_curr = state->ls_linkchain; link_curr; + link_curr = link_curr->lc_next) { + if (link_curr->lc_linkid == linkid) + break; + } + /* New link, add it */ + if (link_curr == NULL) { + link_curr = (link_chain_t *)malloc(sizeof (link_chain_t)); + if (link_curr == NULL) + goto done; + /* + * Zero the whole node, not just lc_statchain, so that + * lc_visited starts out cleared rather than holding + * whatever malloc() handed back. + */ + bzero(link_curr, sizeof (*link_curr)); + link_curr->lc_linkid = linkid; + link_curr->lc_next = state->ls_linkchain; + state->ls_linkchain = link_curr; + } +done: + return (link_curr); +} + +/* + * Number of links may change while dlstat with -i is executing. + * Free memory allocated for links that are no longer there. + * Prepare for next iteration by marking visited = false for existing stat + * entries. + */ +static void +cleanup_removed_links(show_state_t *state) +{ + link_chain_t *lcurr; + link_chain_t *lprev = NULL; + link_chain_t *tofree; + int i; + + /* Delete all nodes from the list that have lc_visited marked false */ + lcurr = state->ls_linkchain; + while (lcurr != NULL) { + if (lcurr->lc_visited) { + lcurr->lc_visited = B_FALSE; + lprev = lcurr; + lcurr = lcurr->lc_next; + continue; + } + /* Is it head of the list? 
*/ + if (lcurr == state->ls_linkchain) + state->ls_linkchain = lcurr->lc_next; + else + lprev->lc_next = lcurr->lc_next; + /* lprev remains the same */ + tofree = lcurr; + lcurr = lcurr->lc_next; + + /* Free stats memory for the removed link */ + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) { + if (state->ls_stattype[i]) + dladm_link_stat_free(tofree->lc_statchain[i]); + } + free(tofree); + } +} + +void * +print_total_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + total_stat_entry_t *sentry = statentry; + total_stat_t *link_stats = &sentry->tse_stats; + total_fields_buf_t *buf; + + buf = malloc(sizeof (total_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->t_linkname, sizeof (buf->t_linkname), "%s", + linkname); + + map_to_units(buf->t_ipackets, sizeof (buf->t_ipackets), + link_stats->ts_ipackets, unit, parsable); + + map_to_units(buf->t_rbytes, sizeof (buf->t_rbytes), + link_stats->ts_rbytes, unit, parsable); + + map_to_units(buf->t_opackets, sizeof (buf->t_opackets), + link_stats->ts_opackets, unit, parsable); + + map_to_units(buf->t_obytes, sizeof (buf->t_obytes), + link_stats->ts_obytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_generic_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + ring_fields_buf_t *buf; + + buf = malloc(sizeof (ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->r_linkname, sizeof (buf->r_linkname), "%s", + linkname); + + (void) snprintf(buf->r_type, sizeof (buf->r_type), "rx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->r_index, sizeof (buf->r_index), "--"); + } else { + (void) snprintf(buf->r_index, sizeof (buf->r_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->r_packets, sizeof (buf->r_packets), + link_stats->r_packets, unit, parsable); + + 
map_to_units(buf->r_bytes, sizeof (buf->r_bytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_generic_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + ring_fields_buf_t *buf; + + buf = malloc(sizeof (ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->r_linkname, sizeof (buf->r_linkname), "%s", + linkname); + + (void) snprintf(buf->r_type, sizeof (buf->r_type), "tx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->r_index, sizeof (buf->r_index), "--"); + } else { + (void) snprintf(buf->r_index, sizeof (buf->r_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->r_packets, sizeof (buf->r_packets), + link_stats->r_packets, unit, parsable); + + map_to_units(buf->r_bytes, sizeof (buf->r_bytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + rx_ring_fields_buf_t *buf; + + buf = malloc(sizeof (rx_ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->rr_linkname, sizeof (buf->rr_linkname), "%s", + linkname); + + (void) snprintf(buf->rr_type, sizeof (buf->rr_type), "rx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->rr_index, sizeof (buf->rr_index), "--"); + } else { + (void) snprintf(buf->rr_index, sizeof (buf->rr_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->rr_ipackets, sizeof (buf->rr_ipackets), + link_stats->r_packets, unit, parsable); + + map_to_units(buf->rr_rbytes, sizeof (buf->rr_rbytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_ring_stats(const char *linkname, void *statentry, char unit, + boolean_t 
parsable) +{ + ring_stat_entry_t *sentry = statentry; + ring_stat_t *link_stats = &sentry->re_stats; + tx_ring_fields_buf_t *buf; + + buf = malloc(sizeof (tx_ring_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->tr_linkname, sizeof (buf->tr_linkname), "%s", + linkname); + + (void) snprintf(buf->tr_type, sizeof (buf->tr_type), "tx"); + + if (sentry->re_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->tr_index, sizeof (buf->tr_index), "--"); + } else { + (void) snprintf(buf->tr_index, sizeof (buf->tr_index), + "%llu", sentry->re_index); + } + + map_to_units(buf->tr_opackets, sizeof (buf->tr_opackets), + link_stats->r_packets, unit, parsable); + + map_to_units(buf->tr_obytes, sizeof (buf->tr_obytes), + link_stats->r_bytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_generic_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + rx_lane_stat_entry_t *sentry = statentry; + rx_lane_stat_t *link_stats = &sentry->rle_stats; + lane_fields_buf_t *buf; + + if (sentry->rle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->l_linkname, sizeof (buf->l_linkname), "%s", + linkname); + + (void) snprintf(buf->l_type, sizeof (buf->l_type), "rx"); + + if (sentry->rle_id == L_HWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "hw"); + else if (sentry->rle_id == L_SWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "sw"); + else if (sentry->rle_id == L_LOCAL) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "local"); + else if (sentry->rle_id == L_BCAST) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "bcast"); + else + (void) snprintf(buf->l_id, sizeof (buf->l_id), "--"); + + if (sentry->rle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->l_index, sizeof (buf->l_index), "--"); + } else { + (void) snprintf(buf->l_index, sizeof (buf->l_index), + "%llu", sentry->rle_index); + } + + 
map_to_units(buf->l_packets, sizeof (buf->l_packets), + link_stats->rl_ipackets, unit, parsable); + + map_to_units(buf->l_bytes, sizeof (buf->l_bytes), + link_stats->rl_rbytes, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_generic_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + tx_lane_stat_entry_t *sentry = statentry; + tx_lane_stat_t *link_stats = &sentry->tle_stats; + lane_fields_buf_t *buf; + + if (sentry->tle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->l_linkname, sizeof (buf->l_linkname), "%s", + linkname); + + (void) snprintf(buf->l_type, sizeof (buf->l_type), "tx"); + + if (sentry->tle_id == L_HWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "hw"); + else if (sentry->tle_id == L_SWLANE) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "sw"); + else if (sentry->tle_id == L_BCAST) + (void) snprintf(buf->l_id, sizeof (buf->l_id), "bcast"); + else + (void) snprintf(buf->l_id, sizeof (buf->l_id), "--"); + + if (sentry->tle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->l_index, sizeof (buf->l_index), "--"); + } else { + (void) snprintf(buf->l_index, sizeof (buf->l_index), + "%llu", sentry->tle_index); + } + map_to_units(buf->l_packets, sizeof (buf->l_packets), + link_stats->tl_opackets, unit, parsable); + + map_to_units(buf->l_bytes, sizeof (buf->l_bytes), + link_stats->tl_obytes, unit, parsable); + +done: + return (buf); +} + +void * +print_rx_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + rx_lane_stat_entry_t *sentry = statentry; + rx_lane_stat_t *link_stats = &sentry->rle_stats; + rx_lane_fields_buf_t *buf; + + if (sentry->rle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (rx_lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->rl_linkname, sizeof (buf->rl_linkname), "%s", + linkname); + + (void) snprintf(buf->rl_type, 
sizeof (buf->rl_type), "rx"); + + if (sentry->rle_id == L_HWLANE) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "hw"); + else if (sentry->rle_id == L_SWLANE) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "sw"); + else if (sentry->rle_id == L_LOCAL) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "local"); + else if (sentry->rle_id == L_BCAST) + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "bcast"); + else + (void) snprintf(buf->rl_id, sizeof (buf->rl_id), "--"); + + if (sentry->rle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->rl_index, sizeof (buf->rl_index), "--"); + } else { + (void) snprintf(buf->rl_index, sizeof (buf->rl_index), + "%llu", sentry->rle_index); + } + + map_to_units(buf->rl_ipackets, sizeof (buf->rl_ipackets), + link_stats->rl_ipackets, unit, parsable); + + map_to_units(buf->rl_rbytes, sizeof (buf->rl_rbytes), + link_stats->rl_rbytes, unit, parsable); + + map_to_units(buf->rl_intrs, sizeof (buf->rl_intrs), + link_stats->rl_intrs, unit, parsable); + + map_to_units(buf->rl_polls, sizeof (buf->rl_polls), + link_stats->rl_polls, unit, parsable); + + map_to_units(buf->rl_sdrops, sizeof (buf->rl_sdrops), + link_stats->rl_sdrops, unit, parsable); + + map_to_units(buf->rl_chl10, sizeof (buf->rl_chl10), + link_stats->rl_chl10, unit, parsable); + + map_to_units(buf->rl_ch10_50, sizeof (buf->rl_ch10_50), + link_stats->rl_ch10_50, unit, parsable); + + map_to_units(buf->rl_chg50, sizeof (buf->rl_chg50), + link_stats->rl_chg50, unit, parsable); + +done: + return (buf); +} + +void * +print_tx_lane_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + tx_lane_stat_entry_t *sentry = statentry; + tx_lane_stat_t *link_stats = &sentry->tle_stats; + tx_lane_fields_buf_t *buf = NULL; + + if (sentry->tle_id == L_DFNCT) + return (NULL); + + buf = malloc(sizeof (tx_lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->tl_linkname, sizeof (buf->tl_linkname), "%s", + linkname); + + (void) 
snprintf(buf->tl_type, sizeof (buf->tl_type), "tx"); + + if (sentry->tle_id == L_HWLANE) + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "hw"); + else if (sentry->tle_id == L_SWLANE) + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "sw"); + else if (sentry->tle_id == L_BCAST) + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "bcast"); + else + (void) snprintf(buf->tl_id, sizeof (buf->tl_id), "--"); + + if (sentry->tle_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->tl_index, sizeof (buf->tl_index), "--"); + } else { + (void) snprintf(buf->tl_index, sizeof (buf->tl_index), + "%llu", sentry->tle_index); + } + + map_to_units(buf->tl_opackets, sizeof (buf->tl_opackets), + link_stats->tl_opackets, unit, parsable); + + map_to_units(buf->tl_obytes, sizeof (buf->tl_obytes), + link_stats->tl_obytes, unit, parsable); + + map_to_units(buf->tl_blockcnt, sizeof (buf->tl_blockcnt), + link_stats->tl_blockcnt, unit, parsable); + + map_to_units(buf->tl_unblockcnt, sizeof (buf->tl_unblockcnt), + link_stats->tl_unblockcnt, unit, parsable); + + map_to_units(buf->tl_sdrops, sizeof (buf->tl_sdrops), + link_stats->tl_sdrops, unit, parsable); + +done: + return (buf); +} + +void * +print_fanout_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + fanout_stat_entry_t *sentry = statentry; + fanout_stat_t *link_stats = &sentry->fe_stats; + rx_fanout_lane_fields_buf_t *buf; + + buf = malloc(sizeof (rx_fanout_lane_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->rfl_linkname, sizeof (buf->rfl_linkname), "%s", + linkname); + + (void) snprintf(buf->rfl_type, sizeof (buf->rfl_type), "rx"); + + if (sentry->fe_id == L_HWLANE) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "hw"); + else if (sentry->fe_id == L_SWLANE) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "sw"); + else if (sentry->fe_id == L_LCLSWLANE) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "lcl/sw"); + else if (sentry->fe_id == L_LOCAL) + (void) 
snprintf(buf->rfl_id, sizeof (buf->rfl_id), "local"); + else if (sentry->fe_id == L_BCAST) + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "bcast"); + else + (void) snprintf(buf->rfl_id, sizeof (buf->rfl_id), "--"); + + if (sentry->fe_index == DLSTAT_INVALID_ENTRY) { + (void) snprintf(buf->rfl_index, sizeof (buf->rfl_index), "--"); + } else { + (void) snprintf(buf->rfl_index, sizeof (buf->rfl_index), + "%llu", sentry->fe_index); + } + + if (sentry->fe_foutindex == DLSTAT_INVALID_ENTRY) + (void) snprintf(buf->rfl_fout, sizeof (buf->rfl_fout), "--"); + else { + (void) snprintf(buf->rfl_fout, sizeof (buf->rfl_fout), "%llu", + sentry->fe_foutindex); + } + + map_to_units(buf->rfl_ipackets, sizeof (buf->rfl_ipackets), + link_stats->f_ipackets, unit, parsable); + + map_to_units(buf->rfl_rbytes, sizeof (buf->rfl_rbytes), + link_stats->f_rbytes, unit, parsable); + +done: + return (buf); +} + +/* + * Build the printable fields for one aggregation port stat entry: the + * link name, the port's link name ("--" if the port link id cannot be + * resolved) and the rx/tx packet and byte counters formatted through + * map_to_units(). Returns a malloc()ed aggr_port_fields_buf_t, or NULL if + * the allocation fails. + */ +void * +print_aggr_port_stats(const char *linkname, void *statentry, char unit, + boolean_t parsable) +{ + aggr_port_stat_entry_t *sentry = statentry; + aggr_port_stat_t *link_stats = &sentry->ape_stats; + aggr_port_fields_buf_t *buf; + char portname[MAXLINKNAMELEN]; + + buf = malloc(sizeof (aggr_port_fields_buf_t)); + if (buf == NULL) + goto done; + + (void) snprintf(buf->ap_linkname, sizeof (buf->ap_linkname), "%s", + linkname); + + /* + * Pass the real size of portname: it is declared with + * MAXLINKNAMELEN, so the length handed to the library must not be + * taken from a different constant. + */ + if (dladm_datalink_id2info(handle, sentry->ape_portlinkid, NULL, + NULL, NULL, portname, sizeof (portname)) + != DLADM_STATUS_OK) { + (void) snprintf(buf->ap_portname, + sizeof (buf->ap_portname), "--"); + } else { + (void) snprintf(buf->ap_portname, + sizeof (buf->ap_portname), "%s", portname); + } + + map_to_units(buf->ap_ipackets, sizeof (buf->ap_ipackets), + link_stats->ap_ipackets, unit, parsable); + + map_to_units(buf->ap_rbytes, sizeof (buf->ap_rbytes), + link_stats->ap_rbytes, unit, parsable); + + map_to_units(buf->ap_opackets, sizeof (buf->ap_opackets), + link_stats->ap_opackets, unit, parsable); + + map_to_units(buf->ap_obytes, sizeof 
(buf->ap_obytes), + link_stats->ap_obytes, unit, parsable); + +done: + return (buf); +} + +dladm_stat_chain_t * +query_link_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg, + dladm_stat_type_t stattype) +{ + link_chain_t *link_node; + dladm_stat_chain_t *curr_stat; + dladm_stat_chain_t *prev_stat = NULL; + dladm_stat_chain_t *diff_stat = NULL; + + /* + * Get prev iteration stat for this link. + * The function as a whole returns (current - previous) stats for + * the link, or NULL if the link node or the current stats cannot + * be obtained. + */ + link_node = get_link_prev_stat(linkid, arg); + if (link_node == NULL) + goto done; + + link_node->lc_visited = B_TRUE; + prev_stat = link_node->lc_statchain[stattype]; + + /* Query library for current stats */ + curr_stat = dladm_link_stat_query(dh, linkid, stattype); + if (curr_stat == NULL) + goto done; + + /* current stats - prev iteration stats */ + diff_stat = dladm_link_stat_diffchain(curr_stat, prev_stat, stattype); + + /* Free prev stats */ + dladm_link_stat_free(prev_stat); + + /* Prev <- curr stats, kept on the link node for the next call */ + link_node->lc_statchain[stattype] = curr_stat; + +done: + return (diff_stat); +} + +void +walk_dlstat_stats(show_state_t *state, const char *linkname, + dladm_stat_type_t stattype, dladm_stat_chain_t *diff_stat) +{ + dladm_stat_chain_t *curr; + + /* + * Unpack each individual stat entry in diff_stat and hand it to the + * formatting callback registered in ls_stats2str for this stattype. + */ + for (curr = diff_stat; curr != NULL; curr = curr->dc_next) { + void *fields_buf; + + /* Format the raw numbers for printing */ + fields_buf = state->ls_stats2str[stattype](linkname, + curr->dc_statentry, state->ls_unit, state->ls_parsable); + /* Print the stats; a NULL buffer means nothing to print */ + if (fields_buf != NULL) + ofmt_print(state->ls_ofmt, fields_buf); + free(fields_buf); + } +} + +static int +show_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg) +{ + show_state_t *state = arg; + int i; + dladm_stat_chain_t *diff_stat; + char linkname[DLPI_LINKNAME_MAX]; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) { + if 
(state->ls_stattype[i]) { + /* + * Query library for stats + * Stats are returned as chain of raw numbers + */ + diff_stat = query_link_stats(handle, linkid, arg, i); + walk_dlstat_stats(state, linkname, i, diff_stat); + dladm_link_stat_free(diff_stat); + } + } +done: + return (DLADM_WALK_CONTINUE); +} + +void +show_link_stats(datalink_id_t linkid, show_state_t state, uint32_t interval) +{ + for (;;) { + if (linkid == DATALINK_ALL_LINKID) { + (void) dladm_walk_datalink_id(show_queried_stats, + handle, &state, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } else { + (void) show_queried_stats(handle, linkid, &state); + } + + if (interval == 0) + break; + + cleanup_removed_links(&state); + (void) sleep(interval); + } +} + +/* + * Print every name/value statistic in stat_chain for the given link. + * The link name is printed ahead of the first entry only; each entry's + * header is followed by its stats, and a separator line closes the output + * if at least one entry was printed. Nothing is printed when linkid + * cannot be resolved to a link name. + */ +void +print_all_stats(dladm_handle_t dh, datalink_id_t linkid, + dladm_stat_chain_t *stat_chain) +{ + dladm_stat_chain_t *curr; + name_value_stat_entry_t *stat_entry; + name_value_stat_t *curr_stat; + boolean_t stat_printed = B_FALSE; + char linkname[MAXLINKNAMELEN]; + /* Start out empty so the first strcmp() below reads defined data */ + char prev_linkname[MAXLINKNAMELEN] = ""; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + sizeof (linkname)) != DLADM_STATUS_OK) + return; + + for (curr = stat_chain; curr != NULL; curr = curr->dc_next) { + stat_entry = curr->dc_statentry; + /* + * Print header + * If link name is already printed in previous iteration, + * don't print again + */ + if (strcmp(prev_linkname, linkname) != 0) + printf("%s \n", linkname); + printf(" %s \n", stat_entry->nve_header); + + /* Print stat fields */ + for (curr_stat = stat_entry->nve_stats; curr_stat != NULL; + curr_stat = curr_stat->nv_nextstat) { + printf("\t%15s", curr_stat->nv_statname); + printf("\t\t%15llu\n", curr_stat->nv_statval); + } + + /* snprintf() always terminates, unlike strncpy() */ + (void) snprintf(prev_linkname, sizeof (prev_linkname), "%s", + linkname); + stat_printed = B_TRUE; + } + if (stat_printed) + printf("---------------------------------------------------\n"); +} + +static int +dump_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg) +{ + 
boolean_t *stattype = arg; + int i; + dladm_stat_chain_t *stat_chain; + + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) { + if (stattype[i]) { + stat_chain = dladm_link_stat_query_all(dh, linkid, i); + print_all_stats(dh, linkid, stat_chain); + dladm_link_stat_query_all_free(stat_chain); + } + } +done: + return (DLADM_WALK_CONTINUE); +} + +void +dump_all_link_stats(datalink_id_t linkid, boolean_t *stattype) +{ + if (linkid == DATALINK_ALL_LINKID) { + (void) dladm_walk_datalink_id(dump_queried_stats, + handle, stattype, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } else { + (void) dump_queried_stats(handle, linkid, stattype); + } +} + +static void +do_show(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t a_arg = B_FALSE; + boolean_t A_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *total_stat_fields = + "link,ipkts,rbytes,opkts,obytes"; + char *rx_total_stat_fields = + "link,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50"; + char *tx_total_stat_fields = + "link,opkts,obytes,blkcnt,ublkcnt"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtaApi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'a': + if (a_arg) + die_optdup(option); + + a_arg = B_TRUE; + break; + case 'A': + if (A_arg) + die_optdup(option); + + A_arg = B_TRUE; + break; + 
case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the options -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (a_arg && A_arg) + die("the options -a and -A are not compatible"); + + if (a_arg && + (p_arg || o_arg || u_arg || i_arg)) { + die("the option -a is not compatible with " + "-p, -o, -u, -i"); + } + + if (A_arg && + (r_arg || t_arg || p_arg || o_arg || u_arg || i_arg)) { + die("the option -A is not compatible with " + "-r, -t, -p, -o, -u, -i"); + } + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + if (argc != 0) + usage(); + } + + if (a_arg) { + boolean_t stattype[DLADM_STAT_NUM_STATS]; + + bzero(&stattype, sizeof (stattype)); + if (r_arg) { + stattype[DLADM_STAT_RX_LANE_TOTAL] = B_TRUE; + } else if (t_arg) { + stattype[DLADM_STAT_TX_LANE_TOTAL] = B_TRUE; + } else { /* Display both Rx and Tx lanes */ + stattype[DLADM_STAT_TOTAL] = B_TRUE; + } + + dump_all_link_stats(linkid, stattype); + return; + } + + if (A_arg) { + boolean_t 
stattype[DLADM_STAT_NUM_STATS]; + int i; + + for (i = 0; i < DLADM_STAT_NUM_STATS; i++) + stattype[i] = B_TRUE; + + dump_all_link_stats(linkid, stattype); + return; + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) { + fields_str = rx_total_stat_fields; + oftemplate = rx_lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE_TOTAL] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE_TOTAL] = + print_rx_lane_stats; + } else if (t_arg) { + fields_str = tx_total_stat_fields; + oftemplate = tx_lane_s_fields; + state.ls_stattype[DLADM_STAT_TX_LANE_TOTAL] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TX_LANE_TOTAL] = + print_tx_lane_stats; + } else { /* Display both Rx and Tx lanes total */ + fields_str = total_stat_fields; + oftemplate = total_s_fields; + state.ls_stattype[DLADM_STAT_TOTAL] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TOTAL] = print_total_stats; + } + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? 
+ fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +static void +do_show_phys(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t a_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + char linkname[MAXLINKNAMELEN]; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + char *ring_stat_fields = + "link,type,index,pkts,bytes"; + char *rx_ring_stat_fields = + "link,type,index,ipkts,rbytes"; + char *tx_ring_stat_fields = + "link,type,index,opkts,obytes"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtapi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'a': + if (a_arg) + die_optdup(option); + + a_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + 
die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the options -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (a_arg && + (p_arg || o_arg || u_arg || i_arg)) { + die("the option -a is not compatible with " + "-p, -o, -u, -i"); + } + + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + usage(); + } + + if (a_arg) { + boolean_t stattype[DLADM_STAT_NUM_STATS]; + + bzero(&stattype, sizeof (stattype)); + + if (r_arg) { + stattype[DLADM_STAT_RX_RING] = B_TRUE; + } else if (t_arg) { + stattype[DLADM_STAT_TX_RING] = B_TRUE; + } else { /* Display both Rx and Tx lanes */ + stattype[DLADM_STAT_RX_RING] = B_TRUE; + stattype[DLADM_STAT_TX_RING] = B_TRUE; + } + + dump_all_link_stats(linkid, stattype); + return; + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) { + fields_str = rx_ring_stat_fields; + oftemplate = rx_ring_s_fields; + state.ls_stattype[DLADM_STAT_RX_RING] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_RING] = print_rx_ring_stats; + } else if (t_arg) { + fields_str = tx_ring_stat_fields; + oftemplate = tx_ring_s_fields; + state.ls_stattype[DLADM_STAT_TX_RING] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TX_RING] = print_tx_ring_stats; + } else { /* Display both Rx and Tx lanes */ + fields_str = ring_stat_fields; + oftemplate = ring_s_fields; + state.ls_stattype[DLADM_STAT_RX_RING] = B_TRUE; + state.ls_stattype[DLADM_STAT_TX_RING] = B_TRUE; + 
state.ls_stats2str[DLADM_STAT_RX_RING] = + print_rx_generic_ring_stats; + state.ls_stats2str[DLADM_STAT_TX_RING] = + print_tx_generic_ring_stats; + } + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? + fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +static void +do_show_link(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t F_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t a_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *lane_stat_fields = + "link,type,id,index,pkts,bytes"; + char *rx_lane_stat_fields = + "link,type,id,index,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50"; + char *tx_lane_stat_fields = + "link,type,id,index,opkts,obytes,blkcnt,ublkcnt"; + char *rx_fanout_stat_fields = + "link,id,index,fout,ipkts,rbytes"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":hrtFapi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'h': + if (r_arg || F_arg || t_arg || i_arg || p_arg || + o_arg || u_arg || a_arg) { + die("the option -h is not compatible with " + "-r, -F, -t, -i, -p, -o, -u, -a"); + } + do_show_history(argc, &argv[0], use); + return; + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 'F': + if (F_arg) + die_optdup(option); + + F_arg = B_TRUE; + break; + case 't': + if 
(t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'a': + if (a_arg) + die_optdup(option); + + a_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + die("the options -u and -p are not compatible"); + + if (F_arg && !r_arg) + die("-F must be used with -r"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + if (a_arg && + (p_arg || o_arg || u_arg || i_arg)) { + die("the option -a is not compatible with " + "-p, -o, -u, -i"); + } + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + usage(); + } + + if (a_arg) { + boolean_t stattype[DLADM_STAT_NUM_STATS]; + + bzero(&stattype, sizeof (stattype)); + + if (r_arg) { + if (F_arg) { + stattype[DLADM_STAT_RX_LANE_FOUT] = B_TRUE; + } else { + stattype[DLADM_STAT_RX_LANE] = B_TRUE; + } + } else if (t_arg) { + stattype[DLADM_STAT_TX_LANE] = B_TRUE; + } else { /* Display both Rx and Tx lanes */ + stattype[DLADM_STAT_RX_LANE] = B_TRUE; + stattype[DLADM_STAT_TX_LANE] = B_TRUE; + } + + 
dump_all_link_stats(linkid, stattype); + return; + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) { + if (F_arg) { + fields_str = rx_fanout_stat_fields; + oftemplate = rx_fanout_lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE_FOUT] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE_FOUT] = + print_fanout_stats; + } else { + fields_str = rx_lane_stat_fields; + oftemplate = rx_lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE] = + print_rx_lane_stats; + } + } else if (t_arg) { + fields_str = tx_lane_stat_fields; + oftemplate = tx_lane_s_fields; + state.ls_stattype[DLADM_STAT_TX_LANE] = B_TRUE; + state.ls_stats2str[DLADM_STAT_TX_LANE] = print_tx_lane_stats; + } else { /* Display both Rx and Tx lanes */ + fields_str = lane_stat_fields; + oftemplate = lane_s_fields; + state.ls_stattype[DLADM_STAT_RX_LANE] = B_TRUE; + state.ls_stattype[DLADM_STAT_TX_LANE] = B_TRUE; + state.ls_stats2str[DLADM_STAT_RX_LANE] = + print_rx_generic_lane_stats; + state.ls_stats2str[DLADM_STAT_TX_LANE] = + print_tx_generic_lane_stats; + } + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? 
+ fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +static void +do_show_aggr(int argc, char *argv[], const char *use) +{ + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + uint32_t flags = DLADM_OPT_ACTIVE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + uint32_t interval = 0; + char unit = '\0'; + show_state_t state; + dladm_status_t status; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *aggr_stat_fields = + "link,port,ipkts,rbytes,opkts,obytes"; + char *rx_aggr_stat_fields = "link,port,ipkts,rbytes"; + char *tx_aggr_stat_fields = "link,port,opkts,obytes"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + ofmt_field_t *oftemplate; + + bzero(&state, sizeof (state)); + opterr = 0; + while ((option = getopt_long(argc, argv, ":rtpi:o:u:", + NULL, NULL)) != -1) { + switch (option) { + case 'r': + if (r_arg) + die_optdup(option); + + r_arg = B_TRUE; + break; + case 't': + if (t_arg) + die_optdup(option); + + t_arg = B_TRUE; + break; + case 'i': + if (i_arg) + die_optdup(option); + + i_arg = B_TRUE; + if (!dladm_str2interval(optarg, &interval)) + die("invalid interval value '%s'", optarg); + break; + case 'p': + if (p_arg) + die_optdup(option); + + p_arg = B_TRUE; + break; + case 'o': + o_arg = B_TRUE; + o_fields_str = optarg; + break; + case 'u': + if (u_arg) + die_optdup(option); + + u_arg = B_TRUE; + if (!dlstat_unit(optarg, &unit)) + die("invalid unit value '%s'," + "unit must be R|K|M|G|T|P", optarg); + break; + default: + die_opterr(optopt, option, use); + break; + } + } + + if (r_arg && t_arg) + die("the options -t and -r are not compatible"); + + if (u_arg && p_arg) + 
die("the options -u and -p are not compatible"); + + if (p_arg && !o_arg) + die("-p requires -o"); + + if (p_arg && strcasecmp(o_fields_str, "all") == 0) + die("\"-o all\" is invalid with -p"); + + + /* get link name (optional last argument) */ + if (optind == (argc-1)) { + if (strlen(argv[optind]) >= MAXLINKNAMELEN) + die("link name too long"); + + if ((status = dladm_name2info(handle, argv[optind], &linkid, + NULL, NULL, NULL)) != DLADM_STATUS_OK) { + die_dlerr(status, "link %s is not valid", argv[optind]); + } + } else if (optind != argc) { + usage(); + } + + state.ls_unit = unit; + state.ls_parsable = p_arg; + + if (state.ls_parsable) + ofmtflags |= OFMT_PARSABLE; + + oftemplate = aggr_port_s_fields; + state.ls_stattype[DLADM_STAT_AGGR_PORT] = B_TRUE; + state.ls_stats2str[DLADM_STAT_AGGR_PORT] = print_aggr_port_stats; + + if (r_arg) + fields_str = rx_aggr_stat_fields; + else if (t_arg) + fields_str = tx_aggr_stat_fields; + else + fields_str = aggr_stat_fields; + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? + fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, oftemplate, ofmtflags, 0, &ofmt); + dlstat_ofmt_check(oferr, state.ls_parsable, ofmt); + state.ls_ofmt = ofmt; + + show_link_stats(linkid, state, interval); + + ofmt_close(ofmt); +} + +/* PRINTFLIKE1 */ +static void +warn(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: warning: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); +} + +/* + * Also closes the dladm handle if it is not NULL. + */ +/* PRINTFLIKE2 */ +static void +die_dlerr(dladm_status_t err, const char *format, ...) 
+{ + va_list alist; + char errmsg[DLADM_STRSIZE]; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + (void) fprintf(stderr, ": %s\n", dladm_status2str(err, errmsg)); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + +/* PRINTFLIKE1 */ +static void +die(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + +static void +die_optdup(int opt) +{ + die("the option -%c cannot be specified more than once", opt); +} + +static void +die_opterr(int opt, int opterr, const char *usage) +{ + switch (opterr) { + case ':': + die("option '-%c' requires a value\nusage: %s", opt, + gettext(usage)); + break; + case '?': + default: + die("unrecognized option '-%c'\nusage: %s", opt, + gettext(usage)); + break; + } +} + +/* + * default output callback function that, when invoked, + * prints string which is offset by ofmt_arg->ofmt_id within buf. + */ +static boolean_t +print_default_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + char *value; + + value = (char *)ofarg->ofmt_cbarg + ofarg->ofmt_id; + (void) strlcpy(buf, value, bufsize); + return (B_TRUE); +} + +static void +dlstat_ofmt_check(ofmt_status_t oferr, boolean_t parsable, + ofmt_handle_t ofmt) +{ + char buf[OFMT_BUFSIZE]; + + if (oferr == OFMT_SUCCESS) + return; + (void) ofmt_strerror(ofmt, oferr, buf, sizeof (buf)); + /* + * All errors are considered fatal in parsable mode. + * NOMEM errors are always fatal, regardless of mode. + * For other errors, we print diagnostics in human-readable + * mode and processs what we can. 
+ */ + if (parsable || oferr == OFMT_ENOFIELDS) { + ofmt_close(ofmt); + die(buf); + } else { + warn(buf); + } +} diff --git a/usr/src/cmd/dlstat/dlstat.xcl b/usr/src/cmd/dlstat/dlstat.xcl new file mode 100644 index 0000000000..bc201f606f --- /dev/null +++ b/usr/src/cmd/dlstat/dlstat.xcl @@ -0,0 +1,110 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# + +msgid " %s \n" +msgid " %.2f" +msgid " %6.2lf%c" +msgid " %7.0lf%c" +msgid " %s" +msgid "---------------------------------------------------\n" +msgid "--" +msgid ": %s\n" +msgid ":hrtFapi:o:u:" +msgid ":rtaApi:o:u:" +msgid ":rtapi:o:u:" +msgid ":rtpi:o:u:" +msgid "" +msgid "\n" +msgid "\t\t%15llu\n" +msgid "\t%15s" +msgid "# Time" +msgid "%.0lf" +msgid "%llu" +msgid "%m/%d/%Y" +msgid "%s \n" +msgid "%s Mbps" +msgid "%s: " +msgid "%s: warning: " +msgid "%s" +msgid "%s\n" +msgid "%T" +msgid "all" +msgid "B" +msgid "BANDWIDTH" +msgid "bcast" +msgid "BLKCNT" +msgid "BYTES" +msgid "CH<10" +msgid "CH>50" +msgid "CH10-50" +msgid "das:e:o:f:F:" +msgid "DURATION" +msgid "END" +msgid "FOUT" +msgid "G" +msgid "gnuplot" +msgid "hw" +msgid "ID" +msgid "INDEX" +msgid "INTRS" +msgid "IPKTS" +msgid "K" +msgid "lcl/sw" +msgid "link,id,index,fout,ipkts,rbytes" +msgid "link,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50" +msgid "link,ipkts,rbytes,opkts,obytes" +msgid "link,opkts,obytes,blkcnt,ublkcnt" +msgid "link,port,ipkts,rbytes,opkts,obytes" +msgid "link,port,ipkts,rbytes" +msgid "link,port,opkts,obytes" +msgid "link,start,end,rbytes,obytes,bandwidth" +msgid "link,type,id,index,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50" +msgid "link,type,id,index,opkts,obytes,blkcnt,ublkcnt" +msgid "link,type,id,index,pkts,bytes" +msgid "link,type,index,ipkts,rbytes" +msgid "link,type,index,opkts,obytes" +msgid "link,type,index,pkts,bytes" +msgid "LINK" +msgid "local" +msgid "M" +msgid "OBYTES" +msgid "OPKTS" +msgid "P" +msgid "PKTS" +msgid "POLLS" +msgid "PORT" +msgid "RBYTES" +msgid "rx" +msgid "SDROPS" +msgid "show-aggr" +msgid "show-link -h" +msgid "show-link" +msgid "show-phys" +msgid "START" +msgid "sw" +msgid "T" +msgid "tx" +msgid "TYPE" +msgid "UBLKCNT" diff --git a/usr/src/cmd/flowadm/Makefile b/usr/src/cmd/flowadm/Makefile index aa057c1f2b..19a15a1b47 100644 --- a/usr/src/cmd/flowadm/Makefile +++ b/usr/src/cmd/flowadm/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# 
Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -34,7 +34,7 @@ include ../Makefile.cmd XGETFLAGS += -a -x $(PROG).xcl LDLIBS += -L$(ROOT)/lib -LDLIBS += -ldladm -lkstat -linetutil +LDLIBS += -ldladm -linetutil ROOTCFGDIR= $(ROOTETC)/dladm ROOTCFGFILES= $(CONFIGFILES:%=$(ROOTCFGDIR)/%) diff --git a/usr/src/cmd/flowadm/flowadm.c b/usr/src/cmd/flowadm/flowadm.c index 2950adcf48..374fa1675c 100644 --- a/usr/src/cmd/flowadm/flowadm.c +++ b/usr/src/cmd/flowadm/flowadm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,7 +31,6 @@ #include <string.h> #include <stropts.h> #include <errno.h> -#include <kstat.h> #include <strings.h> #include <getopt.h> #include <unistd.h> @@ -51,45 +50,22 @@ #include <stddef.h> #include <ofmt.h> -typedef struct show_usage_state_s { - boolean_t us_plot; - boolean_t us_parsable; - boolean_t us_printheader; - boolean_t us_first; - boolean_t us_showall; - ofmt_handle_t us_ofmt; -} show_usage_state_t; - typedef struct show_flow_state { - boolean_t fs_firstonly; - boolean_t fs_donefirst; - pktsum_t fs_prevstats; - uint32_t fs_flags; dladm_status_t fs_status; ofmt_handle_t fs_ofmt; const char *fs_flow; - const char *fs_link; boolean_t fs_parsable; boolean_t fs_persist; - boolean_t fs_stats; - uint64_t fs_mask; } show_flow_state_t; typedef void cmdfunc_t(int, char **); static cmdfunc_t do_add_flow, do_remove_flow, do_init_flow, do_show_flow; static cmdfunc_t do_show_flowprop, do_set_flowprop, do_reset_flowprop; -static cmdfunc_t do_show_usage; static int show_flow(dladm_handle_t, dladm_flow_attr_t *, void *); static int show_flows_onelink(dladm_handle_t, datalink_id_t, void *); -static void flow_stats(const char *, datalink_id_t, uint_t, char *, - show_flow_state_t *); -static 
void get_flow_stats(const char *, pktsum_t *); -static int show_flow_stats(dladm_handle_t, dladm_flow_attr_t *, void *); -static int show_link_flow_stats(dladm_handle_t, datalink_id_t, void *); - static int remove_flow(dladm_handle_t, dladm_flow_attr_t *, void *); static int show_flowprop(dladm_handle_t, dladm_flow_attr_t *, void *); @@ -104,7 +80,7 @@ static void warn(const char *, ...); static void warn_dlerr(dladm_status_t, const char *, ...); /* callback functions for printing output */ -static ofmt_cb_t print_flowprop_cb, print_default_cb, print_flow_stats_cb; +static ofmt_cb_t print_flowprop_cb, print_default_cb; static void flowadm_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t); typedef struct cmd { @@ -120,15 +96,12 @@ static cmd_t cmds[] = { { "reset-flowprop", do_reset_flowprop }, { "show-flow", do_show_flow }, { "init-flow", do_init_flow }, - { "show-usage", do_show_usage } }; static const struct option longopts[] = { {"link", required_argument, 0, 'l'}, {"parsable", no_argument, 0, 'p'}, {"parseable", no_argument, 0, 'p'}, - {"statistics", no_argument, 0, 's'}, - {"interval", required_argument, 0, 'i'}, {"temporary", no_argument, 0, 't'}, {"root-dir", required_argument, 0, 'R'}, { 0, 0, 0, 0 } @@ -236,104 +209,6 @@ typedef struct flowprop_args_s { char *fs_propname; char *fs_flowname; } flowprop_args_t; -/* - * structures for 'flowadm show-flow -s' (print statistics) - */ -typedef enum { - FLOW_S_FLOW, - FLOW_S_IPKTS, - FLOW_S_RBYTES, - FLOW_S_IERRORS, - FLOW_S_OPKTS, - FLOW_S_OBYTES, - FLOW_S_OERRORS -} flow_s_field_index_t; - -static ofmt_field_t flow_s_fields[] = { -/* name, field width, index, callback */ -{ "FLOW", 15, FLOW_S_FLOW, print_flow_stats_cb}, -{ "IPACKETS", 10, FLOW_S_IPKTS, print_flow_stats_cb}, -{ "RBYTES", 8, FLOW_S_RBYTES, print_flow_stats_cb}, -{ "IERRORS", 10, FLOW_S_IERRORS, print_flow_stats_cb}, -{ "OPACKETS", 12, FLOW_S_OPKTS, print_flow_stats_cb}, -{ "OBYTES", 12, FLOW_S_OBYTES, print_flow_stats_cb}, -{ "OERRORS", 8, 
FLOW_S_OERRORS, print_flow_stats_cb}, -NULL_OFMT} -; - -typedef struct flow_args_s { - char *flow_s_flow; - pktsum_t *flow_s_psum; -} flow_args_t; - -/* - * structures for 'flowadm show-usage' - */ -typedef struct usage_fields_buf_s { - char usage_flow[12]; - char usage_duration[10]; - char usage_ipackets[9]; - char usage_rbytes[10]; - char usage_opackets[9]; - char usage_obytes[10]; - char usage_bandwidth[14]; -} usage_fields_buf_t; - -static ofmt_field_t usage_fields[] = { -/* name, field width, offset */ -{ "FLOW", 13, - offsetof(usage_fields_buf_t, usage_flow), print_default_cb}, -{ "DURATION", 11, - offsetof(usage_fields_buf_t, usage_duration), print_default_cb}, -{ "IPACKETS", 10, - offsetof(usage_fields_buf_t, usage_ipackets), print_default_cb}, -{ "RBYTES", 11, - offsetof(usage_fields_buf_t, usage_rbytes), print_default_cb}, -{ "OPACKETS", 10, - offsetof(usage_fields_buf_t, usage_opackets), print_default_cb}, -{ "OBYTES", 11, - offsetof(usage_fields_buf_t, usage_obytes), print_default_cb}, -{ "BANDWIDTH", 15, - offsetof(usage_fields_buf_t, usage_bandwidth), print_default_cb}, -NULL_OFMT} -; - -/* - * structures for 'dladm show-usage link' - */ - -typedef struct usage_l_fields_buf_s { - char usage_l_flow[12]; - char usage_l_stime[13]; - char usage_l_etime[13]; - char usage_l_rbytes[8]; - char usage_l_obytes[8]; - char usage_l_bandwidth[14]; -} usage_l_fields_buf_t; - -static ofmt_field_t usage_l_fields[] = { -/* name, field width, offset */ -{ "FLOW", 13, - offsetof(usage_l_fields_buf_t, usage_l_flow), print_default_cb}, -{ "START", 14, - offsetof(usage_l_fields_buf_t, usage_l_stime), print_default_cb}, -{ "END", 14, - offsetof(usage_l_fields_buf_t, usage_l_etime), print_default_cb}, -{ "RBYTES", 9, - offsetof(usage_l_fields_buf_t, usage_l_rbytes), print_default_cb}, -{ "OBYTES", 9, - offsetof(usage_l_fields_buf_t, usage_l_obytes), print_default_cb}, -{ "BANDWIDTH", 15, - offsetof(usage_l_fields_buf_t, usage_l_bandwidth), print_default_cb}, -NULL_OFMT} -; - 
-#define PRI_HI 100 -#define PRI_LO 10 -#define PRI_NORM 50 - -#define FLOWADM_CONF "/etc/dladm/flowadm.conf" -#define BLANK_LINE(s) ((s[0] == '\0') || (s[0] == '#') || (s[0] == '\n')) static char *progname; @@ -360,15 +235,12 @@ usage(void) " add-flow [-t] -l <link> -a <attr>=<value>[,...]\n" "\t\t [-p <prop>=<value>,...] <flow>\n" " remove-flow [-t] {-l <link> | <flow>}\n" - " show-flow [-p] [-s [-i <interval>]] [-l <link>] " + " show-flow [-p] [-l <link>] " "[<flow>]\n\n" " set-flowprop [-t] -p <prop>=<value>[,...] <flow>\n" " reset-flowprop [-t] [-p <prop>,...] <flow>\n" " show-flowprop [-cP] [-l <link>] [-p <prop>,...] " - "[<flow>]\n\n" - " show-usage [-a] [-d | -F <format>] " - "[-s <DD/MM/YYYY,HH:MM:SS>]\n" - "\t\t [-e <DD/MM/YYYY,HH:MM:SS>] -f <logfile> [<flow>]\n")); + "[<flow>]\n")); /* close dladm handle if it was opened */ if (handle != NULL) @@ -446,275 +318,6 @@ do_init_flow(int argc, char *argv[]) die_dlerr(status, "flows initialization failed"); } -/* ARGSUSED */ -static int -show_usage_date(dladm_usage_t *usage, void *arg) -{ - show_usage_state_t *state = (show_usage_state_t *)arg; - time_t stime; - char timebuf[20]; - dladm_flow_attr_t attr; - dladm_status_t status; - - /* - * Only show usage information for existing flows unless '-a' - * is specified. - */ - if (!state->us_showall && ((status = dladm_flow_info(handle, - usage->du_name, &attr)) != DLADM_STATUS_OK)) { - return (status); - } - - stime = usage->du_stime; - (void) strftime(timebuf, sizeof (timebuf), "%m/%d/%Y", - localtime(&stime)); - (void) printf("%s\n", timebuf); - - return (DLADM_STATUS_OK); -} - -static int -show_usage_time(dladm_usage_t *usage, void *arg) -{ - show_usage_state_t *state = (show_usage_state_t *)arg; - char buf[DLADM_STRSIZE]; - usage_l_fields_buf_t ubuf; - time_t time; - double bw; - dladm_flow_attr_t attr; - dladm_status_t status; - - /* - * Only show usage information for existing flows unless '-a' - * is specified. 
- */ - if (!state->us_showall && ((status = dladm_flow_info(handle, - usage->du_name, &attr)) != DLADM_STATUS_OK)) { - return (status); - } - - if (state->us_plot) { - if (!state->us_printheader) { - if (state->us_first) { - (void) printf("# Time"); - state->us_first = B_FALSE; - } - (void) printf(" %s", usage->du_name); - if (usage->du_last) { - (void) printf("\n"); - state->us_first = B_TRUE; - state->us_printheader = B_TRUE; - } - } else { - if (state->us_first) { - time = usage->du_etime; - (void) strftime(buf, sizeof (buf), "%T", - localtime(&time)); - state->us_first = B_FALSE; - (void) printf("%s", buf); - } - bw = (double)usage->du_bandwidth/1000; - (void) printf(" %.2f", bw); - if (usage->du_last) { - (void) printf("\n"); - state->us_first = B_TRUE; - } - } - return (DLADM_STATUS_OK); - } - - bzero(&ubuf, sizeof (ubuf)); - - (void) snprintf(ubuf.usage_l_flow, sizeof (ubuf.usage_l_flow), "%s", - usage->du_name); - time = usage->du_stime; - (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); - (void) snprintf(ubuf.usage_l_stime, sizeof (ubuf.usage_l_stime), "%s", - buf); - time = usage->du_etime; - (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); - (void) snprintf(ubuf.usage_l_etime, sizeof (ubuf.usage_l_etime), "%s", - buf); - (void) snprintf(ubuf.usage_l_rbytes, sizeof (ubuf.usage_l_rbytes), - "%llu", usage->du_rbytes); - (void) snprintf(ubuf.usage_l_obytes, sizeof (ubuf.usage_l_obytes), - "%llu", usage->du_obytes); - (void) snprintf(ubuf.usage_l_bandwidth, sizeof (ubuf.usage_l_bandwidth), - "%s Mbps", dladm_bw2str(usage->du_bandwidth, buf)); - - ofmt_print(state->us_ofmt, (void *)&ubuf); - return (DLADM_STATUS_OK); -} - -static int -show_usage_res(dladm_usage_t *usage, void *arg) -{ - show_usage_state_t *state = (show_usage_state_t *)arg; - char buf[DLADM_STRSIZE]; - usage_fields_buf_t ubuf; - dladm_flow_attr_t attr; - dladm_status_t status; - - /* - * Only show usage information for existing flows unless '-a' - * is specified. 
- */ - if (!state->us_showall && ((status = dladm_flow_info(handle, - usage->du_name, &attr)) != DLADM_STATUS_OK)) { - return (status); - } - - bzero(&ubuf, sizeof (ubuf)); - - (void) snprintf(ubuf.usage_flow, sizeof (ubuf.usage_flow), "%s", - usage->du_name); - (void) snprintf(ubuf.usage_duration, sizeof (ubuf.usage_duration), - "%llu", usage->du_duration); - (void) snprintf(ubuf.usage_ipackets, sizeof (ubuf.usage_ipackets), - "%llu", usage->du_ipackets); - (void) snprintf(ubuf.usage_rbytes, sizeof (ubuf.usage_rbytes), - "%llu", usage->du_rbytes); - (void) snprintf(ubuf.usage_opackets, sizeof (ubuf.usage_opackets), - "%llu", usage->du_opackets); - (void) snprintf(ubuf.usage_obytes, sizeof (ubuf.usage_obytes), - "%llu", usage->du_obytes); - (void) snprintf(ubuf.usage_bandwidth, sizeof (ubuf.usage_bandwidth), - "%s Mbps", dladm_bw2str(usage->du_bandwidth, buf)); - - ofmt_print(state->us_ofmt, (void *)&ubuf); - - return (DLADM_STATUS_OK); -} - -static boolean_t -valid_formatspec(char *formatspec_str) -{ - if (strcmp(formatspec_str, "gnuplot") == 0) - return (B_TRUE); - return (B_FALSE); -} - -/* ARGSUSED */ -static void -do_show_usage(int argc, char *argv[]) -{ - char *file = NULL; - int opt; - dladm_status_t status; - boolean_t d_arg = B_FALSE; - char *stime = NULL; - char *etime = NULL; - char *resource = NULL; - show_usage_state_t state; - boolean_t o_arg = B_FALSE; - boolean_t F_arg = B_FALSE; - char *fields_str = NULL; - char *formatspec_str = NULL; - char *all_fields = - "flow,duration,ipackets,rbytes,opackets,obytes,bandwidth"; - char *all_l_fields = - "flow,start,end,rbytes,obytes,bandwidth"; - ofmt_handle_t ofmt; - ofmt_status_t oferr; - uint_t ofmtflags = 0; - - bzero(&state, sizeof (show_usage_state_t)); - state.us_parsable = B_FALSE; - state.us_printheader = B_FALSE; - state.us_plot = B_FALSE; - state.us_first = B_TRUE; - - while ((opt = getopt(argc, argv, "das:e:o:f:F:")) != -1) { - switch (opt) { - case 'd': - d_arg = B_TRUE; - break; - case 'a': - 
state.us_showall = B_TRUE; - break; - case 'f': - file = optarg; - break; - case 's': - stime = optarg; - break; - case 'e': - etime = optarg; - break; - case 'o': - o_arg = B_TRUE; - fields_str = optarg; - break; - case 'F': - state.us_plot = F_arg = B_TRUE; - formatspec_str = optarg; - break; - default: - die_opterr(optopt, opt); - } - } - - if (file == NULL) - die("show-usage requires a file"); - - if (optind == (argc-1)) { - dladm_flow_attr_t attr; - - if (!state.us_showall && - dladm_flow_info(handle, resource, &attr) != - DLADM_STATUS_OK) { - die("invalid flow: '%s'", resource); - } - resource = argv[optind]; - } - - if (state.us_parsable) - ofmtflags |= OFMT_PARSABLE; - if (resource == NULL && stime == NULL && etime == NULL) { - if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) - fields_str = all_fields; - oferr = ofmt_open(fields_str, usage_fields, ofmtflags, - 0, &ofmt); - } else { - if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) - fields_str = all_l_fields; - oferr = ofmt_open(fields_str, usage_l_fields, ofmtflags, - 0, &ofmt); - } - - flowadm_ofmt_check(oferr, state.us_parsable, ofmt); - state.us_ofmt = ofmt; - - if (F_arg && d_arg) - die("incompatible -d and -F options"); - - if (F_arg && valid_formatspec(formatspec_str) == B_FALSE) - die("Format specifier %s not supported", formatspec_str); - - if (d_arg) { - /* Print log dates */ - status = dladm_usage_dates(show_usage_date, - DLADM_LOGTYPE_FLOW, file, resource, &state); - } else if (resource == NULL && stime == NULL && etime == NULL && - !F_arg) { - /* Print summary */ - status = dladm_usage_summary(show_usage_res, - DLADM_LOGTYPE_FLOW, file, &state); - } else if (resource != NULL) { - /* Print log entries for named resource */ - status = dladm_walk_usage_res(show_usage_time, - DLADM_LOGTYPE_FLOW, file, resource, stime, etime, &state); - } else { - /* Print time and information for each link */ - status = dladm_walk_usage_time(show_usage_time, - DLADM_LOGTYPE_FLOW, file, 
stime, etime, &state); - } - - ofmt_close(ofmt); - if (status != DLADM_STATUS_OK) - die_dlerr(status, "show-usage"); -} - static void do_add_flow(int argc, char *argv[]) { @@ -981,176 +584,14 @@ show_flows_onelink(dladm_handle_t dh, datalink_id_t linkid, void *arg) } static void -get_flow_stats(const char *flowname, pktsum_t *stats) -{ - kstat_ctl_t *kcp; - kstat_t *ksp; - - bzero(stats, sizeof (*stats)); - - if ((kcp = kstat_open()) == NULL) { - warn("kstat open operation failed"); - return; - } - - ksp = dladm_kstat_lookup(kcp, NULL, -1, flowname, "flow"); - - if (ksp != NULL) - dladm_get_stats(kcp, ksp, stats); - - (void) kstat_close(kcp); -} - -static boolean_t -print_flow_stats_cb(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) -{ - flow_args_t *fargs = of_arg->ofmt_cbarg; - pktsum_t *diff_stats = fargs->flow_s_psum; - - switch (of_arg->ofmt_id) { - case FLOW_S_FLOW: - (void) snprintf(buf, bufsize, "%s", fargs->flow_s_flow); - break; - case FLOW_S_IPKTS: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->ipackets); - break; - case FLOW_S_RBYTES: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->rbytes); - break; - case FLOW_S_IERRORS: - (void) snprintf(buf, bufsize, "%u", - diff_stats->ierrors); - break; - case FLOW_S_OPKTS: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->opackets); - break; - case FLOW_S_OBYTES: - (void) snprintf(buf, bufsize, "%llu", - diff_stats->obytes); - break; - case FLOW_S_OERRORS: - (void) snprintf(buf, bufsize, "%u", - diff_stats->oerrors); - break; - default: - die("invalid input"); - break; - } - return (B_TRUE); -} - -/* ARGSUSED */ -static int -show_flow_stats(dladm_handle_t handle, dladm_flow_attr_t *attr, void *arg) -{ - show_flow_state_t *state = (show_flow_state_t *)arg; - char *name = attr->fa_flowname; - pktsum_t stats, diff_stats; - flow_args_t fargs; - - if (state->fs_firstonly) { - if (state->fs_donefirst) - return (DLADM_WALK_TERMINATE); - state->fs_donefirst = B_TRUE; - } else { - 
bzero(&state->fs_prevstats, sizeof (state->fs_prevstats)); - } - - get_flow_stats(name, &stats); - dladm_stats_diff(&diff_stats, &stats, &state->fs_prevstats); - - fargs.flow_s_flow = name; - fargs.flow_s_psum = &diff_stats; - ofmt_print(state->fs_ofmt, (void *)&fargs); - state->fs_prevstats = stats; - - return (DLADM_WALK_CONTINUE); -} - -/* - * Wrapper of dladm_walk_flow(show_flow,...) to make it usable for - * dladm_walk_datalink_id(). Used for showing flow stats for - * all flows on all links. - */ -static int -show_link_flow_stats(dladm_handle_t dh, datalink_id_t linkid, void * arg) -{ - if (dladm_walk_flow(show_flow_stats, dh, linkid, arg, B_FALSE) - == DLADM_STATUS_OK) - return (DLADM_WALK_CONTINUE); - else - return (DLADM_WALK_TERMINATE); -} - -/* ARGSUSED */ -static void -flow_stats(const char *flow, datalink_id_t linkid, uint_t interval, - char *fields_str, show_flow_state_t *state) -{ - dladm_flow_attr_t attr; - ofmt_handle_t ofmt; - ofmt_status_t oferr; - uint_t ofmtflags = 0; - - oferr = ofmt_open(fields_str, flow_s_fields, ofmtflags, 0, &ofmt); - flowadm_ofmt_check(oferr, state->fs_parsable, ofmt); - state->fs_ofmt = ofmt; - - if (flow != NULL && - dladm_flow_info(handle, flow, &attr) != DLADM_STATUS_OK) - die("invalid flow %s", flow); - - /* - * If an interval is specified, continuously show the stats - * for only the first flow. 
- */ - state->fs_firstonly = (interval != 0); - - for (;;) { - state->fs_donefirst = B_FALSE; - - /* Show stats for named flow */ - if (flow != NULL) { - state->fs_flow = flow; - (void) show_flow_stats(handle, &attr, state); - - /* Show all stats on a link */ - } else if (linkid != DATALINK_INVALID_LINKID) { - (void) dladm_walk_flow(show_flow_stats, handle, linkid, - state, B_FALSE); - - /* Show all stats by datalink */ - } else { - (void) dladm_walk_datalink_id(show_link_flow_stats, - handle, state, DATALINK_CLASS_ALL, - DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); - } - - if (interval == 0) - break; - - (void) fflush(stdout); - (void) sleep(interval); - } - ofmt_close(ofmt); -} - -static void do_show_flow(int argc, char *argv[]) { char flowname[MAXFLOWNAMELEN]; char linkname[MAXLINKNAMELEN]; datalink_id_t linkid = DATALINK_ALL_LINKID; int option; - boolean_t s_arg = B_FALSE; - boolean_t S_arg = B_FALSE; - boolean_t i_arg = B_FALSE; boolean_t l_arg = B_FALSE; boolean_t o_arg = B_FALSE; - uint32_t interval = 0; show_flow_state_t state; char *fields_str = NULL; ofmt_handle_t ofmt; @@ -1160,7 +601,7 @@ do_show_flow(int argc, char *argv[]) bzero(&state, sizeof (state)); opterr = 0; - while ((option = getopt_long(argc, argv, ":pPsSi:l:o:", + while ((option = getopt_long(argc, argv, ":pPl:o:", longopts, NULL)) != -1) { switch (option) { case 'p': @@ -1170,18 +611,6 @@ do_show_flow(int argc, char *argv[]) case 'P': state.fs_persist = B_TRUE; break; - case 's': - if (s_arg) - die_optdup(option); - - s_arg = B_TRUE; - break; - case 'S': - if (S_arg) - die_optdup(option); - - S_arg = B_TRUE; - break; case 'o': if (o_arg) die_optdup(option); @@ -1189,15 +618,6 @@ do_show_flow(int argc, char *argv[]) o_arg = B_TRUE; fields_str = optarg; break; - case 'i': - if (i_arg) - die_optdup(option); - - i_arg = B_TRUE; - - if (!dladm_str2interval(optarg, &interval)) - die("invalid interval value '%s'", optarg); - break; case 'l': if (strlcpy(linkname, optarg, MAXLINKNAMELEN) >= 
MAXLINKNAMELEN) @@ -1212,11 +632,6 @@ do_show_flow(int argc, char *argv[]) break; } } - if (i_arg && !(s_arg || S_arg)) - die("the -i option can be used only with -s or -S"); - - if (s_arg && S_arg) - die("the -s option cannot be used with -S"); /* get flow name (optional last argument */ if (optind == (argc-1)) { @@ -1226,17 +641,6 @@ do_show_flow(int argc, char *argv[]) state.fs_flow = flowname; } - if (S_arg) { - dladm_continuous(handle, linkid, state.fs_flow, interval, - FLOW_REPORT); - return; - } - - if (s_arg) { - flow_stats(state.fs_flow, linkid, interval, fields_str, &state); - return; - } - oferr = ofmt_open(fields_str, flow_fields, ofmtflags, 0, &ofmt); flowadm_ofmt_check(oferr, state.fs_parsable, ofmt); state.fs_ofmt = ofmt; @@ -1471,7 +875,7 @@ warn(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); } /* PRINTFLIKE2 */ @@ -1503,7 +907,7 @@ die(const char *format, ...) (void) vfprintf(stderr, format, alist); va_end(alist); - (void) putchar('\n'); + (void) putc('\n', stderr); /* close dladm handle if it was opened */ if (handle != NULL) diff --git a/usr/src/cmd/flowstat/Makefile b/usr/src/cmd/flowstat/Makefile new file mode 100644 index 0000000000..5cb3eb4025 --- /dev/null +++ b/usr/src/cmd/flowstat/Makefile @@ -0,0 +1,70 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +PROG=flowstat + +ROOTFS_PROG= $(PROG) + +POFILE= $(PROG).po + +include ../Makefile.cmd + +XGETFLAGS += -a -x $(PROG).xcl +LDLIBS += -L$(ROOT)/lib +LDLIBS += -ldladm -linetutil + +ROOTCFGDIR= $(ROOTETC)/dladm + +.KEEP_STATE: + +all: $(ROOTFS_PROG) + +# +# Message catalog +# +_msg: $(POFILE) + +$(POFILE): $(PROG).c + $(RM) $@ + $(COMPILE.cpp) $(PROG).c > $(POFILE).i + $(XGETTEXT) $(XGETFLAGS) $(POFILE).i + sed "/^domain/d" messages.po > $@ + $(RM) messages.po $(POFILE).i + +install: all $(ROOTSBINPROG) $(ROOTCFGDIR) + $(RM) $(ROOTUSRSBINPROG) + -$(SYMLINK) ../../sbin/$(PROG) $(ROOTUSRSBINPROG) + +clean: + +lint: lint_PROG + +$(ROOTCFGDIR): + $(INS.dir) + +$(ROOTCFGDIR)/%: $(ROOTCFGDIR) % + $(INS.file) + +include ../Makefile.targ diff --git a/usr/src/cmd/flowstat/flowstat.c b/usr/src/cmd/flowstat/flowstat.c new file mode 100644 index 0000000000..3ddff9e34f --- /dev/null +++ b/usr/src/cmd/flowstat/flowstat.c @@ -0,0 +1,1149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <stdio.h> +#include <locale.h> +#include <stdarg.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <stropts.h> +#include <errno.h> +#include <strings.h> +#include <getopt.h> +#include <unistd.h> +#include <priv.h> +#include <netdb.h> +#include <libintl.h> +#include <libdlflow.h> +#include <libdllink.h> +#include <libdlstat.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/ethernet.h> +#include <inet/ip.h> +#include <inet/ip6.h> +#include <stddef.h> +#include <ofmt.h> + +typedef struct flow_chain_s { + char fc_flowname[MAXFLOWNAMELEN]; + boolean_t fc_visited; + flow_stat_t *fc_stat; + struct flow_chain_s *fc_next; +} flow_chain_t; + +typedef struct show_flow_state { + flow_chain_t *fs_flowchain; + ofmt_handle_t fs_ofmt; + char fs_unit; + boolean_t fs_parsable; +} show_flow_state_t; + +typedef struct show_history_state_s { + boolean_t us_plot; + boolean_t us_parsable; + boolean_t us_printheader; + boolean_t us_first; + boolean_t us_showall; + ofmt_handle_t us_ofmt; +} show_history_state_t; + +static void do_show_history(int, char **); + +static int query_flow_stats(dladm_handle_t, dladm_flow_attr_t *, void *); +static int query_link_flow_stats(dladm_handle_t, datalink_id_t, void *); + +static void die(const char *, ...); +static void die_optdup(int); +static void die_opterr(int, int, const char *); +static void die_dlerr(dladm_status_t, const char *, ...); +static void warn(const char *, ...); + +/* callback functions for printing output */ +static ofmt_cb_t print_default_cb, print_flow_stats_cb; +static void 
flowstat_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t); + +#define NULL_OFMT {NULL, 0, 0, NULL} + +/* + * structures for flowstat (printing live statistics) + */ +typedef enum { + FLOW_S_FLOW, + FLOW_S_IPKTS, + FLOW_S_RBYTES, + FLOW_S_IERRORS, + FLOW_S_OPKTS, + FLOW_S_OBYTES, + FLOW_S_OERRORS +} flow_s_field_index_t; + +static ofmt_field_t flow_s_fields[] = { +/* name, field width, index, callback */ +{ "FLOW", 15, FLOW_S_FLOW, print_flow_stats_cb}, +{ "IPKTS", 8, FLOW_S_IPKTS, print_flow_stats_cb}, +{ "RBYTES", 8, FLOW_S_RBYTES, print_flow_stats_cb}, +{ "IERRS", 8, FLOW_S_IERRORS, print_flow_stats_cb}, +{ "OPKTS", 8, FLOW_S_OPKTS, print_flow_stats_cb}, +{ "OBYTES", 8, FLOW_S_OBYTES, print_flow_stats_cb}, +{ "OERRS", 8, FLOW_S_OERRORS, print_flow_stats_cb}, +NULL_OFMT} +; + +typedef struct flow_args_s { + char *flow_s_flow; + flow_stat_t *flow_s_stat; + char flow_s_unit; + boolean_t flow_s_parsable; +} flow_args_t; + +/* + * structures for 'flowstat -h' + */ +typedef struct history_fields_buf_s { + char history_flow[12]; + char history_duration[10]; + char history_ipackets[9]; + char history_rbytes[10]; + char history_opackets[9]; + char history_obytes[10]; + char history_bandwidth[14]; +} history_fields_buf_t; + +static ofmt_field_t history_fields[] = { +/* name, field width, offset */ +{ "FLOW", 13, + offsetof(history_fields_buf_t, history_flow), print_default_cb}, +{ "DURATION", 11, + offsetof(history_fields_buf_t, history_duration), print_default_cb}, +{ "IPACKETS", 10, + offsetof(history_fields_buf_t, history_ipackets), print_default_cb}, +{ "RBYTES", 11, + offsetof(history_fields_buf_t, history_rbytes), print_default_cb}, +{ "OPACKETS", 10, + offsetof(history_fields_buf_t, history_opackets), print_default_cb}, +{ "OBYTES", 11, + offsetof(history_fields_buf_t, history_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_fields_buf_t, history_bandwidth), print_default_cb}, +NULL_OFMT} +; + +typedef struct history_l_fields_buf_s { + char 
history_l_flow[12]; + char history_l_stime[13]; + char history_l_etime[13]; + char history_l_rbytes[8]; + char history_l_obytes[8]; + char history_l_bandwidth[14]; +} history_l_fields_buf_t; + +static ofmt_field_t history_l_fields[] = { +/* name, field width, offset */ +{ "FLOW", 13, + offsetof(history_l_fields_buf_t, history_l_flow), print_default_cb}, +{ "START", 14, + offsetof(history_l_fields_buf_t, history_l_stime), print_default_cb}, +{ "END", 14, + offsetof(history_l_fields_buf_t, history_l_etime), print_default_cb}, +{ "RBYTES", 9, + offsetof(history_l_fields_buf_t, history_l_rbytes), print_default_cb}, +{ "OBYTES", 9, + offsetof(history_l_fields_buf_t, history_l_obytes), print_default_cb}, +{ "BANDWIDTH", 15, + offsetof(history_l_fields_buf_t, history_l_bandwidth), + print_default_cb}, +NULL_OFMT} +; + +static char *progname; + +/* + * Handle to libdladm. Opened in main() before the sub-command + * specific function is called. + */ +static dladm_handle_t handle = NULL; + +const char *usage_ermsg = "flowstat [-r | -t] [-i interval] " + "[-l link] [flow]\n" + " flowstat [-S] [-A] [-i interval] [-p] [ -o field[,...]]\n" + " [-u R|K|M|G|T|P] [-l link] [flow]\n" + " flowstat -h [-a] [-d] [-F format]" + " [-s <DD/MM/YYYY,HH:MM:SS>]\n" + " [-e <DD/MM/YYYY,HH:MM:SS>] -f <logfile> " + "[<flow>]"; + +static void +usage(void) +{ + (void) fprintf(stderr, "%s\n", gettext(usage_ermsg)); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(1); +} + +boolean_t +flowstat_unit(char *oarg, char *unit) +{ + if ((strcmp(oarg, "R") == 0) || (strcmp(oarg, "K") == 0) || + (strcmp(oarg, "M") == 0) || (strcmp(oarg, "G") == 0) || + (strcmp(oarg, "T") == 0) || (strcmp(oarg, "P") == 0)) { + *unit = oarg[0]; + return (B_TRUE); + } + + return (B_FALSE); +} + +void +map_to_units(char *buf, uint_t bufsize, double num, char unit, + boolean_t parsable) +{ + if (parsable) { + (void) snprintf(buf, bufsize, "%.0lf", num); + return; + } + + if 
(unit == '\0') {
+		int index;
+
+		/*
+		 * Scale num down by 1000 until it drops below 1000, counting
+		 * the steps.  The test is made on the double itself: casting
+		 * num/1000 to int is out of range once a counter exceeds
+		 * roughly 2.1e12.  Stop after 5 steps ('P', the largest unit
+		 * supported) so the value is never divided further than the
+		 * unit that is printed.
+		 */
+		for (index = 0;
+		    index < 5 && (num >= 1000 || num <= -1000);
+		    index++, num /= 1000)
+			;
+
+		switch (index) {
+		case 0:
+			unit = '\0';
+			break;
+		case 1:
+			unit = 'K';
+			break;
+		case 2:
+			unit = 'M';
+			break;
+		case 3:
+			unit = 'G';
+			break;
+		case 4:
+			unit = 'T';
+			break;
+		case 5:
+			/* Largest unit supported */
+		default:
+			unit = 'P';
+			break;
+		}
+	} else {
+		switch (unit) {
+		case 'R':
+			/* Already raw numbers */
+			unit = '\0';
+			break;
+		case 'K':
+			num /= 1000;
+			break;
+		case 'M':
+			num /= (1000*1000);
+			break;
+		case 'G':
+			num /= (1000*1000*1000);
+			break;
+		case 'T':
+			num /= (1000.0*1000.0*1000.0*1000.0);
+			break;
+		case 'P':
+			/* Largest unit supported */
+		default:
+			num /= (1000.0*1000.0*1000.0*1000.0*1000.0);
+			break;
+		}
+	}
+
+	if (unit == '\0')
+		(void) snprintf(buf, bufsize, " %7.0lf%c", num, unit);
+	else
+		(void) snprintf(buf, bufsize, " %6.2lf%c", num, unit);
+}
+
+/*
+ * Return the node of state->fs_flowchain (arg is a show_flow_state_t *)
+ * whose name matches flowname.  If there is none, a new node with no
+ * previous statistics is allocated and pushed on the head of the chain.
+ * Returns NULL only if that allocation fails.
+ */
+flow_chain_t *
+get_flow_prev_stat(const char *flowname, void *arg)
+{
+	show_flow_state_t	*state = arg;
+	flow_chain_t		*flow_curr;
+
+	/* Scan prev flowname list and look for entry matching this entry */
+	for (flow_curr = state->fs_flowchain; flow_curr != NULL;
+	    flow_curr = flow_curr->fc_next) {
+		if (strcmp(flow_curr->fc_flowname, flowname) == 0)
+			return (flow_curr);
+	}
+
+	/* New flow, add it */
+	flow_curr = malloc(sizeof (*flow_curr));
+	if (flow_curr == NULL)
+		return (NULL);
+
+	/*
+	 * strlcpy() always NUL-terminates.  strncpy() would leave
+	 * fc_flowname unterminated for a name of MAXFLOWNAMELEN or more
+	 * characters, and the strcmp() above would then read past it.
+	 */
+	(void) strlcpy(flow_curr->fc_flowname, flowname,
+	    sizeof (flow_curr->fc_flowname));
+	flow_curr->fc_visited = B_FALSE;
+	flow_curr->fc_stat = NULL;
+	flow_curr->fc_next = state->fs_flowchain;
+	state->fs_flowchain = flow_curr;
+
+	return (flow_curr);
+}
+
+/*
+ * Number of flows may change while flowstat -i is executing.
+ * Free memory allocated for flows that are no longer there.
+ * Prepare for next iteration by marking visited = false for
+ * existing stat entries.
+ */ +static void +cleanup_removed_flows(show_flow_state_t *state) +{ + flow_chain_t *fcurr; + flow_chain_t *fprev; + flow_chain_t *tofree; + + /* Delete all nodes from the list that have fc_visited marked false */ + fcurr = state->fs_flowchain; + while (fcurr != NULL) { + if (fcurr->fc_visited) { + fcurr->fc_visited = B_FALSE; + fprev = fcurr; + fcurr = fcurr->fc_next; + continue; + } + + /* Is it head of the list? */ + if (fcurr == state->fs_flowchain) + state->fs_flowchain = fcurr->fc_next; + else + fprev->fc_next = fcurr->fc_next; + + /* fprev remains the same */ + tofree = fcurr; + fcurr = fcurr->fc_next; + + /* Free stats memory for the removed flow */ + dladm_flow_stat_free(tofree->fc_stat); + free(tofree); + } +} + +static boolean_t +print_flow_stats_cb(ofmt_arg_t *of_arg, char *buf, uint_t bufsize) +{ + flow_args_t *fargs = of_arg->ofmt_cbarg; + flow_stat_t *diff_stats = fargs->flow_s_stat; + char unit = fargs->flow_s_unit; + boolean_t parsable = fargs->flow_s_parsable; + + switch (of_arg->ofmt_id) { + case FLOW_S_FLOW: + (void) snprintf(buf, bufsize, "%s", fargs->flow_s_flow); + break; + case FLOW_S_IPKTS: + map_to_units(buf, bufsize, diff_stats->fl_ipackets, unit, + parsable); + break; + case FLOW_S_RBYTES: + map_to_units(buf, bufsize, diff_stats->fl_rbytes, unit, + parsable); + break; + case FLOW_S_IERRORS: + map_to_units(buf, bufsize, diff_stats->fl_ierrors, unit, + parsable); + break; + case FLOW_S_OPKTS: + map_to_units(buf, bufsize, diff_stats->fl_opackets, unit, + parsable); + break; + case FLOW_S_OBYTES: + map_to_units(buf, bufsize, diff_stats->fl_obytes, unit, + parsable); + break; + case FLOW_S_OERRORS: + map_to_units(buf, bufsize, diff_stats->fl_oerrors, unit, + parsable); + break; + default: + die("invalid input"); + break; + } + return (B_TRUE); +} + +/* ARGSUSED */ +static int +query_flow_stats(dladm_handle_t handle, dladm_flow_attr_t *attr, void *arg) +{ + show_flow_state_t *state = arg; + flow_chain_t *flow_node; + flow_stat_t *curr_stat; + 
flow_stat_t *prev_stat; + flow_stat_t *diff_stat; + char *flowname = attr->fa_flowname; + flow_args_t fargs; + + /* Get previous stats for the flow */ + flow_node = get_flow_prev_stat(flowname, arg); + if (flow_node == NULL) + goto done; + + flow_node->fc_visited = B_TRUE; + prev_stat = flow_node->fc_stat; + + /* Query library for current stats */ + curr_stat = dladm_flow_stat_query(flowname); + if (curr_stat == NULL) + goto done; + + /* current stats - prev iteration stats */ + diff_stat = dladm_flow_stat_diff(curr_stat, prev_stat); + + /* Free prev stats */ + dladm_flow_stat_free(prev_stat); + + /* Prev <- curr stats */ + flow_node->fc_stat = curr_stat; + + if (diff_stat == NULL) + goto done; + + /* Print stats */ + fargs.flow_s_flow = flowname; + fargs.flow_s_stat = diff_stat; + fargs.flow_s_unit = state->fs_unit; + fargs.flow_s_parsable = state->fs_parsable; + ofmt_print(state->fs_ofmt, &fargs); + + /* Free diff stats */ + dladm_flow_stat_free(diff_stat); +done: + return (DLADM_WALK_CONTINUE); +} + +/* + * Wrapper of dladm_walk_flow(query_flow_stats,...) to make it usable for + * dladm_walk_datalink_id(). Used for showing flow stats for + * all flows on all links. 
+ */
+static int
+query_link_flow_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg)
+{
+	if (dladm_walk_flow(query_flow_stats, dh, linkid, arg, B_FALSE)
+	    == DLADM_STATUS_OK)
+		return (DLADM_WALK_CONTINUE);
+	else
+		return (DLADM_WALK_TERMINATE);
+}
+
+/*
+ * Print the header of a name/value statistics entry, then one line per
+ * statistic on its nve_stats list: the statistic name followed by its value.
+ */
+void
+print_all_stats(name_value_stat_entry_t *stat_entry)
+{
+	name_value_stat_t	*curr_stat;
+
+	(void) printf("%s\n", stat_entry->nve_header);
+
+	for (curr_stat = stat_entry->nve_stats; curr_stat != NULL;
+	    curr_stat = curr_stat->nv_nextstat) {
+		(void) printf("\t%15s", curr_stat->nv_statname);
+		(void) printf("\t%15llu\n", curr_stat->nv_statval);
+	}
+}
+
+/*
+ * dladm_walk_flow() callback: dump every statistic of one flow.  A flow
+ * whose statistics cannot be queried is skipped; the walk always continues.
+ */
+/* ARGSUSED */
+static int
+dump_one_flow_stats(dladm_handle_t handle, dladm_flow_attr_t *attr, void *arg)
+{
+	void	*stat;
+
+	stat = dladm_flow_stat_query_all(attr->fa_flowname);
+	if (stat != NULL) {
+		print_all_stats(stat);
+		dladm_flow_stat_query_all_free(stat);
+	}
+
+	return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * Wrapper of dladm_walk_flow(dump_one_flow_stats,...) to make it usable for
+ * dladm_walk_datalink_id(). Used for dumping all statistics of
+ * all flows on all links.
+ */ +static int +dump_link_flow_stats(dladm_handle_t dh, datalink_id_t linkid, void * arg) +{ + if (dladm_walk_flow(dump_one_flow_stats, dh, linkid, arg, B_FALSE) + == DLADM_STATUS_OK) + return (DLADM_WALK_CONTINUE); + else + return (DLADM_WALK_TERMINATE); +} + +static void +dump_all_flow_stats(dladm_flow_attr_t *attrp, void *arg, datalink_id_t linkid, + boolean_t flow_arg) +{ + /* Show stats for named flow */ + if (flow_arg) { + (void) dump_one_flow_stats(handle, attrp, arg); + + /* Show stats for flows on one link */ + } else if (linkid != DATALINK_INVALID_LINKID) { + (void) dladm_walk_flow(dump_one_flow_stats, handle, linkid, + arg, B_FALSE); + + /* Show stats for all flows on all links */ + } else { + (void) dladm_walk_datalink_id(dump_link_flow_stats, + handle, arg, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } +} + +int +main(int argc, char *argv[]) +{ + dladm_status_t status; + int option; + boolean_t r_arg = B_FALSE; + boolean_t t_arg = B_FALSE; + boolean_t p_arg = B_FALSE; + boolean_t i_arg = B_FALSE; + boolean_t o_arg = B_FALSE; + boolean_t u_arg = B_FALSE; + boolean_t A_arg = B_FALSE; + boolean_t S_arg = B_FALSE; + boolean_t flow_arg = B_FALSE; + datalink_id_t linkid = DATALINK_ALL_LINKID; + char linkname[MAXLINKNAMELEN]; + char flowname[MAXFLOWNAMELEN]; + uint32_t interval = 0; + char unit = '\0'; + show_flow_state_t state; + char *fields_str = NULL; + char *o_fields_str = NULL; + + char *total_stat_fields = + "flow,ipkts,rbytes,ierrs,opkts,obytes,oerrs"; + char *rx_stat_fields = + "flow,ipkts,rbytes,ierrs"; + char *tx_stat_fields = + "flow,opkts,obytes,oerrs"; + + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = OFMT_RIGHTJUST; + + dladm_flow_attr_t attr; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + progname = argv[0]; + + /* Open the libdladm handle */ + if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) + 
die_dlerr(status, "could not open /dev/dld");
+
+	bzero(&state, sizeof (state));
+
+	/* Silence getopt's own messages; errors are reported by die_opterr() */
+	opterr = 0;
+	while ((option = getopt_long(argc, argv, ":rtApSi:o:u:l:h",
+	    NULL, NULL)) != -1) {
+		switch (option) {
+		case 'r':
+			if (r_arg)
+				die_optdup(option);
+
+			r_arg = B_TRUE;
+			break;
+		case 't':
+			if (t_arg)
+				die_optdup(option);
+
+			t_arg = B_TRUE;
+			break;
+		case 'A':
+			if (A_arg)
+				die_optdup(option);
+
+			A_arg = B_TRUE;
+			break;
+		case 'p':
+			if (p_arg)
+				die_optdup(option);
+
+			p_arg = B_TRUE;
+			break;
+		case 'S':
+			if (S_arg)
+				die_optdup(option);
+			S_arg = B_TRUE;
+			break;
+		case 'i':
+			if (i_arg)
+				die_optdup(option);
+
+			i_arg = B_TRUE;
+			if (!dladm_str2interval(optarg, &interval))
+				die("invalid interval value '%s'", optarg);
+			break;
+		case 'o':
+			o_arg = B_TRUE;
+			o_fields_str = optarg;
+			break;
+		case 'u':
+			if (u_arg)
+				die_optdup(option);
+
+			u_arg = B_TRUE;
+			if (!flowstat_unit(optarg, &unit))
+				die("invalid unit value '%s', "
+				    "unit must be R|K|M|G|T|P", optarg);
+			break;
+		case 'l':
+			/* die() appends the newline itself */
+			if (strlcpy(linkname, optarg, MAXLINKNAMELEN)
+			    >= MAXLINKNAMELEN)
+				die("link name too long");
+			if (dladm_name2info(handle, linkname, &linkid, NULL,
+			    NULL, NULL) != DLADM_STATUS_OK)
+				die("invalid link '%s'", linkname);
+			break;
+		case 'h':
+			/*
+			 * Only options seen before -h can be rejected here;
+			 * do_show_history() parses the remaining arguments
+			 * with its own option string.
+			 */
+			if (r_arg || t_arg || p_arg || o_arg || u_arg ||
+			    i_arg || S_arg || A_arg) {
+				die("the option -h is not compatible with "
+				    "-r, -t, -p, -o, -u, -i, -S, -A");
+			}
+			do_show_history(argc, argv);
+			return (0);
+		default:
+			die_opterr(optopt, option, usage_ermsg);
+			break;
+		}
+	}
+
+	if (r_arg && t_arg)
+		die("the option -t and -r are not compatible");
+
+	if (u_arg && p_arg)
+		die("the option -u and -p are not compatible");
+
+	if (p_arg && !o_arg)
+		die("-p requires -o");
+
+	if (p_arg && strcasecmp(o_fields_str, "all") == 0)
+		die("\"-o all\" is invalid with -p");
+
+	if (S_arg &&
+	    (r_arg || t_arg || p_arg || o_arg || u_arg))
+		die("the option -S is not compatible with "
+		    "-r, -t, -p, -o, -u");
+
+	if (A_arg &&
+
(r_arg || t_arg || p_arg || o_arg || u_arg || i_arg)) + die("the option -A is not compatible with " + "-r, -t, -p, -o, -u, -i"); + + /* get flow name (optional last argument) */ + if (optind == (argc-1)) { + if (strlcpy(flowname, argv[optind], MAXFLOWNAMELEN) + >= MAXFLOWNAMELEN) + die("flow name too long"); + flow_arg = B_TRUE; + } else if (optind != argc) { + usage(); + } + + if (S_arg) { + dladm_continuous(handle, linkid, (flow_arg ? flowname : NULL), + interval, FLOW_REPORT); + return (0); + } + + if (flow_arg && + dladm_flow_info(handle, flowname, &attr) != DLADM_STATUS_OK) + die("invalid flow %s", flowname); + + if (A_arg) { + dump_all_flow_stats(&attr, &state, linkid, flow_arg); + return (0); + } + + state.fs_unit = unit; + state.fs_parsable = p_arg; + + if (state.fs_parsable) + ofmtflags |= OFMT_PARSABLE; + + if (r_arg) + fields_str = rx_stat_fields; + else if (t_arg) + fields_str = tx_stat_fields; + else + fields_str = total_stat_fields; + + if (o_arg) { + fields_str = (strcasecmp(o_fields_str, "all") == 0) ? 
+ fields_str : o_fields_str; + } + + oferr = ofmt_open(fields_str, flow_s_fields, ofmtflags, 0, &ofmt); + flowstat_ofmt_check(oferr, state.fs_parsable, ofmt); + state.fs_ofmt = ofmt; + + for (;;) { + /* Show stats for named flow */ + if (flow_arg) { + (void) query_flow_stats(handle, &attr, &state); + + /* Show stats for flows on one link */ + } else if (linkid != DATALINK_INVALID_LINKID) { + (void) dladm_walk_flow(query_flow_stats, handle, linkid, + &state, B_FALSE); + + /* Show stats for all flows on all links */ + } else { + (void) dladm_walk_datalink_id(query_link_flow_stats, + handle, &state, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE); + } + + if (interval == 0) + break; + + (void) fflush(stdout); + cleanup_removed_flows(&state); + (void) sleep(interval); + } + ofmt_close(ofmt); + + dladm_close(handle); + return (0); +} + +/* ARGSUSED */ +static int +show_history_date(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = (show_history_state_t *)arg; + time_t stime; + char timebuf[20]; + dladm_flow_attr_t attr; + dladm_status_t status; + + /* + * Only show historical information for existing flows unless '-a' + * is specified. + */ + if (!state->us_showall && ((status = dladm_flow_info(handle, + history->du_name, &attr)) != DLADM_STATUS_OK)) { + return (status); + } + + stime = history->du_stime; + (void) strftime(timebuf, sizeof (timebuf), "%m/%d/%Y", + localtime(&stime)); + (void) printf("%s\n", timebuf); + + return (DLADM_STATUS_OK); +} + +static int +show_history_time(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = (show_history_state_t *)arg; + char buf[DLADM_STRSIZE]; + history_l_fields_buf_t ubuf; + time_t time; + double bw; + dladm_flow_attr_t attr; + dladm_status_t status; + + /* + * Only show historical information for existing flows unless '-a' + * is specified. 
+ */ + if (!state->us_showall && ((status = dladm_flow_info(handle, + history->du_name, &attr)) != DLADM_STATUS_OK)) { + return (status); + } + + if (state->us_plot) { + if (!state->us_printheader) { + if (state->us_first) { + (void) printf("# Time"); + state->us_first = B_FALSE; + } + (void) printf(" %s", history->du_name); + if (history->du_last) { + (void) printf("\n"); + state->us_first = B_TRUE; + state->us_printheader = B_TRUE; + } + } else { + if (state->us_first) { + time = history->du_etime; + (void) strftime(buf, sizeof (buf), "%T", + localtime(&time)); + state->us_first = B_FALSE; + (void) printf("%s", buf); + } + bw = (double)history->du_bandwidth/1000; + (void) printf(" %.2f", bw); + if (history->du_last) { + (void) printf("\n"); + state->us_first = B_TRUE; + } + } + return (DLADM_STATUS_OK); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.history_l_flow, sizeof (ubuf.history_l_flow), "%s", + history->du_name); + time = history->du_stime; + (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.history_l_stime, sizeof (ubuf.history_l_stime), + "%s", buf); + time = history->du_etime; + (void) strftime(buf, sizeof (buf), "%T", localtime(&time)); + (void) snprintf(ubuf.history_l_etime, sizeof (ubuf.history_l_etime), + "%s", buf); + (void) snprintf(ubuf.history_l_rbytes, sizeof (ubuf.history_l_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.history_l_obytes, sizeof (ubuf.history_l_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.history_l_bandwidth, + sizeof (ubuf.history_l_bandwidth), "%s Mbps", + dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->us_ofmt, (void *)&ubuf); + return (DLADM_STATUS_OK); +} + +static int +show_history_res(dladm_usage_t *history, void *arg) +{ + show_history_state_t *state = (show_history_state_t *)arg; + char buf[DLADM_STRSIZE]; + history_fields_buf_t ubuf; + dladm_flow_attr_t attr; + dladm_status_t status; + + /* + * Only show historical 
information for existing flows unless '-a' + * is specified. + */ + if (!state->us_showall && ((status = dladm_flow_info(handle, + history->du_name, &attr)) != DLADM_STATUS_OK)) { + return (status); + } + + bzero(&ubuf, sizeof (ubuf)); + + (void) snprintf(ubuf.history_flow, sizeof (ubuf.history_flow), "%s", + history->du_name); + (void) snprintf(ubuf.history_duration, sizeof (ubuf.history_duration), + "%llu", history->du_duration); + (void) snprintf(ubuf.history_ipackets, sizeof (ubuf.history_ipackets), + "%llu", history->du_ipackets); + (void) snprintf(ubuf.history_rbytes, sizeof (ubuf.history_rbytes), + "%llu", history->du_rbytes); + (void) snprintf(ubuf.history_opackets, sizeof (ubuf.history_opackets), + "%llu", history->du_opackets); + (void) snprintf(ubuf.history_obytes, sizeof (ubuf.history_obytes), + "%llu", history->du_obytes); + (void) snprintf(ubuf.history_bandwidth, sizeof (ubuf.history_bandwidth), + "%s Mbps", dladm_bw2str(history->du_bandwidth, buf)); + + ofmt_print(state->us_ofmt, (void *)&ubuf); + + return (DLADM_STATUS_OK); +} + +static boolean_t +valid_formatspec(char *formatspec_str) +{ + return (strcmp(formatspec_str, "gnuplot") == 0); +} + +/* ARGSUSED */ +static void +do_show_history(int argc, char *argv[]) +{ + char *file = NULL; + int opt; + dladm_status_t status; + boolean_t d_arg = B_FALSE; + char *stime = NULL; + char *etime = NULL; + char *resource = NULL; + show_history_state_t state; + boolean_t o_arg = B_FALSE; + boolean_t F_arg = B_FALSE; + char *fields_str = NULL; + char *formatspec_str = NULL; + char *all_fields = + "flow,duration,ipackets,rbytes,opackets,obytes,bandwidth"; + char *all_l_fields = + "flow,start,end,rbytes,obytes,bandwidth"; + ofmt_handle_t ofmt; + ofmt_status_t oferr; + uint_t ofmtflags = 0; + + bzero(&state, sizeof (show_history_state_t)); + state.us_parsable = B_FALSE; + state.us_printheader = B_FALSE; + state.us_plot = B_FALSE; + state.us_first = B_TRUE; + + while ((opt = getopt(argc, argv, "das:e:o:f:F:")) != -1) 
{ + switch (opt) { + case 'd': + d_arg = B_TRUE; + break; + case 'a': + state.us_showall = B_TRUE; + break; + case 'f': + file = optarg; + break; + case 's': + stime = optarg; + break; + case 'e': + etime = optarg; + break; + case 'o': + o_arg = B_TRUE; + fields_str = optarg; + break; + case 'F': + state.us_plot = F_arg = B_TRUE; + formatspec_str = optarg; + break; + default: + die_opterr(optopt, opt, usage_ermsg); + } + } + + if (file == NULL) + die("-h requires a file"); + + if (optind == (argc-1)) { + dladm_flow_attr_t attr; + + resource = argv[optind]; + if (!state.us_showall && + dladm_flow_info(handle, resource, &attr) != + DLADM_STATUS_OK) { + die("invalid flow: '%s'", resource); + } + } + + if (state.us_parsable) + ofmtflags |= OFMT_PARSABLE; + if (resource == NULL && stime == NULL && etime == NULL) { + if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) + fields_str = all_fields; + oferr = ofmt_open(fields_str, history_fields, ofmtflags, + 0, &ofmt); + } else { + if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) + fields_str = all_l_fields; + oferr = ofmt_open(fields_str, history_l_fields, ofmtflags, + 0, &ofmt); + } + + flowstat_ofmt_check(oferr, state.us_parsable, ofmt); + state.us_ofmt = ofmt; + + if (F_arg && d_arg) + die("incompatible -d and -F options"); + + if (F_arg && !valid_formatspec(formatspec_str)) + die("Format specifier %s not supported", formatspec_str); + + if (d_arg) { + /* Print log dates */ + status = dladm_usage_dates(show_history_date, + DLADM_LOGTYPE_FLOW, file, resource, &state); + } else if (resource == NULL && stime == NULL && etime == NULL && + !F_arg) { + /* Print summary */ + status = dladm_usage_summary(show_history_res, + DLADM_LOGTYPE_FLOW, file, &state); + } else if (resource != NULL) { + /* Print log entries for named resource */ + status = dladm_walk_usage_res(show_history_time, + DLADM_LOGTYPE_FLOW, file, resource, stime, etime, &state); + } else { + /* Print time and information for each flow */ + 
status = dladm_walk_usage_time(show_history_time, + DLADM_LOGTYPE_FLOW, file, stime, etime, &state); + } + + ofmt_close(ofmt); + if (status != DLADM_STATUS_OK) + die_dlerr(status, "-h"); + dladm_close(handle); +} + +static void +warn(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: warning: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); +} + +/* PRINTFLIKE1 */ +static void +die(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + + (void) putc('\n', stderr); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + +static void +die_optdup(int opt) +{ + die("the option -%c cannot be specified more than once", opt); +} + +static void +die_opterr(int opt, int opterr, const char *usage) +{ + switch (opterr) { + case ':': + die("option '-%c' requires a value\nusage: %s", opt, + gettext(usage)); + break; + case '?': + default: + die("unrecognized option '-%c'\nusage: %s", opt, + gettext(usage)); + break; + } +} + +/* PRINTFLIKE2 */ +static void +die_dlerr(dladm_status_t err, const char *format, ...) +{ + va_list alist; + char errmsg[DLADM_STRSIZE]; + + format = gettext(format); + (void) fprintf(stderr, "%s: ", progname); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); + (void) fprintf(stderr, ": %s\n", dladm_status2str(err, errmsg)); + + /* close dladm handle if it was opened */ + if (handle != NULL) + dladm_close(handle); + + exit(EXIT_FAILURE); +} + + +/* + * default output callback function that, when invoked from dladm_print_output, + * prints string which is offset by of_arg->ofmt_id within buf. 
+ */
+static boolean_t
+print_default_cb(ofmt_arg_t *of_arg, char *buf, uint_t bufsize)
+{
+	char *value;
+
+	value = (char *)of_arg->ofmt_cbarg + of_arg->ofmt_id;
+	(void) strlcpy(buf, value, bufsize);
+	return (B_TRUE);
+}
+
+/*
+ * Report the result of ofmt_open().  Returns silently on OFMT_SUCCESS;
+ * otherwise the text produced by ofmt_strerror() is printed, either as a
+ * fatal error (closing the ofmt handle first) or as a warning.
+ */
+static void
+flowstat_ofmt_check(ofmt_status_t oferr, boolean_t parsable,
+    ofmt_handle_t ofmt)
+{
+	char buf[OFMT_BUFSIZE];
+
+	if (oferr == OFMT_SUCCESS)
+		return;
+	(void) ofmt_strerror(ofmt, oferr, buf, sizeof (buf));
+	/*
+	 * All errors are considered fatal in parsable mode.
+	 * OFMT_ENOFIELDS errors are always fatal, regardless of mode.
+	 * For other errors, we print diagnostics in human-readable
+	 * mode and process what we can.
+	 *
+	 * buf is passed as an argument, never as the format: it is
+	 * generated text and must not be interpreted by vfprintf().
+	 */
+	if (parsable || oferr == OFMT_ENOFIELDS) {
+		ofmt_close(ofmt);
+		die("%s", buf);
+	} else {
+		warn("%s", buf);
+	}
+}
diff --git a/usr/src/cmd/flowstat/flowstat.xcl b/usr/src/cmd/flowstat/flowstat.xcl
new file mode 100644
index 0000000000..369608c062
--- /dev/null
+++ b/usr/src/cmd/flowstat/flowstat.xcl
@@ -0,0 +1,73 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# +# + +msgid " %.2f" +msgid " %6.2lf%c" +msgid " %7.0lf%c" +msgid " %s" +msgid "-h" +msgid ": %s\n" +msgid ":rtApSi:o:u:l:h" +msgid "" +msgid "\n" +msgid "\t%15llu\n" +msgid "\t%15s" +msgid "# Time" +msgid "%.0lf" +msgid "%llu" +msgid "%m/%d/%Y" +msgid "%s Mbps" +msgid "%s: " +msgid "%s: warning: " +msgid "%s" +msgid "%s\n" +msgid "%T" +msgid "all" +msgid "B" +msgid "BANDWIDTH" +msgid "das:e:o:f:F:" +msgid "DURATION" +msgid "END" +msgid "flow,duration,ipackets,rbytes,opackets,obytes,bandwidth" +msgid "flow,ipkts,rbytes,ierrs,opkts,obytes,oerrs" +msgid "flow,ipkts,rbytes,ierrs" +msgid "flow,opkts,obytes,oerrs" +msgid "flow,start,end,rbytes,obytes,bandwidth" +msgid "FLOW" +msgid "G" +msgid "gnuplot" +msgid "IERRS" +msgid "IPACKETS" +msgid "IPKTS" +msgid "K" +msgid "M" +msgid "OBYTES" +msgid "OERRS" +msgid "OPACKETS" +msgid "OPKTS" +msgid "P" +msgid "RBYTES" +msgid "START" +msgid "T" diff --git a/usr/src/cmd/mdb/common/modules/mac/mac.c b/usr/src/cmd/mdb/common/modules/mac/mac.c index 4a56960ca7..268d92ac2d 100644 --- a/usr/src/cmd/mdb/common/modules/mac/mac.c +++ b/usr/src/cmd/mdb/common/modules/mac/mac.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -34,6 +34,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_flow_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #define STRSIZE 64 #define MAC_RX_SRS_SIZE (MAX_RINGS_PER_GROUP * sizeof (uintptr_t)) @@ -59,12 +60,15 @@ #define MAC_SRS_STAT 0x04 #define MAC_SRS_CPU 0x08 #define MAC_SRS_VERBOSE 0x10 +#define MAC_SRS_INTR 0x20 #define MAC_SRS_RXSTAT (MAC_SRS_RX|MAC_SRS_STAT) #define MAC_SRS_TXSTAT (MAC_SRS_TX|MAC_SRS_STAT) #define MAC_SRS_RXCPU (MAC_SRS_RX|MAC_SRS_CPU) #define MAC_SRS_TXCPU (MAC_SRS_TX|MAC_SRS_CPU) #define MAC_SRS_RXCPUVERBOSE (MAC_SRS_RXCPU|MAC_SRS_VERBOSE) #define MAC_SRS_TXCPUVERBOSE (MAC_SRS_TXCPU|MAC_SRS_VERBOSE) +#define MAC_SRS_RXINTR (MAC_SRS_RX|MAC_SRS_INTR) +#define MAC_SRS_TXINTR (MAC_SRS_TX|MAC_SRS_INTR) static char * mac_flow_proto2str(uint8_t protocol) @@ -314,9 +318,28 @@ mac_flow_dcmd_output(uintptr_t addr, uint_t flags, uint_t args) break; } case MAC_FLOW_STATS: { + uint64_t totibytes = 0; + uint64_t totobytes = 0; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + + for (i = 0; i < fe.fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)(fe.fe_rx_srs[i]); + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + totibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + } + mac_srs = (mac_soft_ring_set_t *)(fe.fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + totobytes = mac_tx_stat->mts_obytes; + } mdb_printf("%?p %-32s %16llu %16llu\n", - addr, fe.fe_flow_name, fe.fe_flowstats.fs_rbytes, - fe.fe_flowstats.fs_obytes); + addr, fe.fe_flow_name, totibytes, totobytes); + break; } } @@ -444,6 +467,10 @@ mac_srs_txmode2str(mac_tx_srs_mode_t mode) return ("BW"); case SRS_TX_BW_FANOUT: return ("BWFO"); + case SRS_TX_AGGR: + return ("AG"); + case SRS_TX_BW_AGGR: + return ("BWAG"); } return ("--"); } @@ -460,6 +487,7 @@ mac_srs_help(void) "\t-s\tdisplay statistics for RX or TX side\n" 
"\t-c\tdisplay CPU binding for RX or TX side\n" "\t-v\tverbose flag for CPU binding to list cpus\n" + "\t-i\tdisplay mac_ring_t and interrupt information\n" "Note: use -r or -t (to specify RX or TX side respectively) along " "with -c or -s\n"); mdb_printf("\n%<u>Interpreting TX Modes%</u>\n"); @@ -468,6 +496,8 @@ mac_srs_help(void) mdb_printf("\t FO --> Fanout\n"); mdb_printf("\t BW --> Bandwidth\n"); mdb_printf("\tBWFO --> Bandwidth Fanout\n"); + mdb_printf("\t AG --> Aggr\n"); + mdb_printf("\tBWAG --> Bandwidth Aggr\n"); } /* @@ -520,6 +550,7 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 't', MDB_OPT_SETBITS, MAC_SRS_TX, &args, 'c', MDB_OPT_SETBITS, MAC_SRS_CPU, &args, 'v', MDB_OPT_SETBITS, MAC_SRS_VERBOSE, &args, + 'i', MDB_OPT_SETBITS, MAC_SRS_INTR, &args, 's', MDB_OPT_SETBITS, MAC_SRS_STAT, &args) != argc) { return (DCMD_USAGE); } @@ -576,7 +607,7 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) "%08x %08x %8d %8d %3d\n", addr, mci.mci_name, mac_srs_txmode2str(srs.srs_tx.st_mode), srs.srs_state, srs.srs_type, srs.srs_count, srs.srs_size, - srs.srs_oth_ring_count); + srs.srs_tx_ring_count); break; } case MAC_SRS_RXCPU: { @@ -596,30 +627,148 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); mdb_printf("%?p %-20s %-4d %-4d " "%-6d %-4d %-7d\n", - addr, mci.mci_name, mc.mc_ncpus, mc.mc_pollid, - mc.mc_workerid, mc.mc_intr_cpu, mc.mc_fanout_cnt); + addr, mci.mci_name, mc.mc_ncpus, mc.mc_rx_pollid, + mc.mc_rx_workerid, mc.mc_rx_intr_cpu, mc.mc_rx_fanout_cnt); break; } case MAC_SRS_TXCPU: { mac_cpus_t mc = srs.srs_cpu; + mac_soft_ring_t *s_ringp, s_ring; + boolean_t first = B_TRUE; + int i; if (DCMD_HDRSPEC(flags)) { - mdb_printf("%?s %-20s %-4s %-6s " - "%-4s %-7s\n", - "", "", "NUM", "WORKER", - "INTR", "FANOUT"); - mdb_printf("%<u>%?s %-20s %-4s %-6s " - "%-4s %-7s%</u>\n", - "ADDR", "LINK_NAME", "CPUS", "CPU", - "CPU", "CPU_CNT"); + mdb_printf("%?s %-12s 
%?s %8s %8s %8s\n", + "", "", "SOFT", "WORKER", "INTR", "RETARGETED"); + mdb_printf("%<u>%?s %-12s %?s %8s %8s %8s%</u>\n", + "ADDR", "LINK_NAME", "RING", "CPU", "CPU", "CPU"); } - if ((args & MAC_SRS_TX) && !(srs.srs_type & SRST_TX)) + if (!(srs.srs_type & SRST_TX)) return (DCMD_OK); - mdb_printf("%?p %-20s %-4d " - "%-6d %-4d %-7d\n", - addr, mci.mci_name, mc.mc_ncpus, - mc.mc_workerid, mc.mc_intr_cpu, mc.mc_fanout_cnt); + + mdb_printf("%?p %-12s ", addr, mci.mci_name); + + /* + * Case of no soft rings, print the info from + * mac_srs_tx_t. + */ + if (srs.srs_tx_ring_count == 0) { + mdb_printf("%?p %8d %8d %8d\n", + 0, mc.mc_tx_fanout_cpus[0], + mc.mc_tx_intr_cpu[0], + mc.mc_tx_retargeted_cpu[0]); + break; + } + + for (s_ringp = srs.srs_soft_ring_head, i = 0; s_ringp != NULL; + s_ringp = s_ring.s_ring_next, i++) { + (void) mdb_vread(&s_ring, sizeof (s_ring), + (uintptr_t)s_ringp); + if (first) { + mdb_printf("%?p %8d %8d %8d\n", + s_ringp, mc.mc_tx_fanout_cpus[i], + mc.mc_tx_intr_cpu[i], + mc.mc_tx_retargeted_cpu[i]); + first = B_FALSE; + continue; + } + mdb_printf("%?s %-12s %?p %8d %8d %8d\n", + "", "", s_ringp, mc.mc_tx_fanout_cpus[i], + mc.mc_tx_intr_cpu[i], mc.mc_tx_retargeted_cpu[i]); + } + break; + } + case MAC_SRS_TXINTR: { + mac_cpus_t mc = srs.srs_cpu; + mac_soft_ring_t *s_ringp, s_ring; + mac_ring_t *m_ringp, m_ring; + boolean_t first = B_TRUE; + int i; + + if (DCMD_HDRSPEC(flags)) { + mdb_printf("%?s %-12s %?s %8s %?s %6s %6s\n", + "", "", "SOFT", "WORKER", "MAC", "", "INTR"); + mdb_printf("%<u>%?s %-12s %?s %8s %?s %6s %6s%</u>\n", + "ADDR", "LINK_NAME", "RING", "CPU", "RING", + "SHARED", "CPU"); + } + if (!(srs.srs_type & SRST_TX)) + return (DCMD_OK); + + mdb_printf("%?p %-12s ", addr, mci.mci_name); + + /* + * Case of no soft rings, print the info from + * mac_srs_tx_t. 
+ */ + if (srs.srs_tx_ring_count == 0) { + m_ringp = srs.srs_tx.st_arg2; + if (m_ringp != NULL) { + (void) mdb_vread(&m_ring, sizeof (m_ring), + (uintptr_t)m_ringp); + mdb_printf("%?p %8d %?p %6d %6d\n", + 0, mc.mc_tx_fanout_cpus[0], m_ringp, + m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_tx_retargeted_cpu[0]); + } else { + mdb_printf("%?p %8d %?p %6d %6d\n", + 0, mc.mc_tx_fanout_cpus[0], 0, + 0, mc.mc_tx_retargeted_cpu[0]); + } + break; + } + + for (s_ringp = srs.srs_soft_ring_head, i = 0; s_ringp != NULL; + s_ringp = s_ring.s_ring_next, i++) { + (void) mdb_vread(&s_ring, sizeof (s_ring), + (uintptr_t)s_ringp); + m_ringp = s_ring.s_ring_tx_arg2; + (void) mdb_vread(&m_ring, sizeof (m_ring), + (uintptr_t)m_ringp); + if (first) { + mdb_printf("%?p %8d %?p %6d %6d\n", + s_ringp, mc.mc_tx_fanout_cpus[i], + m_ringp, + m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_tx_retargeted_cpu[i]); + first = B_FALSE; + continue; + } + mdb_printf("%?s %-12s %?p %8d %?p %6d %6d\n", + "", "", s_ringp, mc.mc_tx_fanout_cpus[i], + m_ringp, m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_tx_retargeted_cpu[i]); + } + break; + } + case MAC_SRS_RXINTR: { + mac_cpus_t mc = srs.srs_cpu; + mac_ring_t *m_ringp, m_ring; + + if (DCMD_HDRSPEC(flags)) { + mdb_printf("%?s %-12s %?s %8s %6s %6s\n", + "", "", "MAC", "", "POLL", "INTR"); + mdb_printf("%<u>%?s %-12s %?s %8s %6s %6s%</u>\n", + "ADDR", "LINK_NAME", "RING", "SHARED", "CPU", + "CPU"); + } + if ((args & MAC_SRS_RX) && (srs.srs_type & SRST_TX)) + return (DCMD_OK); + + mdb_printf("%?p %-12s ", addr, mci.mci_name); + + m_ringp = srs.srs_ring; + if (m_ringp != NULL) { + (void) mdb_vread(&m_ring, sizeof (m_ring), + (uintptr_t)m_ringp); + mdb_printf("%?p %8d %6d %6d\n", + m_ringp, m_ring.mr_info.mri_intr.mi_ddi_shared, + mc.mc_rx_pollid, mc.mc_rx_intr_cpu); + } else { + mdb_printf("%?p %8d %6d %6d\n", + 0, 0, mc.mc_rx_pollid, mc.mc_rx_intr_cpu); + } break; } case MAC_SRS_RXCPUVERBOSE: @@ -640,8 +789,8 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, 
int argc, const mdb_arg_t *argv) ((args & MAC_SRS_RX) && (srs.srs_type & SRST_TX))) return (DCMD_OK); mdb_printf("%?p %-20s %-20d %-20d\n", addr, mci.mci_name, - mc.mc_ncpus, mc.mc_fanout_cnt); - if (mc.mc_ncpus == 0 && mc.mc_fanout_cnt == 0) + mc.mc_ncpus, mc.mc_rx_fanout_cnt); + if (mc.mc_ncpus == 0 && mc.mc_rx_fanout_cnt == 0) break; /* print all cpus and cpus for soft rings */ while (!cpu_done || !fanout_done) { @@ -658,14 +807,15 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) else mdb_printf("%*s", len, ""); fanout_done = mac_srs_print_cpu(&fanout_index, - mc.mc_fanout_cnt, mc.mc_fanout_cpus, NULL); + mc.mc_rx_fanout_cnt, + mc.mc_rx_fanout_cpus, NULL); } mdb_printf("\n"); } break; } case MAC_SRS_RXSTAT: { - mac_srs_rx_t srs_rx = srs.srs_rx; + mac_rx_stats_t *mac_rx_stat = &srs.srs_rx.sr_stat; if (DCMD_HDRSPEC(flags)) { mdb_printf("%?s %-16s %8s %8s " @@ -682,13 +832,14 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("%?p %-16s %8d " "%8d %8d " "%8d %8d\n", - addr, mci.mci_name, srs_rx.sr_intr_count, - srs_rx.sr_poll_count, srs_rx.sr_chain_cnt_undr10, - srs_rx.sr_chain_cnt_10to50, srs_rx.sr_chain_cnt_over50); + addr, mci.mci_name, mac_rx_stat->mrs_intrcnt, + mac_rx_stat->mrs_pollcnt, mac_rx_stat->mrs_chaincntundr10, + mac_rx_stat->mrs_chaincnt10to50, + mac_rx_stat->mrs_chaincntover50); break; } case MAC_SRS_TXSTAT: { - mac_srs_tx_t srs_tx = srs.srs_tx; + mac_tx_stats_t *mac_tx_stat = &srs.srs_tx.st_stat; mac_soft_ring_t *s_ringp, s_ring; boolean_t first = B_TRUE; @@ -708,10 +859,11 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) * Case of no soft rings, print the info from * mac_srs_tx_t. 
*/ - if (srs.srs_oth_ring_count == 0) { + if (srs.srs_tx_ring_count == 0) { mdb_printf("%?p %8d %8d %8d\n", - 0, srs_tx.st_drop_count, srs_tx.st_blocked_cnt, - srs_tx.st_unblocked_cnt); + 0, mac_tx_stat->mts_sdrops, + mac_tx_stat->mts_blockcnt, + mac_tx_stat->mts_unblockcnt); break; } @@ -719,18 +871,19 @@ mac_srs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) s_ringp = s_ring.s_ring_next) { (void) mdb_vread(&s_ring, sizeof (s_ring), (uintptr_t)s_ringp); + mac_tx_stat = &s_ring.s_st_stat; if (first) { mdb_printf("%?p %8d %8d %8d\n", - s_ringp, s_ring.s_ring_drops, - s_ring.s_ring_blocked_cnt, - s_ring.s_ring_unblocked_cnt); + s_ringp, mac_tx_stat->mts_sdrops, + mac_tx_stat->mts_blockcnt, + mac_tx_stat->mts_unblockcnt); first = B_FALSE; continue; } mdb_printf("%?s %-20s %?p %8d %8d %8d\n", - "", "", s_ringp, s_ring.s_ring_drops, - s_ring.s_ring_blocked_cnt, - s_ring.s_ring_unblocked_cnt); + "", "", s_ringp, mac_tx_stat->mts_sdrops, + mac_tx_stat->mts_blockcnt, + mac_tx_stat->mts_unblockcnt); } break; } @@ -853,8 +1006,9 @@ mac_ring_help(void) static const mdb_dcmd_t dcmds[] = { {"mac_flow", "?[-u] [-aprtsm]", "display Flow Entry structures", mac_flow_dcmd, mac_flow_help}, - {"mac_srs", "?[ -r[s|c[v]] | -t[s|c[v]] ]", "display MAC Soft Ring Set" - " structures", mac_srs_dcmd, mac_srs_help}, + {"mac_srs", "?[ -r[i|s|c[v]] | -t[i|s|c[v]] ]", + "display MAC Soft Ring Set" " structures", mac_srs_dcmd, + mac_srs_help}, {"mac_ring", "?", "display MAC ring (hardware) structures", mac_ring_dcmd, mac_ring_help}, { NULL } diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index cb2eff9995..8dce6b20aa 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -2530,6 +2530,7 @@ static int add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname) { dladm_status_t err; + boolean_t cpuset, poolset; /* First check if it's in use by global zone. 
*/ if (zonecfg_ifname_exists(AF_INET, dlname) || @@ -2547,6 +2548,36 @@ add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname) "WARNING: unable to add network interface"); return (-1); } + + /* + * Set the pool of this link if the zone has a pool and + * neither the cpus nor the pool datalink property is + * already set. + */ + err = dladm_linkprop_is_set(dld_handle, linkid, DLADM_PROP_VAL_CURRENT, + "cpus", &cpuset); + if (err != DLADM_STATUS_OK) { + zdlerror(zlogp, err, dlname, + "WARNING: unable to check if cpus link property is set"); + } + err = dladm_linkprop_is_set(dld_handle, linkid, DLADM_PROP_VAL_CURRENT, + "pool", &poolset); + if (err != DLADM_STATUS_OK) { + zdlerror(zlogp, err, dlname, + "WARNING: unable to check if pool link property is set"); + } + + if ((strlen(pool_name) != 0) && !cpuset && !poolset) { + err = dladm_set_linkprop(dld_handle, linkid, "pool", + &pool_name, 1, DLADM_OPT_ACTIVE); + if (err != DLADM_STATUS_OK) { + zerror(zlogp, B_FALSE, "WARNING: unable to set " + "pool %s to datalink %s", pool_name, dlname); + bzero(pool_name, MAXPATHLEN); + } + } else { + bzero(pool_name, MAXPATHLEN); + } return (0); } @@ -2644,6 +2675,72 @@ configure_exclusive_network_interfaces(zlog_t *zlogp) } static int +remove_datalink_pool(zlog_t *zlogp, zoneid_t zoneid) +{ + ushort_t flags; + zone_iptype_t iptype; + int i, dlnum = 0; + datalink_id_t *dllink, *dllinks = NULL; + dladm_status_t err; + + if (strlen(pool_name) == 0) + return (0); + + if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, + sizeof (flags)) < 0) { + if (vplat_get_iptype(zlogp, &iptype) < 0) { + zerror(zlogp, B_TRUE, "unable to determine " + "ip-type"); + return (-1); + } + } else { + if (flags & ZF_NET_EXCL) + iptype = ZS_EXCLUSIVE; + else + iptype = ZS_SHARED; + } + + if (iptype == ZS_EXCLUSIVE) { + /* + * Get the datalink count and for each datalink, + * attempt to clear the pool property and clear + * the pool_name. 
+ */ + if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) { + zerror(zlogp, B_TRUE, "unable to count network " + "interfaces"); + return (-1); + } + + if (dlnum == 0) + return (0); + + if ((dllinks = malloc(dlnum * sizeof (datalink_id_t))) + == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + return (-1); + } + if (zone_list_datalink(zoneid, &dlnum, dllinks) != 0) { + zerror(zlogp, B_TRUE, "unable to list network " + "interfaces"); + return (-1); + } + + bzero(pool_name, MAXPATHLEN); + for (i = 0, dllink = dllinks; i < dlnum; i++, dllink++) { + err = dladm_set_linkprop(dld_handle, *dllink, "pool", + NULL, 0, DLADM_OPT_ACTIVE); + if (err != DLADM_STATUS_OK) { + zerror(zlogp, B_TRUE, + "WARNING: unable to clear pool"); + } + } + free(dllinks); + } + return (0); +} + +static int unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) { int dlnum = 0; @@ -4006,6 +4103,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) zerror(zlogp, B_FALSE, "WARNING: %s", zonecfg_strerror(res)); } + (void) zonecfg_get_poolname(handle, zone_name, pool_name, MAXPATHLEN); zonecfg_fini_handle(handle); return (Z_OK); @@ -4253,6 +4351,12 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) goto error; } + if ((pool_name = malloc(MAXPATHLEN)) == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + return (Z_NOMEM); + } + bzero(pool_name, MAXPATHLEN); + /* * The following actions are not performed when merely mounting a zone * for administrative use. 
@@ -4575,6 +4679,11 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) goto error; } + if (remove_datalink_pool(zlogp, zoneid) != 0) { + zerror(zlogp, B_FALSE, "unable clear datalink pool property"); + goto error; + } + if (zone_shutdown(zoneid) != 0) { zerror(zlogp, B_TRUE, "unable to shutdown zone"); goto error; @@ -4699,6 +4808,8 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) } } + free(pool_name); + remove_mlps(zlogp, zoneid); if (zone_destroy(zoneid) != 0) { diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index 90803d8770..743370c1ad 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -106,6 +106,7 @@ static char *progname; char *zone_name; /* zone which we are managing */ +char *pool_name; char default_brand[MAXNAMELEN]; char brand_name[MAXNAMELEN]; boolean_t zone_isnative; diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index 25ac7bf801..da6aa369ed 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -86,6 +86,7 @@ extern mutex_t msglock; extern boolean_t in_death_throes; extern boolean_t bringup_failure_recovery; extern char *zone_name; +extern char *pool_name; extern char brand_name[MAXNAMELEN]; extern char default_brand[MAXNAMELEN]; extern char boot_args[BOOTARGS_MAX]; diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h index 870a8350a4..f6c30e028d 100644 --- a/usr/src/head/libzonecfg.h +++ b/usr/src/head/libzonecfg.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -422,6 +422,7 @@ extern int zonecfg_destroy_tmp_pool(char *, char *, int); extern int zonecfg_bind_tmp_pool(zone_dochandle_t, zoneid_t, char *, int); extern int zonecfg_bind_pool(zone_dochandle_t, zoneid_t, char *, int); extern boolean_t zonecfg_warn_poold(zone_dochandle_t); +extern int zonecfg_get_poolname(zone_dochandle_t, char *, char *, size_t); /* * Miscellaneous utility functions. diff --git a/usr/src/lib/libdladm/Makefile.com b/usr/src/lib/libdladm/Makefile.com index 84d4f28fd4..791a1e65dc 100644 --- a/usr/src/lib/libdladm/Makefile.com +++ b/usr/src/lib/libdladm/Makefile.com @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -37,7 +37,7 @@ include ../../Makefile.rootfs LIBS = $(DYNLIB) $(LINTLIB) LDLIBS += -ldevinfo -lc -linetutil -lsocket -lscf -lrcm -lnvpair \ - -lexacct -lnsl -lkstat -lcurses + -lexacct -lnsl -lkstat -lcurses -lpool SRCDIR = ../common $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) diff --git a/usr/src/lib/libdladm/common/flowattr.c b/usr/src/lib/libdladm/common/flowattr.c index fd44c8bed9..33fd13de47 100644 --- a/usr/src/lib/libdladm/common/flowattr.c +++ b/usr/src/lib/libdladm/common/flowattr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,8 +41,6 @@ #include <libdlflow.h> #include <libdlflow_impl.h> -#define V4_PART_OF_V6(v6) ((v6)._S6_un._S6_u32[3]) - /* max port number for UDP, TCP & SCTP */ #define MAX_PORT 65535 diff --git a/usr/src/lib/libdladm/common/flowprop.c b/usr/src/lib/libdladm/common/flowprop.c index 25cb714176..a0531c8439 100644 --- a/usr/src/lib/libdladm/common/flowprop.c +++ b/usr/src/lib/libdladm/common/flowprop.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,7 +64,7 @@ static fprop_desc_t prop_table[] = { { "maxbw", { "", NULL }, NULL, 0, B_FALSE, do_set_maxbw, NULL, do_get_maxbw, do_check_maxbw}, - { "priority", { "", NULL }, NULL, 0, B_FALSE, + { "priority", { "", MPL_RESET }, NULL, 0, B_FALSE, do_set_priority, NULL, do_get_priority, do_check_priority} }; @@ -77,8 +77,8 @@ static prop_table_t prop_tbl = { }; static resource_prop_t rsrc_prop_table[] = { - {"maxbw", do_extract_maxbw}, - {"priority", do_extract_priority} + {"maxbw", extract_maxbw}, + {"priority", extract_priority} }; #define DLADM_MAX_RSRC_PROP (sizeof (rsrc_prop_table) / \ sizeof (resource_prop_t)) @@ -387,15 +387,14 @@ do_set_priority(dladm_handle_t handle, const char *flow, val_desc_t *vdp, { dld_ioc_modifyflow_t attr; mac_resource_props_t mrp; - void *val; if (val_cnt != 1) return (DLADM_STATUS_BADVALCNT); bzero(&mrp, sizeof (mrp)); - if (vdp != NULL && (val = (void *)vdp->vd_val) != NULL) { - bcopy(val, &mrp.mrp_priority, sizeof (mac_priority_level_t)); - free(val); + if (vdp != NULL) { + bcopy(&vdp->vd_val, &mrp.mrp_priority, + sizeof (mac_priority_level_t)); } else { mrp.mrp_priority = MPL_RESET; } @@ -416,35 +415,25 @@ static dladm_status_t do_check_priority(fprop_desc_t *pdp, char **prop_val, uint_t val_cnt, val_desc_t **vdpp) { - mac_priority_level_t *pri; + mac_priority_level_t pri; val_desc_t *vdp = NULL; dladm_status_t status = 
DLADM_STATUS_OK; if (val_cnt != 1) return (DLADM_STATUS_BADVALCNT); - pri = malloc(sizeof (mac_priority_level_t)); - if (pri == NULL) - return (DLADM_STATUS_NOMEM); - - status = dladm_str2pri(*prop_val, pri); - if (status != DLADM_STATUS_OK) { - free(pri); + status = dladm_str2pri(*prop_val, &pri); + if (status != DLADM_STATUS_OK) return (status); - } - if (*pri == -1) { - free(pri); + if (pri == -1) return (DLADM_STATUS_BADVAL); - } vdp = malloc(sizeof (val_desc_t)); - if (vdp == NULL) { - free(pri); + if (vdp == NULL) return (DLADM_STATUS_NOMEM); - } - vdp->vd_val = (uintptr_t)pri; + vdp->vd_val = (uint_t)pri; *vdpp = vdp; return (DLADM_STATUS_OK); } diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c index ad23a1a25b..7531a5c368 100644 --- a/usr/src/lib/libdladm/common/libdladm.c +++ b/usr/src/lib/libdladm/common/libdladm.c @@ -89,8 +89,9 @@ typedef struct { static link_protect_t link_protect_types[] = { { MPT_MACNOSPOOF, "mac-nospoof" }, + { MPT_RESTRICTED, "restricted" }, { MPT_IPNOSPOOF, "ip-nospoof" }, - { MPT_RESTRICTED, "restricted" } + { MPT_DHCPNOSPOOF, "dhcp-nospoof" } }; #define LPTYPES (sizeof (link_protect_types) / sizeof (link_protect_t)) @@ -381,6 +382,9 @@ dladm_status2str(dladm_status_t status, char *buf) case DLADM_STATUS_ADDRINUSE: s = "address already in use"; break; + case DLADM_STATUS_POOLCPU: + s = "pool and cpus property are mutually exclusive"; + break; default: s = "<unknown error>"; break; @@ -901,7 +905,7 @@ const char * dladm_ipv4addr2str(void *addr, char *buf) { if (inet_ntop(AF_INET, addr, buf, INET_ADDRSTRLEN) == NULL) - buf[0] = 0; + buf[0] = '\0'; return (buf); } @@ -913,6 +917,22 @@ dladm_str2ipv4addr(char *token, void *addr) DLADM_STATUS_OK : DLADM_STATUS_INVALID_IP); } +const char * +dladm_ipv6addr2str(void *addr, char *buf) +{ + if (inet_ntop(AF_INET6, addr, buf, INET6_ADDRSTRLEN) == NULL) + buf[0] = '\0'; + + return (buf); +} + +dladm_status_t +dladm_str2ipv6addr(char *token, void 
*addr) +{ + return (inet_pton(AF_INET6, token, addr) == 1 ? + DLADM_STATUS_OK : DLADM_STATUS_INVALID_IP); +} + /* * Find the set bits in a mask. * This is used for expanding a bitmask into individual sub-masks diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h index 1cddd9c66e..4e7cb32ba5 100644 --- a/usr/src/lib/libdladm/common/libdladm.h +++ b/usr/src/lib/libdladm/common/libdladm.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,11 +65,12 @@ extern "C" { * - DLADM_OPT_VLAN: * Signifies VLAN creation code path * - * - DLADM_OPT_HWRINGS: - * Requires a hardware group of rings when creating a vnic. - * * - DLADM_OPT_NOREFRESH: * Do not refresh the daemon after setting parameter (used by STP mcheck). + * + * - DLADM_OPT_BOOT: + * Bypass check functions during boot (used by pool property since pools + * can come up after link properties are set) */ #define DLADM_OPT_ACTIVE 0x00000001 #define DLADM_OPT_PERSIST 0x00000002 @@ -78,8 +79,8 @@ extern "C" { #define DLADM_OPT_PREFIX 0x00000010 #define DLADM_OPT_ANCHOR 0x00000020 #define DLADM_OPT_VLAN 0x00000040 -#define DLADM_OPT_HWRINGS 0x00000080 -#define DLADM_OPT_NOREFRESH 0x00000100 +#define DLADM_OPT_NOREFRESH 0x00000080 +#define DLADM_OPT_BOOT 0x00000100 #define DLADM_WALK_TERMINATE 0 #define DLADM_WALK_CONTINUE -1 @@ -160,7 +161,8 @@ typedef enum { DLADM_STATUS_MINMAXBW, DLADM_STATUS_NO_HWRINGS, DLADM_STATUS_PERMONLY, - DLADM_STATUS_OPTMISSING + DLADM_STATUS_OPTMISSING, + DLADM_STATUS_POOLCPU } dladm_status_t; typedef enum { @@ -231,6 +233,8 @@ extern dladm_status_t dladm_str2protect(char *, uint32_t *); extern const char *dladm_protect2str(uint32_t, char *); extern dladm_status_t dladm_str2ipv4addr(char *, void *); extern const char *dladm_ipv4addr2str(void *, char *); +extern dladm_status_t 
dladm_str2ipv6addr(char *, void *); +extern const char *dladm_ipv6addr2str(void *, char *); extern dladm_status_t dladm_parse_flow_props(char *, dladm_arg_list_t **, boolean_t); diff --git a/usr/src/lib/libdladm/common/libdladm_impl.h b/usr/src/lib/libdladm/common/libdladm_impl.h index 88d695a3c7..47bb94b5f7 100644 --- a/usr/src/lib/libdladm/common/libdladm_impl.h +++ b/usr/src/lib/libdladm/common/libdladm_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +38,7 @@ extern "C" { #define MAXLINELEN 1024 #define BUFLEN(lim, ptr) (((lim) > (ptr)) ? ((lim) - (ptr)) : 0) +#define V4_PART_OF_V6(v6) ((v6)._S6_un._S6_u32[3]) /* * The handle contains file descriptors to DLD_CONTROL_DEV and @@ -57,7 +58,7 @@ extern dladm_status_t dladm_errno2status(int); extern dladm_status_t i_dladm_rw_db(dladm_handle_t, const char *, mode_t, dladm_status_t (*)(dladm_handle_t, void *, FILE *, FILE *), void *, boolean_t); -extern dladm_status_t i_dladm_get_state(dladm_handle_t, datalink_id_t, +extern dladm_status_t dladm_get_state(dladm_handle_t, datalink_id_t, link_state_t *); extern void dladm_find_setbits32(uint32_t, uint32_t *, uint32_t *); extern dladm_status_t dladm_parse_args(char *, dladm_arg_list_t **, @@ -92,7 +93,6 @@ extern void dladm_free_args(dladm_arg_list_t *); #define FMADDRLEN "maddrlen" /* uint64_t */ #define FMADDRSLOT "maddrslot" /* uint64_t */ #define FMADDRPREFIXLEN "maddrpreflen" /* uint64_t */ -#define FHWRINGS "hwrings" /* boolean_t */ #define FVRID "vrid" /* uint64_t */ #define FVRAF "vraf" /* uint64_t */ @@ -114,7 +114,7 @@ extern void dladm_free_args(dladm_arg_list_t *); FKEY, FNPORTS, FPORTS, FPOLICY, \ FFIXMACADDR, FFORCE, FLACPMODE, FLACPTIMER, \ FMADDRTYPE, FMADDRLEN, FMADDRSLOT, \ - FMADDRPREFIXLEN, FHWRINGS, \ + FMADDRPREFIXLEN, \ FMACADDR, FSIMNETTYPE, FSIMNETPEER /* @@ -129,7 +129,8 @@ 
typedef struct val_desc { #define VALCNT(vals) (sizeof ((vals)) / sizeof (val_desc_t)) extern dladm_status_t dladm_link_proplist_extract(dladm_handle_t, - dladm_arg_list_t *, mac_resource_props_t *); + dladm_arg_list_t *, mac_resource_props_t *, + uint_t); extern dladm_status_t dladm_flow_proplist_extract(dladm_arg_list_t *, mac_resource_props_t *); @@ -141,9 +142,10 @@ extern dladm_status_t dladm_flow_proplist_extract(dladm_arg_list_t *, * by the pd_check function. */ typedef dladm_status_t rp_extractf_t(val_desc_t *, uint_t, void *); -extern rp_extractf_t do_extract_maxbw, do_extract_priority, - do_extract_cpus, do_extract_protection, - do_extract_allowedips; +extern rp_extractf_t extract_maxbw, extract_priority, + extract_cpus, extract_protection, + extract_allowedips, extract_allowedcids, + extract_rxrings, extract_txrings, extract_pool; typedef struct resource_prop_s { /* diff --git a/usr/src/lib/libdladm/common/libdlaggr.c b/usr/src/lib/libdladm/common/libdlaggr.c index d715ff013c..8c91c41ce1 100644 --- a/usr/src/lib/libdladm/common/libdlaggr.c +++ b/usr/src/lib/libdladm/common/libdlaggr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,7 +57,7 @@ static uchar_t zero_mac[] = {0, 0, 0, 0, 0, 0}; #define VALID_PORT_MAC(mac) \ (((mac) != NULL) && (bcmp(zero_mac, (mac), ETHERADDRL) != 0) && \ - (!(mac)[0] & 0x01)) + (!((mac)[0] & 0x01))) #define PORT_DELIMITER ":" diff --git a/usr/src/lib/libdladm/common/libdlether.c b/usr/src/lib/libdladm/common/libdlether.c index ef89439ae1..4855dc234d 100644 --- a/usr/src/lib/libdladm/common/libdlether.c +++ b/usr/src/lib/libdladm/common/libdlether.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -299,7 +299,7 @@ dladm_ether_info(dladm_handle_t handle, datalink_id_t linkid, eattr->lei_attr[CURRENT].le_spdx->lesd_speed = (int)(sp64/1000000ull); eattr->lei_attr[CURRENT].le_spdx->lesd_duplex = link_duplex; - status = i_dladm_get_state(handle, linkid, &eattr->lei_state); + status = dladm_get_state(handle, linkid, &eattr->lei_state); if (status != DLADM_STATUS_OK) goto bail; diff --git a/usr/src/lib/libdladm/common/libdlflow.c b/usr/src/lib/libdladm/common/libdlflow.c index 235b948504..9e2131ac0c 100644 --- a/usr/src/lib/libdladm/common/libdlflow.c +++ b/usr/src/lib/libdladm/common/libdlflow.c @@ -63,8 +63,6 @@ #define MAXLINELEN 1024 #define MAXPATHLEN 1024 -#define V4_PART_OF_V6(v6) ((v6)._S6_un._S6_u32[3]) - /* database file parameters */ static const char *BW_LIMIT = "bw_limit"; static const char *PRIORITY = "priority"; diff --git a/usr/src/lib/libdladm/common/libdllink.c b/usr/src/lib/libdladm/common/libdllink.c index 4c2ca93c8c..7bf18ceed5 100644 --- a/usr/src/lib/libdladm/common/libdllink.c +++ b/usr/src/lib/libdladm/common/libdllink.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -39,6 +39,7 @@ #include <libdevinfo.h> #include <libdlaggr.h> #include <libdlvlan.h> +#include <libdlvnic.h> #include <libdllink.h> #include <libdlmgmt.h> #include <libdladm_impl.h> @@ -156,9 +157,10 @@ dladm_walk_hwgrp(dladm_handle_t handle, datalink_id_t linkid, void *arg, ret = ioctl(dladm_dld_fd(handle), DLDIOC_GETHWGRP, iomp); if (ret == 0) { - int i; - dld_hwgrpinfo_t *dhip; - dladm_hwgrp_attr_t attr; + int i; + int j; + dld_hwgrpinfo_t *dhip; + dladm_hwgrp_attr_t attr; dhip = (dld_hwgrpinfo_t *)(iomp + 1); for (i = 0; i < iomp->dih_n_groups; i++) { @@ -169,6 +171,9 @@ dladm_walk_hwgrp(dladm_handle_t handle, datalink_id_t linkid, void *arg, attr.hg_grp_num = dhip->dhi_grp_num; attr.hg_grp_type = dhip->dhi_grp_type; attr.hg_n_rings = dhip->dhi_n_rings; + for (j = 0; j < dhip->dhi_n_rings; j++) + attr.hg_rings[j] = dhip->dhi_rings[j]; + dladm_sort_index_list(attr.hg_rings, attr.hg_n_rings); attr.hg_n_clnts = dhip->dhi_n_clnts; (void) strlcpy(attr.hg_client_names, dhip->dhi_clnts, sizeof (attr.hg_client_names)); diff --git a/usr/src/lib/libdladm/common/libdllink.h b/usr/src/lib/libdladm/common/libdllink.h index d47059e1d1..9d5d19a898 100644 --- a/usr/src/lib/libdladm/common/libdllink.h +++ b/usr/src/lib/libdladm/common/libdllink.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -101,6 +101,7 @@ typedef struct dladm_hwgrp_attr { uint_t hg_grp_num; dladm_hwgrp_type_t hg_grp_type; uint_t hg_n_rings; + uint_t hg_rings[MAX_RINGS_PER_GROUP]; uint_t hg_n_clnts; char hg_client_names[MAXCLIENTNAMELEN]; } dladm_hwgrp_attr_t; @@ -134,6 +135,8 @@ extern dladm_status_t dladm_walk_linkprop(dladm_handle_t, datalink_id_t, void *, int (*)(dladm_handle_t, datalink_id_t, const char *, void *)); extern boolean_t dladm_attr_is_linkprop(const char *name); +extern dladm_status_t dladm_linkprop_is_set(dladm_handle_t, datalink_id_t, + dladm_prop_type_t, const char *, boolean_t *); extern dladm_status_t dladm_set_secobj(dladm_handle_t, const char *, dladm_secobj_class_t, uint8_t *, uint_t, uint_t); @@ -208,6 +211,8 @@ extern int dladm_walk_macaddr(dladm_handle_t, datalink_id_t, extern int dladm_walk_hwgrp(dladm_handle_t, datalink_id_t, void *, boolean_t (*)(void *, dladm_hwgrp_attr_t *)); +extern void dladm_sort_index_list(uint_t [], uint_t); + extern dladm_status_t dladm_link_get_proplist(dladm_handle_t, datalink_id_t, dladm_arg_list_t **); diff --git a/usr/src/lib/libdladm/common/libdlstat.c b/usr/src/lib/libdladm/common/libdlstat.c index e69c9d8934..264c5f179f 100644 --- a/usr/src/lib/libdladm/common/libdlstat.c +++ b/usr/src/lib/libdladm/common/libdlstat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,7 @@ #include <libdllink.h> #include <libdlflow.h> #include <libdlstat.h> +#include <libdlaggr.h> /* * x86 <sys/regs> ERR conflicts with <curses.h> ERR. @@ -72,7 +73,7 @@ static int statentry = -1, maxstatentries = 0; /* * Search for flowlist entry in stattable which matches - * the flowname and linkide. If no match is found, use + * the flowname and linkid. If no match is found, use * next available slot. If no slots are available, * reallocate table with more slots. 
* @@ -347,7 +348,8 @@ closedevnet() struct flowlist *flist; /* Close all open /dev/net/ files */ - for (flist = stattable; index <= maxstatentries; index++, flist++) { + + for (flist = stattable; index < maxstatentries; index++, flist++) { if (flist->linkid == DATALINK_INVALID_LINKID) break; if (flist->fd != -1 && flist->fd != INT32_MAX) @@ -711,7 +713,7 @@ dladm_stats_total(pktsum_t *s1, pktsum_t *s2, pktsum_t *s3) s1->snaptime = s2->snaptime; } -#define DIFF_STAT(s2, s3) ((s2) > (s3) ? (s2 - s3) : 0) +#define DIFF_STAT(s2, s3) ((s2) > (s3) ? ((s2) - (s3)) : 0) /* Compute differences between 2 pktsums (s1 = s2 - s3) */ @@ -726,3 +728,2361 @@ dladm_stats_diff(pktsum_t *s1, pktsum_t *s2, pktsum_t *s3) s1->oerrors = DIFF_STAT(s2->oerrors, s3->oerrors); s1->snaptime = DIFF_STAT(s2->snaptime, s3->snaptime); } + +#define DLSTAT_MAC_RX_SWLANE "mac_rx_swlane" +#define DLSTAT_MAC_RX_HWLANE "mac_rx_hwlane" +#define DLSTAT_MAC_TX_SWLANE "mac_tx_swlane" +#define DLSTAT_MAC_TX_HWLANE "mac_tx_hwlane" +#define DLSTAT_MAC_MISC_STAT "mac_misc_stat" +#define DLSTAT_MAC_RX_RING "mac_rx_ring" +#define DLSTAT_MAC_TX_RING "mac_tx_ring" +#define DLSTAT_MAC_FANOUT "mac_rx_swlane0_fanout" + +typedef struct { + const char *si_name; + uint_t si_offset; +} stat_info_t; + +#define A_CNT(arr) (sizeof (arr) / sizeof (arr[0])) + +/* Definitions for rx lane stats */ +#define RL_OFF(f) (offsetof(rx_lane_stat_t, f)) + +static stat_info_t rx_hwlane_stats_list[] = { + {"ipackets", RL_OFF(rl_ipackets)}, + {"rbytes", RL_OFF(rl_rbytes)}, + {"intrs", RL_OFF(rl_intrs)}, + {"intrbytes", RL_OFF(rl_intrbytes)}, + {"polls", RL_OFF(rl_polls)}, + {"pollbytes", RL_OFF(rl_pollbytes)}, + {"rxsdrops", RL_OFF(rl_sdrops)}, + {"chainunder10", RL_OFF(rl_chl10)}, + {"chain10to50", RL_OFF(rl_ch10_50)}, + {"chainover50", RL_OFF(rl_chg50)} +}; +#define RX_HWLANE_STAT_SIZE A_CNT(rx_hwlane_stats_list) + +static stat_info_t rx_swlane_stats_list[] = { + {"ipackets", RL_OFF(rl_ipackets)}, + {"rbytes", RL_OFF(rl_rbytes)}, + 
{"local", RL_OFF(rl_lclpackets)}, + {"localbytes", RL_OFF(rl_lclbytes)}, + {"intrs", RL_OFF(rl_intrs)}, + {"intrbytes", RL_OFF(rl_intrbytes)}, + {"rxsdrops", RL_OFF(rl_sdrops)} +}; +#define RX_SWLANE_STAT_SIZE A_CNT(rx_swlane_stats_list) + +static stat_info_t rx_lane_stats_list[] = { + {"ipackets", RL_OFF(rl_ipackets)}, + {"rbytes", RL_OFF(rl_rbytes)}, + {"local", RL_OFF(rl_lclpackets)}, + {"localbytes", RL_OFF(rl_lclbytes)}, + {"intrs", RL_OFF(rl_intrs)}, + {"intrbytes", RL_OFF(rl_intrbytes)}, + {"polls", RL_OFF(rl_polls)}, + {"rxsdrops", RL_OFF(rl_sdrops)}, + {"pollbytes", RL_OFF(rl_pollbytes)}, + {"chainunder10", RL_OFF(rl_chl10)}, + {"chain10to50", RL_OFF(rl_ch10_50)}, + {"chainover50", RL_OFF(rl_chg50)} +}; +#define RX_LANE_STAT_SIZE A_CNT(rx_lane_stats_list) + +/* Definitions for tx lane stats */ +#define TL_OFF(f) (offsetof(tx_lane_stat_t, f)) + +static stat_info_t tx_lane_stats_list[] = { + {"opackets", TL_OFF(tl_opackets)}, + {"obytes", TL_OFF(tl_obytes)}, + {"blockcnt", TL_OFF(tl_blockcnt)}, + {"unblockcnt", TL_OFF(tl_unblockcnt)}, + {"txsdrops", TL_OFF(tl_sdrops)} +}; +#define TX_LANE_STAT_SIZE A_CNT(tx_lane_stats_list) + +/* Definitions for tx/rx misc stats */ +#define M_OFF(f) (offsetof(misc_stat_t, f)) + +static stat_info_t misc_stats_list[] = { + {"multircv", M_OFF(ms_multircv)}, + {"brdcstrcv", M_OFF(ms_brdcstrcv)}, + {"multixmt", M_OFF(ms_multixmt)}, + {"brdcstxmt", M_OFF(ms_brdcstxmt)}, + {"multircvbytes", M_OFF(ms_multircvbytes)}, + {"brdcstrcvbytes", M_OFF(ms_brdcstrcvbytes)}, + {"multixmtbytes", M_OFF(ms_multixmtbytes)}, + {"brdcstxmtbytes", M_OFF(ms_brdcstxmtbytes)}, + {"txerrors", M_OFF(ms_txerrors)}, + {"macspoofed", M_OFF(ms_macspoofed)}, + {"ipspoofed", M_OFF(ms_ipspoofed)}, + {"dhcpspoofed", M_OFF(ms_dhcpspoofed)}, + {"restricted", M_OFF(ms_restricted)}, + {"ipackets", M_OFF(ms_ipackets)}, + {"rbytes", M_OFF(ms_rbytes)}, + {"local", M_OFF(ms_local)}, + {"localbytes", M_OFF(ms_localbytes)}, + {"intrs", M_OFF(ms_intrs)}, + {"intrbytes", 
M_OFF(ms_intrbytes)}, + {"polls", M_OFF(ms_polls)}, + {"pollbytes", M_OFF(ms_pollbytes)}, + {"rxsdrops", M_OFF(ms_rxsdrops)}, + {"chainunder10", M_OFF(ms_chainunder10)}, + {"chain10to50", M_OFF(ms_chain10to50)}, + {"chainover50", M_OFF(ms_chainover50)}, + {"obytes", M_OFF(ms_obytes)}, + {"opackets", M_OFF(ms_opackets)}, + {"blockcnt", M_OFF(ms_blockcnt)}, + {"unblockcnt", M_OFF(ms_unblockcnt)}, + {"txsdrops", M_OFF(ms_txsdrops)} +}; +#define MISC_STAT_SIZE A_CNT(misc_stats_list) + +/* Definitions for rx ring stats */ +#define R_OFF(f) (offsetof(ring_stat_t, f)) + +static stat_info_t rx_ring_stats_list[] = { + {"ipackets", R_OFF(r_packets)}, + {"rbytes", R_OFF(r_bytes)} +}; +#define RX_RING_STAT_SIZE A_CNT(rx_ring_stats_list) + +/* Definitions for tx ring stats */ +static stat_info_t tx_ring_stats_list[] = { + {"opackets", R_OFF(r_packets)}, + {"obytes", R_OFF(r_bytes)} +}; +#define TX_RING_STAT_SIZE A_CNT(tx_ring_stats_list) + +/* Definitions for fanout stats */ +#define F_OFF(f) (offsetof(fanout_stat_t, f)) + +static stat_info_t fanout_stats_list[] = { + {"ipackets", F_OFF(f_ipackets)}, + {"rbytes", F_OFF(f_rbytes)}, +}; +#define FANOUT_STAT_SIZE A_CNT(fanout_stats_list) + +/* Definitions for total stats */ +#define T_OFF(f) (offsetof(total_stat_t, f)) + +static stat_info_t total_stats_list[] = { + {"ipackets", T_OFF(ts_ipackets)}, + {"rbytes", T_OFF(ts_rbytes)}, + {"opackets", T_OFF(ts_opackets)}, + {"obytes", T_OFF(ts_obytes)} +}; +#define TOTAL_STAT_SIZE A_CNT(total_stats_list) + +/* Definitions for aggr stats */ +#define AP_OFF(f) (offsetof(aggr_port_stat_t, f)) + +static stat_info_t aggr_port_stats_list[] = { + {"ipackets64", AP_OFF(ap_ipackets)}, + {"rbytes64", AP_OFF(ap_rbytes)}, + {"opackets64", AP_OFF(ap_opackets)}, + {"obytes64", AP_OFF(ap_obytes)} +}; +#define AGGR_PORT_STAT_SIZE A_CNT(aggr_port_stats_list) + +/* Definitions for flow stats */ +#define FL_OFF(f) (offsetof(flow_stat_t, f)) + +static stat_info_t flow_stats_list[] = { + {"ipackets", 
FL_OFF(fl_ipackets)}, + {"rbytes", FL_OFF(fl_rbytes)}, + {"opackets", FL_OFF(fl_opackets)}, + {"obytes", FL_OFF(fl_obytes)} +}; +#define FLOW_STAT_SIZE A_CNT(flow_stats_list) + +/* Rx lane specific functions */ +void * dlstat_rx_lane_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_rx_lane_match(void *, void *); +static void * i_dlstat_rx_lane_stat_entry_diff(void *, void *); + +/* Tx lane specific functions */ +void * dlstat_tx_lane_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_tx_lane_match(void *, void *); +static void * i_dlstat_tx_lane_stat_entry_diff(void *, void *); + +/* Rx lane total specific functions */ +void * dlstat_rx_lane_total_stats(dladm_handle_t, + datalink_id_t); + +/* Tx lane total specific functions */ +void * dlstat_tx_lane_total_stats(dladm_handle_t, + datalink_id_t); + +/* Fanout specific functions */ +void * dlstat_fanout_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_fanout_match(void *, void *); +static void * i_dlstat_fanout_stat_entry_diff(void *, void *); + +/* Rx ring specific functions */ +void * dlstat_rx_ring_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_rx_ring_match(void *, void *); +static void * i_dlstat_rx_ring_stat_entry_diff(void *, void *); + +/* Tx ring specific functions */ +void * dlstat_tx_ring_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_tx_ring_match(void *, void *); +static void * i_dlstat_tx_ring_stat_entry_diff(void *, void *); + +/* Rx ring total specific functions */ +void * dlstat_rx_ring_total_stats(dladm_handle_t, + datalink_id_t); + +/* Tx ring total specific functions */ +void * dlstat_tx_ring_total_stats(dladm_handle_t, + datalink_id_t); + +/* Summary specific functions */ +void * dlstat_total_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_total_match(void *, void *); +static void * i_dlstat_total_stat_entry_diff(void *, void *); + +/* Aggr port specific functions */ +void * 
dlstat_aggr_port_stats(dladm_handle_t, datalink_id_t); +static boolean_t i_dlstat_aggr_port_match(void *, void *); +static void * i_dlstat_aggr_port_stat_entry_diff(void *, void *); + +/* Misc stat specific functions */ +void * dlstat_misc_stats(dladm_handle_t, datalink_id_t); + +typedef void * dladm_stat_query_t(dladm_handle_t, datalink_id_t); +typedef boolean_t dladm_stat_match_t(void *, void *); +typedef void * dladm_stat_diff_t(void *, void *); + +typedef struct dladm_stat_desc_s { + dladm_stat_type_t ds_stattype; + dladm_stat_query_t *ds_querystat; + dladm_stat_match_t *ds_matchstat; + dladm_stat_diff_t *ds_diffstat; + uint_t ds_offset; + stat_info_t *ds_statlist; + uint_t ds_statsize; +} dladm_stat_desc_t; + +/* + * dladm_stat_table has one entry for each supported stat. ds_querystat returns + * a chain of 'stat entries' for the queried stat. + * Each stat entry has set of identifiers (ids) and an object containing actual + * stat values. These stat entry objects are chained together in a linked list + * of datatype dladm_stat_chain_t. Head of this list is returned to the caller + * of dladm_link_stat_query. + * + * One node in the chain is shown below: + * + * ------------------------- + * | dc_statentry | + * | -------------- | + * | | ids | | + * | -------------- | + * | | stat fields | | + * | -------------- | + * ------------------------- + * | dc_next ---------|------> to next stat entry + * ------------------------- + * + * In particular, for query DLADM_STAT_RX_LANE, dc_statentry carries pointer to + * object of type rx_lane_stat_entry_t. + * + * dladm_link_stat_query_all returns similar chain. However, instead of storing + * stat fields as raw numbers, it stores those as chain of <name, value> pairs. 
+ * The resulting structure is depicted below: + * + * ------------------------- + * | dc_statentry | + * | -------------- | --------------- + * | | nv_header | | | name, val | + * | -------------- | --------------- + * | | nve_stats---|----|-->| nv_nextstat--|---> to next name, val pair + * | -------------- | --------------- + * ------------------------- + * | dc_next ---------|------> to next stat entry + * ------------------------- + */ +static dladm_stat_desc_t dladm_stat_table[] = { +{ DLADM_STAT_RX_LANE, dlstat_rx_lane_stats, + i_dlstat_rx_lane_match, i_dlstat_rx_lane_stat_entry_diff, + offsetof(rx_lane_stat_entry_t, rle_stats), + rx_lane_stats_list, RX_LANE_STAT_SIZE}, + +{ DLADM_STAT_TX_LANE, dlstat_tx_lane_stats, + i_dlstat_tx_lane_match, i_dlstat_tx_lane_stat_entry_diff, + offsetof(tx_lane_stat_entry_t, tle_stats), + tx_lane_stats_list, TX_LANE_STAT_SIZE}, + +{ DLADM_STAT_RX_LANE_TOTAL, dlstat_rx_lane_total_stats, + i_dlstat_rx_lane_match, i_dlstat_rx_lane_stat_entry_diff, + offsetof(rx_lane_stat_entry_t, rle_stats), + rx_lane_stats_list, RX_LANE_STAT_SIZE}, + +{ DLADM_STAT_TX_LANE_TOTAL, dlstat_tx_lane_total_stats, + i_dlstat_tx_lane_match, i_dlstat_tx_lane_stat_entry_diff, + offsetof(tx_lane_stat_entry_t, tle_stats), + tx_lane_stats_list, TX_LANE_STAT_SIZE}, + +{ DLADM_STAT_RX_LANE_FOUT, dlstat_fanout_stats, + i_dlstat_fanout_match, i_dlstat_fanout_stat_entry_diff, + offsetof(fanout_stat_entry_t, fe_stats), + fanout_stats_list, FANOUT_STAT_SIZE}, + +{ DLADM_STAT_RX_RING, dlstat_rx_ring_stats, + i_dlstat_rx_ring_match, i_dlstat_rx_ring_stat_entry_diff, + offsetof(ring_stat_entry_t, re_stats), + rx_ring_stats_list, RX_RING_STAT_SIZE}, + +{ DLADM_STAT_TX_RING, dlstat_tx_ring_stats, + i_dlstat_tx_ring_match, i_dlstat_tx_ring_stat_entry_diff, + offsetof(ring_stat_entry_t, re_stats), + tx_ring_stats_list, TX_RING_STAT_SIZE}, + +{ DLADM_STAT_RX_RING_TOTAL, dlstat_rx_ring_total_stats, + i_dlstat_rx_ring_match, i_dlstat_rx_ring_stat_entry_diff, + 
offsetof(ring_stat_entry_t, re_stats), + rx_ring_stats_list, RX_RING_STAT_SIZE}, + +{ DLADM_STAT_TX_RING_TOTAL, dlstat_tx_ring_total_stats, + i_dlstat_tx_ring_match, i_dlstat_tx_ring_stat_entry_diff, + offsetof(ring_stat_entry_t, re_stats), + tx_ring_stats_list, TX_RING_STAT_SIZE}, + +{ DLADM_STAT_TOTAL, dlstat_total_stats, + i_dlstat_total_match, i_dlstat_total_stat_entry_diff, + offsetof(total_stat_entry_t, tse_stats), + total_stats_list, TOTAL_STAT_SIZE}, + +{ DLADM_STAT_AGGR_PORT, dlstat_aggr_port_stats, + i_dlstat_aggr_port_match, i_dlstat_aggr_port_stat_entry_diff, + offsetof(aggr_port_stat_entry_t, ape_stats), + aggr_port_stats_list, AGGR_PORT_STAT_SIZE}, +/* + * We don't support -i <interval> query with misc stats. Several table fields + * are left uninitialized thus. + */ +{ DLADM_STAT_MISC, dlstat_misc_stats, + NULL, NULL, + 0, + misc_stats_list, MISC_STAT_SIZE} +}; + +/* Internal functions */ +static void * +dlstat_diff_stats(void *arg1, void *arg2, dladm_stat_type_t stattype) +{ + return (dladm_stat_table[stattype].ds_diffstat(arg1, arg2)); +} + +static boolean_t +dlstat_match_stats(void *arg1, void *arg2, dladm_stat_type_t stattype) +{ + return (dladm_stat_table[stattype].ds_matchstat(arg1, arg2)); +} + +/* Diff between two stats */ +static void +i_dlstat_diff_stats(void *diff, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (void *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (void *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *diff_val = (void *) + ((uchar_t *)diff + stats_list[i].si_offset); + + *diff_val = DIFF_STAT(*op1_val, *op2_val); + } +} + +/* + * Perform diff = s1 - s2, where diff, s1, s2 are structure objects of same + * datatype. slist is list of offsets of the fields within the structure. 
+ */ +#define DLSTAT_DIFF_STAT(s1, s2, diff, f, slist, sz) { \ + if (s2 == NULL) { \ + bcopy(&s1->f, &diff->f, sizeof (s1->f)); \ + } else { \ + i_dlstat_diff_stats(&diff->f, &s1->f, \ + &s2->f, slist, sz); \ + } \ +} + +/* Sum two stats */ +static void +i_dlstat_sum_stats(void *sum, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (void *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (void *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *sum_val = (void *) + ((uchar_t *)sum + stats_list[i].si_offset); + + *sum_val = *op1_val + *op2_val; + } +} + +/* Look up kstat value */ +static void +i_dlstat_get_stats(kstat_ctl_t *kcp, kstat_t *ksp, void *stats, + stat_info_t stats_list[], uint_t size) +{ + int i; + + if (kstat_read(kcp, ksp, NULL) == -1) + return; + + for (i = 0; i < size; i++) { + uint64_t *val = (void *) + ((uchar_t *)stats + stats_list[i].si_offset); + + if (dladm_kstat_value(ksp, stats_list[i].si_name, + KSTAT_DATA_UINT64, val) < 0) + return; + } +} + +/* Append linked list list1 to linked list list2 and return resulting list */ +static dladm_stat_chain_t * +i_dlstat_join_lists(dladm_stat_chain_t *list1, dladm_stat_chain_t *list2) +{ + dladm_stat_chain_t *curr; + + if (list1 == NULL) + return (list2); + + /* list1 has at least one element, find last element in list1 */ + curr = list1; + while (curr->dc_next != NULL) + curr = curr->dc_next; + + curr->dc_next = list2; + return (list1); +} + +uint_t default_idlist[] = {0}; +uint_t default_idlist_size = 1; + +typedef enum { + DLSTAT_RX_RING_IDLIST, + DLSTAT_TX_RING_IDLIST, + DLSTAT_RX_HWLANE_IDLIST, + DLSTAT_TX_HWLANE_IDLIST, + DLSTAT_FANOUT_IDLIST +} dlstat_idlist_type_t; + +void +dladm_sort_index_list(uint_t idlist[], uint_t size) +{ + int i, j; + + for (j = 1; j < size; j++) { + int key = idlist[j]; + for (i = j - 1; (i >= 0) && (idlist[i] > key); i--) + idlist[i + 1] = idlist[i]; + idlist[i + 
1] = key; + } +} + +/* Support for legacy drivers */ +void +i_query_legacy_stats(const char *linkname, pktsum_t *stats) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + + bzero(stats, sizeof (*stats)); + + if ((kcp = kstat_open()) == NULL) + return; + + ksp = dladm_kstat_lookup(kcp, "link", 0, linkname, NULL); + + if (ksp != NULL) + dladm_get_stats(kcp, ksp, stats); + + (void) kstat_close(kcp); +} + +void * +i_dlstat_legacy_rx_lane_stats(const char *linkname) +{ + dladm_stat_chain_t *head = NULL; + pktsum_t stats; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + bzero(&stats, sizeof (pktsum_t)); + + /* Query for dls stats */ + i_query_legacy_stats(linkname, &stats); + + /* Convert to desired data type */ + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_SWLANE; + + rx_lane_stat_entry->rle_stats.rl_ipackets = stats.ipackets; + rx_lane_stat_entry->rle_stats.rl_intrs = stats.ipackets; + rx_lane_stat_entry->rle_stats.rl_rbytes = stats.rbytes; + + /* Allocate memory for wrapper */ + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(rx_lane_stat_entry); + goto done; + } + + head->dc_statentry = rx_lane_stat_entry; + head->dc_next = NULL; +done: + return (head); +} + +void * +i_dlstat_legacy_tx_lane_stats(const char *linkname) +{ + dladm_stat_chain_t *head = NULL; + pktsum_t stats; + tx_lane_stat_entry_t *tx_lane_stat_entry; + + bzero(&stats, sizeof (pktsum_t)); + + /* Query for dls stats */ + i_query_legacy_stats(linkname, &stats); + + /* Convert to desired data type */ + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_SWLANE; + + tx_lane_stat_entry->tle_stats.tl_opackets = stats.opackets; + tx_lane_stat_entry->tle_stats.tl_obytes = stats.obytes; + + /* 
Allocate memory for wrapper */ + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(tx_lane_stat_entry); + goto done; + } + + head->dc_statentry = tx_lane_stat_entry; + head->dc_next = NULL; +done: + return (head); +} + +/* + * Ideally, we would want an ioctl to return list of ring-ids (or lane-ids) + * for a given data-link (or mac client). We could then query for specific + * kstats based on these ring-ids (lane-ids). + * Ring-ids (or lane-ids) could be returned like any other link properties + * queried by dladm show-linkprop. However, non-global zones do not have + * access to this information today. + * We thus opt for an implementation that relies heavily on kstat internals: + * i_dlstat_*search routines and i_dlstat_get_idlist. + */ +/* rx hwlane specific */ +static boolean_t +i_dlstat_rx_hwlane_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_rx") != 0 && + strstr(ksp->ks_name, "hwlane") != 0 && + strstr(ksp->ks_name, "fanout") == 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* tx hwlane specific */ +static boolean_t +i_dlstat_tx_hwlane_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_tx") != 0 && + strstr(ksp->ks_name, "hwlane") != 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* rx fanout specific */ +static boolean_t +i_dlstat_fanout_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_rx") != 0 && + strstr(ksp->ks_name, "swlane") != 0 && + strstr(ksp->ks_name, "fanout") != 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* rx ring specific */ +static boolean_t +i_dlstat_rx_ring_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0 && + strstr(ksp->ks_name, "mac_rx") != 0 && + strstr(ksp->ks_name, "ring") != 0 && + strcmp(ksp->ks_class, "net") == 0); +} + +/* tx ring specific */ +static boolean_t +i_dlstat_tx_ring_search(kstat_t *ksp) +{ + return (ksp->ks_instance == 0) && + strstr(ksp->ks_name, "mac_tx") != 0 && + 
strstr(ksp->ks_name, "ring") != 0 && + strcmp(ksp->ks_class, "net") == 0; +} + +typedef boolean_t dladm_search_kstat_t(kstat_t *); +typedef struct dladm_extract_idlist_s { + dlstat_idlist_type_t di_type; + char *di_prefix; + dladm_search_kstat_t *di_searchkstat; +} dladm_extract_idlist_t; + +static dladm_extract_idlist_t dladm_extract_idlist[] = { +{ DLSTAT_RX_RING_IDLIST, DLSTAT_MAC_RX_RING, + i_dlstat_rx_ring_search}, +{ DLSTAT_TX_RING_IDLIST, DLSTAT_MAC_TX_RING, + i_dlstat_tx_ring_search}, +{ DLSTAT_RX_HWLANE_IDLIST, DLSTAT_MAC_RX_HWLANE, + i_dlstat_rx_hwlane_search}, +{ DLSTAT_TX_HWLANE_IDLIST, DLSTAT_MAC_TX_HWLANE, + i_dlstat_tx_hwlane_search}, +{ DLSTAT_FANOUT_IDLIST, DLSTAT_MAC_FANOUT, + i_dlstat_fanout_search} +}; + +static void +i_dlstat_get_idlist(const char *modname, dlstat_idlist_type_t idlist_type, + uint_t idlist[], uint_t *size) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + char *prefix; + int prefixlen; + boolean_t (*fptr_searchkstat)(kstat_t *); + + *size = 0; + + if ((kcp = kstat_open()) == NULL) { + warn("kstat_open operation failed"); + goto done; + } + + prefix = dladm_extract_idlist[idlist_type].di_prefix; + fptr_searchkstat = dladm_extract_idlist[idlist_type].di_searchkstat; + prefixlen = strlen(prefix); + for (ksp = kcp->kc_chain; ksp != NULL; ksp = ksp->ks_next) { + if ((strcmp(ksp->ks_module, modname) == 0) && + fptr_searchkstat(ksp)) { + idlist[(*size)++] = atoi(&ksp->ks_name[prefixlen]); + } + } + dladm_sort_index_list(idlist, *size); + +done: + (void) kstat_close(kcp); +} + +static dladm_stat_chain_t * +i_dlstat_query_stats(const char *modname, const char *prefix, + uint_t idlist[], uint_t idlist_size, + void * (*fn)(kstat_ctl_t *, kstat_t *, int)) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + char statname[MAXLINKNAMELEN]; + int i = 0; + dladm_stat_chain_t *head = NULL, *prev = NULL; + dladm_stat_chain_t *curr; + + if ((kcp = kstat_open()) == NULL) { + warn("kstat_open operation failed"); + return (NULL); + } + + for (i = 0; i < idlist_size; 
i++) { + uint_t index = idlist[i]; + + (void) snprintf(statname, sizeof (statname), "%s%d", prefix, + index); + + ksp = dladm_kstat_lookup(kcp, modname, 0, statname, NULL); + if (ksp == NULL) + continue; + + curr = malloc(sizeof (dladm_stat_chain_t)); + if (curr == NULL) + break; + + curr->dc_statentry = fn(kcp, ksp, index); + if (curr->dc_statentry == NULL) { + free(curr); + break; + } + + (void) strlcpy(curr->dc_statheader, statname, + sizeof (curr->dc_statheader)); + curr->dc_next = NULL; + + if (head == NULL) /* First node */ + head = curr; + else + prev->dc_next = curr; + + prev = curr; + } +done: + (void) kstat_close(kcp); + return (head); +} + +static misc_stat_entry_t * +i_dlstat_misc_stats(const char *linkname) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + misc_stat_entry_t *misc_stat_entry = NULL; + + if ((kcp = kstat_open()) == NULL) + return (NULL); + + ksp = dladm_kstat_lookup(kcp, linkname, 0, DLSTAT_MAC_MISC_STAT, NULL); + if (ksp == NULL) + goto done; + + misc_stat_entry = calloc(1, sizeof (misc_stat_entry_t)); + if (misc_stat_entry == NULL) + goto done; + + i_dlstat_get_stats(kcp, ksp, &misc_stat_entry->mse_stats, + misc_stats_list, MISC_STAT_SIZE); +done: + (void) kstat_close(kcp); + return (misc_stat_entry); +} + +/* Rx lane statistic specific functions */ +static boolean_t +i_dlstat_rx_lane_match(void *arg1, void *arg2) +{ + rx_lane_stat_entry_t *s1 = arg1; + rx_lane_stat_entry_t *s2 = arg2; + + return (s1->rle_index == s2->rle_index && + s1->rle_id == s2->rle_id); +} + +static void * +i_dlstat_rx_lane_stat_entry_diff(void *arg1, void *arg2) +{ + rx_lane_stat_entry_t *s1 = arg1; + rx_lane_stat_entry_t *s2 = arg2; + rx_lane_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (rx_lane_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->rle_index = s1->rle_index; + diff_entry->rle_id = s1->rle_id; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, rle_stats, rx_lane_stats_list, + RX_LANE_STAT_SIZE); + +done: + return (diff_entry); +} + 
+static void * +i_dlstat_rx_hwlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + rx_lane_stat_entry_t *rx_lane_stat_entry; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = i; + rx_lane_stat_entry->rle_id = L_HWLANE; + + i_dlstat_get_stats(kcp, ksp, &rx_lane_stat_entry->rle_stats, + rx_hwlane_stats_list, RX_HWLANE_STAT_SIZE); + +done: + return (rx_lane_stat_entry); +} + +/*ARGSUSED*/ +static void * +i_dlstat_rx_swlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + rx_lane_stat_entry_t *rx_lane_stat_entry; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_SWLANE; + + i_dlstat_get_stats(kcp, ksp, &rx_lane_stat_entry->rle_stats, + rx_swlane_stats_list, RX_SWLANE_STAT_SIZE); + + rx_lane_stat_entry->rle_stats.rl_ipackets = + rx_lane_stat_entry->rle_stats.rl_intrs; + rx_lane_stat_entry->rle_stats.rl_rbytes = + rx_lane_stat_entry->rle_stats.rl_intrbytes; +done: + return (rx_lane_stat_entry); +} + +/*ARGSUSED*/ +static void * +i_dlstat_rx_local_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + rx_lane_stat_entry_t *local_stat_entry; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + local_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (local_stat_entry == NULL) + goto done; + + local_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + local_stat_entry->rle_id = L_LOCAL; + + i_dlstat_get_stats(kcp, ksp, &rx_lane_stat_entry->rle_stats, + rx_swlane_stats_list, RX_SWLANE_STAT_SIZE); + + local_stat_entry->rle_stats.rl_ipackets = + rx_lane_stat_entry->rle_stats.rl_lclpackets; + local_stat_entry->rle_stats.rl_rbytes = + rx_lane_stat_entry->rle_stats.rl_lclbytes; + +done: + 
free(rx_lane_stat_entry); + return (local_stat_entry); +} + +static dladm_stat_chain_t * +i_dlstat_rx_local_stats(const char *linkname) +{ + dladm_stat_chain_t *local_stats = NULL; + + local_stats = i_dlstat_query_stats(linkname, DLSTAT_MAC_RX_SWLANE, + default_idlist, default_idlist_size, + i_dlstat_rx_local_retrieve_stat); + + if (local_stats != NULL) { + (void) strlcpy(local_stats->dc_statheader, "mac_rx_local", + sizeof (local_stats->dc_statheader)); + } + return (local_stats); +} + +static dladm_stat_chain_t * +i_dlstat_rx_bcast_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + rx_lane_stat_entry = calloc(1, sizeof (rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_BCAST; + + rx_lane_stat_entry->rle_stats.rl_ipackets = + misc_stat_entry->mse_stats.ms_brdcstrcv + + misc_stat_entry->mse_stats.ms_multircv; + rx_lane_stat_entry->rle_stats.rl_intrs = + misc_stat_entry->mse_stats.ms_brdcstrcv + + misc_stat_entry->mse_stats.ms_multircv; + rx_lane_stat_entry->rle_stats.rl_rbytes = + misc_stat_entry->mse_stats.ms_brdcstrcvbytes + + misc_stat_entry->mse_stats.ms_multircvbytes; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(rx_lane_stat_entry); + goto done; + } + + head->dc_statentry = rx_lane_stat_entry; + head->dc_next = NULL; + + free(misc_stat_entry); +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_rx_defunctlane_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + rx_lane_stat_entry_t *rx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + rx_lane_stat_entry = calloc(1, sizeof 
(rx_lane_stat_entry_t)); + if (rx_lane_stat_entry == NULL) + goto done; + + rx_lane_stat_entry->rle_index = DLSTAT_INVALID_ENTRY; + rx_lane_stat_entry->rle_id = L_DFNCT; + + rx_lane_stat_entry->rle_stats.rl_ipackets = + misc_stat_entry->mse_stats.ms_ipackets; + rx_lane_stat_entry->rle_stats.rl_rbytes = + misc_stat_entry->mse_stats.ms_rbytes; + rx_lane_stat_entry->rle_stats.rl_intrs = + misc_stat_entry->mse_stats.ms_intrs; + rx_lane_stat_entry->rle_stats.rl_polls = + misc_stat_entry->mse_stats.ms_polls; + rx_lane_stat_entry->rle_stats.rl_sdrops = + misc_stat_entry->mse_stats.ms_rxsdrops; + rx_lane_stat_entry->rle_stats.rl_chl10 = + misc_stat_entry->mse_stats.ms_chainunder10; + rx_lane_stat_entry->rle_stats.rl_ch10_50 = + misc_stat_entry->mse_stats.ms_chain10to50; + rx_lane_stat_entry->rle_stats.rl_chg50 = + misc_stat_entry->mse_stats.ms_chainover50; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(rx_lane_stat_entry); + goto done; + } + + head->dc_statentry = rx_lane_stat_entry; + head->dc_next = NULL; + +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_rx_hwlane_stats(const char *linkname) +{ + uint_t rx_hwlane_idlist[MAX_RINGS_PER_GROUP]; + uint_t rx_hwlane_idlist_size; + + i_dlstat_get_idlist(linkname, DLSTAT_RX_HWLANE_IDLIST, + rx_hwlane_idlist, &rx_hwlane_idlist_size); + + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_RX_HWLANE, + rx_hwlane_idlist, rx_hwlane_idlist_size, + i_dlstat_rx_hwlane_retrieve_stat)); +} + +/*ARGSUSED*/ +static dladm_stat_chain_t * +i_dlstat_rx_swlane_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_RX_SWLANE, + default_idlist, default_idlist_size, + i_dlstat_rx_swlane_retrieve_stat)); +} + +void * +dlstat_rx_lane_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *local_stats = NULL; + dladm_stat_chain_t *bcast_stats = NULL; + dladm_stat_chain_t 
*defunctlane_stats = NULL; + dladm_stat_chain_t *lane_stats = NULL; + char linkname[MAXLINKNAMELEN]; + boolean_t is_legacy_driver; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + /* Check if it is legacy driver */ + if (dladm_linkprop_is_set(dh, linkid, DLADM_PROP_VAL_CURRENT, + "_softmac", &is_legacy_driver) != DLADM_STATUS_OK) { + goto done; + } + + if (is_legacy_driver) { + head = i_dlstat_legacy_rx_lane_stats(linkname); + goto done; + } + + local_stats = i_dlstat_rx_local_stats(linkname); + bcast_stats = i_dlstat_rx_bcast_stats(linkname); + defunctlane_stats = i_dlstat_rx_defunctlane_stats(linkname); + lane_stats = i_dlstat_rx_hwlane_stats(linkname); + if (lane_stats == NULL) + lane_stats = i_dlstat_rx_swlane_stats(dh, linkid, linkname); + + head = i_dlstat_join_lists(local_stats, bcast_stats); + head = i_dlstat_join_lists(head, defunctlane_stats); + head = i_dlstat_join_lists(head, lane_stats); +done: + return (head); +} + +/* Tx lane statistic specific functions */ +static boolean_t +i_dlstat_tx_lane_match(void *arg1, void *arg2) +{ + tx_lane_stat_entry_t *s1 = arg1; + tx_lane_stat_entry_t *s2 = arg2; + + return (s1->tle_index == s2->tle_index && + s1->tle_id == s2->tle_id); +} + +static void * +i_dlstat_tx_lane_stat_entry_diff(void *arg1, void *arg2) +{ + tx_lane_stat_entry_t *s1 = arg1; + tx_lane_stat_entry_t *s2 = arg2; + tx_lane_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (tx_lane_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->tle_index = s1->tle_index; + diff_entry->tle_id = s1->tle_id; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, tle_stats, tx_lane_stats_list, + TX_LANE_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_tx_hwlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + tx_lane_stat_entry_t *tx_lane_stat_entry; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if 
(tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = i; + tx_lane_stat_entry->tle_id = L_HWLANE; + + i_dlstat_get_stats(kcp, ksp, &tx_lane_stat_entry->tle_stats, + tx_lane_stats_list, TX_LANE_STAT_SIZE); + +done: + return (tx_lane_stat_entry); +} + +/*ARGSUSED*/ +static void * +i_dlstat_tx_swlane_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + tx_lane_stat_entry_t *tx_lane_stat_entry; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_SWLANE; + + i_dlstat_get_stats(kcp, ksp, &tx_lane_stat_entry->tle_stats, + tx_lane_stats_list, TX_LANE_STAT_SIZE); + +done: + return (tx_lane_stat_entry); +} + +static dladm_stat_chain_t * +i_dlstat_tx_bcast_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + tx_lane_stat_entry_t *tx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_BCAST; + + tx_lane_stat_entry->tle_stats.tl_opackets = + misc_stat_entry->mse_stats.ms_brdcstxmt + + misc_stat_entry->mse_stats.ms_multixmt; + + tx_lane_stat_entry->tle_stats.tl_obytes = + misc_stat_entry->mse_stats.ms_brdcstxmtbytes + + misc_stat_entry->mse_stats.ms_multixmtbytes; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(tx_lane_stat_entry); + goto done; + } + + head->dc_statentry = tx_lane_stat_entry; + head->dc_next = NULL; + + free(misc_stat_entry); +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_tx_defunctlane_stats(const char *linkname) +{ + misc_stat_entry_t *misc_stat_entry; + dladm_stat_chain_t *head = NULL; + tx_lane_stat_entry_t 
*tx_lane_stat_entry; + + misc_stat_entry = i_dlstat_misc_stats(linkname); + if (misc_stat_entry == NULL) + goto done; + + tx_lane_stat_entry = calloc(1, sizeof (tx_lane_stat_entry_t)); + if (tx_lane_stat_entry == NULL) + goto done; + + tx_lane_stat_entry->tle_index = DLSTAT_INVALID_ENTRY; + tx_lane_stat_entry->tle_id = L_DFNCT; + + tx_lane_stat_entry->tle_stats.tl_opackets = + misc_stat_entry->mse_stats.ms_opackets; + tx_lane_stat_entry->tle_stats.tl_obytes = + misc_stat_entry->mse_stats.ms_obytes; + tx_lane_stat_entry->tle_stats.tl_sdrops = + misc_stat_entry->mse_stats.ms_txsdrops; + + head = malloc(sizeof (dladm_stat_chain_t)); + if (head == NULL) { + free(tx_lane_stat_entry); + goto done; + } + + head->dc_statentry = tx_lane_stat_entry; + head->dc_next = NULL; + +done: + return (head); +} + +static dladm_stat_chain_t * +i_dlstat_tx_hwlane_stats(const char *linkname) +{ + uint_t tx_hwlane_idlist[MAX_RINGS_PER_GROUP]; + uint_t tx_hwlane_idlist_size; + + i_dlstat_get_idlist(linkname, DLSTAT_TX_HWLANE_IDLIST, + tx_hwlane_idlist, &tx_hwlane_idlist_size); + + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_TX_HWLANE, + tx_hwlane_idlist, tx_hwlane_idlist_size, + i_dlstat_tx_hwlane_retrieve_stat)); +} + +/*ARGSUSED*/ +static dladm_stat_chain_t * +i_dlstat_tx_swlane_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + return (i_dlstat_query_stats(linkname, DLSTAT_MAC_TX_SWLANE, + default_idlist, default_idlist_size, + i_dlstat_tx_swlane_retrieve_stat)); +} + +void * +dlstat_tx_lane_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *bcast_stats = NULL; + dladm_stat_chain_t *defunctlane_stats = NULL; + dladm_stat_chain_t *lane_stats; + char linkname[MAXLINKNAMELEN]; + boolean_t is_legacy_driver; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + /* Check if it is legacy driver */ + if (dladm_linkprop_is_set(dh, 
linkid, DLADM_PROP_VAL_CURRENT,
+	    "_softmac", &is_legacy_driver) != DLADM_STATUS_OK) {
+		goto done;
+	}
+
+	if (is_legacy_driver) {
+		head = i_dlstat_legacy_tx_lane_stats(linkname);
+		goto done;
+	}
+
+	bcast_stats = i_dlstat_tx_bcast_stats(linkname);
+	defunctlane_stats = i_dlstat_tx_defunctlane_stats(linkname);
+	lane_stats = i_dlstat_tx_hwlane_stats(linkname);
+	if (lane_stats == NULL)
+		lane_stats = i_dlstat_tx_swlane_stats(dh, linkid, linkname);
+
+	head = i_dlstat_join_lists(bcast_stats, defunctlane_stats);
+	head = i_dlstat_join_lists(head, lane_stats);
+
+done:
+	return (head);
+}
+
+/* Rx lane total statistic specific functions */
+/*
+ * Sum the stats of every rx lane of the link into a single entry.
+ *
+ * Returns a one-node chain (header "mac_rx_lane_total") whose entry is an
+ * rx_lane_stat_entry_t with index and id set to DLSTAT_INVALID_ENTRY, or
+ * NULL if the per-lane stats could not be obtained or memory ran out.
+ */
+void *
+dlstat_rx_lane_total_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	dladm_stat_chain_t	*total_head = NULL;
+	dladm_stat_chain_t	*rx_lane_head, *curr;
+	rx_lane_stat_entry_t	*total_stats;
+
+	/* Get per rx lane stats */
+	rx_lane_head = dlstat_rx_lane_stats(dh, linkid);
+	if (rx_lane_head == NULL)
+		goto done;
+
+	total_stats = calloc(1, sizeof (rx_lane_stat_entry_t));
+	if (total_stats == NULL)
+		goto cleanup;
+
+	total_stats->rle_index = DLSTAT_INVALID_ENTRY;
+	total_stats->rle_id = DLSTAT_INVALID_ENTRY;
+
+	for (curr = rx_lane_head; curr != NULL; curr = curr->dc_next) {
+		rx_lane_stat_entry_t	*curr_lane_stats = curr->dc_statentry;
+
+		i_dlstat_sum_stats(&total_stats->rle_stats,
+		    &curr_lane_stats->rle_stats, &total_stats->rle_stats,
+		    rx_lane_stats_list, RX_LANE_STAT_SIZE);
+	}
+
+	total_head = malloc(sizeof (dladm_stat_chain_t));
+	if (total_head == NULL) {
+		free(total_stats);
+		goto cleanup;
+	}
+
+	total_head->dc_statentry = total_stats;
+	(void) strlcpy(total_head->dc_statheader, "mac_rx_lane_total",
+	    sizeof (total_head->dc_statheader));
+	total_head->dc_next = NULL;
+
+cleanup:
+	/*
+	 * The per-lane chain was only needed for the summation. Release
+	 * every node together with its stat entry (the same way
+	 * dladm_link_stat_free() releases a chain), on success as well as
+	 * on allocation failure, instead of freeing just the first node.
+	 */
+	while (rx_lane_head != NULL) {
+		curr = rx_lane_head;
+		rx_lane_head = curr->dc_next;
+		free(curr->dc_statentry);
+		free(curr);
+	}
+
+done:
+	return (total_head);
+}
+
+/* Tx lane total statistic specific functions */
+void *
+dlstat_tx_lane_total_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	dladm_stat_chain_t	*total_head = NULL;
+	dladm_stat_chain_t
*tx_lane_head, *curr;
+	tx_lane_stat_entry_t	*total_stats;
+
+	/*
+	 * Sum the stats of every tx lane into one entry and return it as a
+	 * one-node chain; NULL is returned when the per-lane stats are
+	 * unavailable or memory runs out.
+	 */
+
+	/* Get per tx lane stats */
+	tx_lane_head = dlstat_tx_lane_stats(dh, linkid);
+	if (tx_lane_head == NULL)
+		goto done;
+
+	total_stats = calloc(1, sizeof (tx_lane_stat_entry_t));
+	if (total_stats == NULL)
+		goto cleanup;
+
+	total_stats->tle_index = DLSTAT_INVALID_ENTRY;
+	total_stats->tle_id = DLSTAT_INVALID_ENTRY;
+
+	for (curr = tx_lane_head; curr != NULL; curr = curr->dc_next) {
+		tx_lane_stat_entry_t	*curr_lane_stats = curr->dc_statentry;
+
+		i_dlstat_sum_stats(&total_stats->tle_stats,
+		    &curr_lane_stats->tle_stats, &total_stats->tle_stats,
+		    tx_lane_stats_list, TX_LANE_STAT_SIZE);
+	}
+
+	total_head = malloc(sizeof (dladm_stat_chain_t));
+	if (total_head == NULL) {
+		free(total_stats);
+		goto cleanup;
+	}
+
+	total_head->dc_statentry = total_stats;
+	(void) strlcpy(total_head->dc_statheader, "mac_tx_lane_total",
+	    sizeof (total_head->dc_statheader));
+	total_head->dc_next = NULL;
+
+cleanup:
+	/*
+	 * The per-lane chain was only needed for the summation. Release
+	 * every node together with its stat entry (the same way
+	 * dladm_link_stat_free() releases a chain), on success as well as
+	 * on allocation failure, instead of freeing just the first node.
+	 */
+	while (tx_lane_head != NULL) {
+		curr = tx_lane_head;
+		tx_lane_head = curr->dc_next;
+		free(curr->dc_statentry);
+		free(curr);
+	}
+
+done:
+	return (total_head);
+}
+
+/* Fanout specific functions */
+static boolean_t
+i_dlstat_fanout_match(void *arg1, void *arg2)
+{
+	fanout_stat_entry_t	*s1 = arg1;
+	fanout_stat_entry_t	*s2 = arg2;
+
+	return (s1->fe_index == s2->fe_index &&
+	    s1->fe_id == s2->fe_id &&
+	    s1->fe_foutindex == s2->fe_foutindex);
+}
+
+static void *
+i_dlstat_fanout_stat_entry_diff(void *arg1, void *arg2)
+{
+	fanout_stat_entry_t	*s1 = arg1;
+	fanout_stat_entry_t	*s2 = arg2;
+	fanout_stat_entry_t	*diff_entry;
+
+	diff_entry = malloc(sizeof (fanout_stat_entry_t));
+	if (diff_entry == NULL)
+		goto done;
+
+	diff_entry->fe_index = s1->fe_index;
+	diff_entry->fe_id = s1->fe_id;
+	diff_entry->fe_foutindex = s1->fe_foutindex;
+
+	DLSTAT_DIFF_STAT(s1, s2, diff_entry, fe_stats, fanout_stats_list,
+	    FANOUT_STAT_SIZE);
+
+done:
+	return (diff_entry);
+}
+
+static void *
+i_dlstat_fanout_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i)
+{
+	fanout_stat_entry_t	*fanout_stat_entry;
+
+	fanout_stat_entry = calloc(1, sizeof
(fanout_stat_entry_t)); + if (fanout_stat_entry == NULL) + goto done; + + /* Set by the caller later */ + fanout_stat_entry->fe_index = DLSTAT_INVALID_ENTRY; + fanout_stat_entry->fe_id = DLSTAT_INVALID_ENTRY; + + fanout_stat_entry->fe_foutindex = i; + + i_dlstat_get_stats(kcp, ksp, &fanout_stat_entry->fe_stats, + fanout_stats_list, FANOUT_STAT_SIZE); + +done: + return (fanout_stat_entry); +} + +static void * +i_dlstat_query_fanout_stats(dladm_handle_t dh, datalink_id_t linkid, + uint_t idlist[], uint_t idlist_size, + const char *modname, const char *prefix) +{ + int i; + char statprefix[MAXLINKNAMELEN]; + char linkname[MAXLINKNAMELEN]; + dladm_stat_chain_t *curr, *curr_head; + dladm_stat_chain_t *head = NULL, *prev = NULL; + uint_t fanout_idlist[MAX_RINGS_PER_GROUP]; + uint_t fanout_idlist_size; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + return (NULL); + } + + i_dlstat_get_idlist(linkname, DLSTAT_FANOUT_IDLIST, + fanout_idlist, &fanout_idlist_size); + + for (i = 0; i < idlist_size; i++) { + uint_t index = idlist[i]; + + (void) snprintf(statprefix, sizeof (statprefix), "%s%d_fanout", + prefix, index); + + curr_head = i_dlstat_query_stats(modname, statprefix, + fanout_idlist, fanout_idlist_size, + i_dlstat_fanout_retrieve_stat); + + if (curr_head == NULL) /* Last lane */ + break; + + if (head == NULL) /* First lane */ + head = curr_head; + else /* Link new lane list to end of previous lane list */ + prev->dc_next = curr_head; + + /* Walk new lane list and set ids */ + for (curr = curr_head; curr != NULL; curr = curr->dc_next) { + fanout_stat_entry_t *curr_stats = curr->dc_statentry; + + curr_stats->fe_index = index; + curr_stats->fe_id = L_HWLANE; + /* + * Save last pointer of previous linked list. + * This pointer is used to chain linked lists + * generated in each iteration. 
+ */ + prev = curr; + } + } + + return (head); +} + +void * +dlstat_fanout_swlane_and_local_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + return (i_dlstat_query_fanout_stats(dh, linkid, + default_idlist, default_idlist_size, linkname, + DLSTAT_MAC_RX_SWLANE)); +} + +void * +dlstat_fanout_hwlane_stats(dladm_handle_t dh, datalink_id_t linkid, + const char *linkname) +{ + uint_t rx_hwlane_idlist[MAX_RINGS_PER_GROUP]; + uint_t rx_hwlane_idlist_size; + + i_dlstat_get_idlist(linkname, DLSTAT_RX_HWLANE_IDLIST, + rx_hwlane_idlist, &rx_hwlane_idlist_size); + + return (i_dlstat_query_fanout_stats(dh, linkid, rx_hwlane_idlist, + rx_hwlane_idlist_size, linkname, DLSTAT_MAC_RX_HWLANE)); +} + +void * +dlstat_fanout_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + dladm_stat_chain_t *head = NULL; + dladm_stat_chain_t *fout_hwlane_stats; + dladm_stat_chain_t *fout_swlane_and_local_stats; + fanout_stat_entry_t *fout_stats; + char linkname[MAXLINKNAMELEN]; + + if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + goto done; + } + + fout_swlane_and_local_stats = + dlstat_fanout_swlane_and_local_stats(dh, linkid, linkname); + fout_hwlane_stats = dlstat_fanout_hwlane_stats(dh, linkid, linkname); + + if (fout_swlane_and_local_stats == NULL) { + head = fout_hwlane_stats; + goto done; + } + + fout_stats = fout_swlane_and_local_stats->dc_statentry; + + if (fout_hwlane_stats != NULL) { /* hwlane(s), only local traffic */ + fout_stats->fe_id = L_LOCAL; + fout_stats->fe_index = DLSTAT_INVALID_ENTRY; + } else { /* no hwlane, mix of local+sw classified */ + fout_stats->fe_id = L_LCLSWLANE; + fout_stats->fe_index = DLSTAT_INVALID_ENTRY; + } + + fout_swlane_and_local_stats->dc_next = fout_hwlane_stats; + head = fout_swlane_and_local_stats; + +done: + return (head); +} + +/* Rx ring statistic specific functions */ +static boolean_t +i_dlstat_rx_ring_match(void *arg1, void *arg2) +{ + rx_lane_stat_entry_t *s1 
= arg1; + rx_lane_stat_entry_t *s2 = arg2; + + return (s1->rle_index == s2->rle_index); +} + +static void * +i_dlstat_rx_ring_stat_entry_diff(void *arg1, void *arg2) +{ + ring_stat_entry_t *s1 = arg1; + ring_stat_entry_t *s2 = arg2; + ring_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (ring_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->re_index = s1->re_index; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, re_stats, rx_ring_stats_list, + RX_RING_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_rx_ring_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + ring_stat_entry_t *rx_ring_stat_entry; + + rx_ring_stat_entry = calloc(1, sizeof (ring_stat_entry_t)); + if (rx_ring_stat_entry == NULL) + goto done; + + rx_ring_stat_entry->re_index = i; + + i_dlstat_get_stats(kcp, ksp, &rx_ring_stat_entry->re_stats, + rx_ring_stats_list, RX_RING_STAT_SIZE); + +done: + return (rx_ring_stat_entry); +} + +void * +dlstat_rx_ring_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + uint_t rx_ring_idlist[MAX_RINGS_PER_GROUP]; + uint_t rx_ring_idlist_size; + dladm_phys_attr_t dpa; + char linkname[MAXLINKNAMELEN]; + char *modname; + datalink_class_t class; + + /* + * kstats corresponding to physical device rings continue to use + * device names even if the link is renamed using dladm rename-link. + * Thus, given a linkid, we lookup the physical device name. + * However, if an aggr is renamed, kstats corresponding to its + * pseudo rings are renamed as well. 
+ */ + if (dladm_datalink_id2info(dh, linkid, NULL, &class, NULL, linkname, + DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) { + return (NULL); + } + + if (class != DATALINK_CLASS_AGGR) { + if (dladm_phys_info(dh, linkid, &dpa, DLADM_OPT_ACTIVE) != + DLADM_STATUS_OK) { + return (NULL); + } + modname = dpa.dp_dev; + } else + modname = linkname; + + i_dlstat_get_idlist(modname, DLSTAT_RX_RING_IDLIST, + rx_ring_idlist, &rx_ring_idlist_size); + + return (i_dlstat_query_stats(modname, DLSTAT_MAC_RX_RING, + rx_ring_idlist, rx_ring_idlist_size, + i_dlstat_rx_ring_retrieve_stat)); +} + +/* Tx ring statistic specific functions */ +static boolean_t +i_dlstat_tx_ring_match(void *arg1, void *arg2) +{ + tx_lane_stat_entry_t *s1 = arg1; + tx_lane_stat_entry_t *s2 = arg2; + + return (s1->tle_index == s2->tle_index); +} + +static void * +i_dlstat_tx_ring_stat_entry_diff(void *arg1, void *arg2) +{ + ring_stat_entry_t *s1 = arg1; + ring_stat_entry_t *s2 = arg2; + ring_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (ring_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->re_index = s1->re_index; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, re_stats, tx_ring_stats_list, + TX_RING_STAT_SIZE); + +done: + return (diff_entry); +} + +static void * +i_dlstat_tx_ring_retrieve_stat(kstat_ctl_t *kcp, kstat_t *ksp, int i) +{ + ring_stat_entry_t *tx_ring_stat_entry; + + tx_ring_stat_entry = calloc(1, sizeof (ring_stat_entry_t)); + if (tx_ring_stat_entry == NULL) + goto done; + + tx_ring_stat_entry->re_index = i; + + i_dlstat_get_stats(kcp, ksp, &tx_ring_stat_entry->re_stats, + tx_ring_stats_list, TX_RING_STAT_SIZE); + +done: + return (tx_ring_stat_entry); +} + +void * +dlstat_tx_ring_stats(dladm_handle_t dh, datalink_id_t linkid) +{ + uint_t tx_ring_idlist[MAX_RINGS_PER_GROUP]; + uint_t tx_ring_idlist_size; + dladm_phys_attr_t dpa; + char linkname[MAXLINKNAMELEN]; + char *modname; + datalink_class_t class; + + /* + * kstats corresponding to physical device rings continue to use 
+	 * device names even if the link is renamed using dladm rename-link.
+	 * Thus, given a linkid, we lookup the physical device name.
+	 * However, if an aggr is renamed, kstats corresponding to its
+	 * pseudo rings are renamed as well.
+	 */
+	if (dladm_datalink_id2info(dh, linkid, NULL, &class, NULL, linkname,
+	    DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) {
+		return (NULL);
+	}
+
+	if (class != DATALINK_CLASS_AGGR) {
+		if (dladm_phys_info(dh, linkid, &dpa, DLADM_OPT_ACTIVE) !=
+		    DLADM_STATUS_OK) {
+			return (NULL);
+		}
+		modname = dpa.dp_dev;
+	} else
+		modname = linkname;
+
+	i_dlstat_get_idlist(modname, DLSTAT_TX_RING_IDLIST,
+	    tx_ring_idlist, &tx_ring_idlist_size);
+
+	return (i_dlstat_query_stats(modname, DLSTAT_MAC_TX_RING,
+	    tx_ring_idlist, tx_ring_idlist_size,
+	    i_dlstat_tx_ring_retrieve_stat));
+}
+
+/* Rx ring total statistic specific functions */
+/*
+ * Sum the stats of every rx ring of the link into a single entry.
+ *
+ * Returns a one-node chain (header "mac_rx_ring_total") whose entry is a
+ * ring_stat_entry_t with re_index set to DLSTAT_INVALID_ENTRY, or NULL if
+ * the per-ring stats could not be obtained or memory ran out.
+ */
+void *
+dlstat_rx_ring_total_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	dladm_stat_chain_t	*total_head = NULL;
+	dladm_stat_chain_t	*rx_ring_head, *curr;
+	ring_stat_entry_t	*total_stats;
+
+	/* Get per rx ring stats */
+	rx_ring_head = dlstat_rx_ring_stats(dh, linkid);
+	if (rx_ring_head == NULL)
+		goto done;
+
+	total_stats = calloc(1, sizeof (ring_stat_entry_t));
+	if (total_stats == NULL)
+		goto cleanup;
+
+	total_stats->re_index = DLSTAT_INVALID_ENTRY;
+
+	for (curr = rx_ring_head; curr != NULL; curr = curr->dc_next) {
+		ring_stat_entry_t	*curr_ring_stats = curr->dc_statentry;
+
+		i_dlstat_sum_stats(&total_stats->re_stats,
+		    &curr_ring_stats->re_stats, &total_stats->re_stats,
+		    rx_ring_stats_list, RX_RING_STAT_SIZE);
+	}
+
+	total_head = malloc(sizeof (dladm_stat_chain_t));
+	if (total_head == NULL) {
+		free(total_stats);
+		goto cleanup;
+	}
+
+	total_head->dc_statentry = total_stats;
+	(void) strlcpy(total_head->dc_statheader, "mac_rx_ring_total",
+	    sizeof (total_head->dc_statheader));
+	total_head->dc_next = NULL;
+
+cleanup:
+	/*
+	 * The per-ring chain was only needed for the summation. Release
+	 * every node together with its stat entry (the same way
+	 * dladm_link_stat_free() releases a chain), on success as well as
+	 * on allocation failure, instead of freeing just the first node.
+	 */
+	while (rx_ring_head != NULL) {
+		curr = rx_ring_head;
+		rx_ring_head = curr->dc_next;
+		free(curr->dc_statentry);
+		free(curr);
+	}
+
+done:
+	return (total_head);
+}
+
+/* Tx ring total statistic specific
functions */
+/*
+ * Sum the stats of every tx ring of the link into a single entry.
+ *
+ * Returns a one-node chain (header "mac_tx_ring_total") whose entry is a
+ * ring_stat_entry_t with re_index set to DLSTAT_INVALID_ENTRY, or NULL if
+ * the per-ring stats could not be obtained or memory ran out.
+ */
+void *
+dlstat_tx_ring_total_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	dladm_stat_chain_t	*total_head = NULL;
+	dladm_stat_chain_t	*tx_ring_head, *curr;
+	ring_stat_entry_t	*total_stats;
+
+	/* Get per tx ring stats */
+	tx_ring_head = dlstat_tx_ring_stats(dh, linkid);
+	if (tx_ring_head == NULL)
+		goto done;
+
+	total_stats = calloc(1, sizeof (ring_stat_entry_t));
+	if (total_stats == NULL)
+		goto cleanup;
+
+	total_stats->re_index = DLSTAT_INVALID_ENTRY;
+
+	for (curr = tx_ring_head; curr != NULL; curr = curr->dc_next) {
+		ring_stat_entry_t	*curr_ring_stats = curr->dc_statentry;
+
+		i_dlstat_sum_stats(&total_stats->re_stats,
+		    &curr_ring_stats->re_stats, &total_stats->re_stats,
+		    tx_ring_stats_list, TX_RING_STAT_SIZE);
+	}
+
+	total_head = malloc(sizeof (dladm_stat_chain_t));
+	if (total_head == NULL) {
+		free(total_stats);
+		goto cleanup;
+	}
+
+	total_head->dc_statentry = total_stats;
+	(void) strlcpy(total_head->dc_statheader, "mac_tx_ring_total",
+	    sizeof (total_head->dc_statheader));
+	total_head->dc_next = NULL;
+
+cleanup:
+	/*
+	 * The per-ring chain was only needed for the summation. Release
+	 * every node together with its stat entry (the same way
+	 * dladm_link_stat_free() releases a chain), on success as well as
+	 * on allocation failure, instead of freeing just the first node.
+	 */
+	while (tx_ring_head != NULL) {
+		curr = tx_ring_head;
+		tx_ring_head = curr->dc_next;
+		free(curr->dc_statentry);
+		free(curr);
+	}
+
+done:
+	return (total_head);
+}
+
+/* Summary statistic specific functions */
+/*ARGSUSED*/
+static boolean_t
+i_dlstat_total_match(void *arg1, void *arg2)
+{			/* Always single entry for total */
+	return (B_TRUE);
+}
+
+static void *
+i_dlstat_total_stat_entry_diff(void *arg1, void *arg2)
+{
+	total_stat_entry_t	*s1 = arg1;
+	total_stat_entry_t	*s2 = arg2;
+	total_stat_entry_t	*diff_entry;
+
+	diff_entry = malloc(sizeof (total_stat_entry_t));
+	if (diff_entry == NULL)
+		goto done;
+
+	DLSTAT_DIFF_STAT(s1, s2, diff_entry, tse_stats, total_stats_list,
+	    TOTAL_STAT_SIZE);
+
+done:
+	return (diff_entry);
+}
+
+void *
+dlstat_total_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	dladm_stat_chain_t	*head = NULL;
+	dladm_stat_chain_t	*rx_total;
+	dladm_stat_chain_t	*tx_total;
+	total_stat_entry_t	*total_stat_entry;
+	rx_lane_stat_entry_t	*rx_lane_stat_entry;
+	tx_lane_stat_entry_t	*tx_lane_stat_entry;
+
+	/*
Get total rx lane stats */
+	rx_total = dlstat_rx_lane_total_stats(dh, linkid);
+	if (rx_total == NULL)
+		goto done;
+
+	/* Get total tx lane stats */
+	tx_total = dlstat_tx_lane_total_stats(dh, linkid);
+	if (tx_total == NULL)
+		goto free_rx;
+
+	/* Build total stat */
+	total_stat_entry = calloc(1, sizeof (total_stat_entry_t));
+	if (total_stat_entry == NULL)
+		goto free_tx;
+
+	rx_lane_stat_entry = rx_total->dc_statentry;
+	tx_lane_stat_entry = tx_total->dc_statentry;
+
+	/* Extract total rx ipackets, rbytes */
+	total_stat_entry->tse_stats.ts_ipackets =
+	    rx_lane_stat_entry->rle_stats.rl_ipackets;
+	total_stat_entry->tse_stats.ts_rbytes =
+	    rx_lane_stat_entry->rle_stats.rl_rbytes;
+
+	/* Extract total tx opackets, obytes */
+	total_stat_entry->tse_stats.ts_opackets =
+	    tx_lane_stat_entry->tle_stats.tl_opackets;
+	total_stat_entry->tse_stats.ts_obytes =
+	    tx_lane_stat_entry->tle_stats.tl_obytes;
+
+	head = malloc(sizeof (dladm_stat_chain_t));
+	if (head == NULL) {
+		free(total_stat_entry);
+		goto free_tx;
+	}
+
+	head->dc_statentry = total_stat_entry;
+	(void) strlcpy(head->dc_statheader, "mac_lane_total",
+	    sizeof (head->dc_statheader));
+	head->dc_next = NULL;
+
+	/*
+	 * The rx and tx totals are single-node chains that were only
+	 * needed to fill in total_stat_entry. Release each node together
+	 * with the entry it carries, on success and on every failure
+	 * path reached after it was obtained.
+	 */
+free_tx:
+	free(tx_total->dc_statentry);
+	free(tx_total);
+free_rx:
+	free(rx_total->dc_statentry);
+	free(rx_total);
+
+done:
+	return (head);
+}
+
+/* Aggr total statistic(summed across all component ports) specific functions */
+/*
+ * Sum the stats of every aggr port entry on the chain 'head' into one new
+ * aggr_port_stat_entry_t whose ape_portlinkid is DATALINK_INVALID_LINKID.
+ * The chain passed in is not modified. Returns a one-node chain carrying
+ * the summed entry, or NULL if memory could not be allocated.
+ */
+void *
+dlstat_aggr_total_stats(dladm_stat_chain_t *head)
+{
+	dladm_stat_chain_t	*curr;
+	/* Must start out NULL: it is returned as-is when calloc fails */
+	dladm_stat_chain_t	*total_head = NULL;
+	aggr_port_stat_entry_t	*total_stats;
+
+	total_stats = calloc(1, sizeof (aggr_port_stat_entry_t));
+	if (total_stats == NULL)
+		goto done;
+
+	total_stats->ape_portlinkid = DATALINK_INVALID_LINKID;
+
+	for (curr = head; curr != NULL; curr = curr->dc_next) {
+		aggr_port_stat_entry_t	*curr_aggr_port_stats;
+
+		curr_aggr_port_stats = curr->dc_statentry;
+
+		/* A port whose stats could not be read contributes nothing */
+		if (curr_aggr_port_stats == NULL)
+			continue;
+
+		i_dlstat_sum_stats(&total_stats->ape_stats,
+		    &curr_aggr_port_stats->ape_stats, &total_stats->ape_stats,
+		    aggr_port_stats_list, AGGR_PORT_STAT_SIZE);
+	}
+
+	total_head = malloc(sizeof
(dladm_stat_chain_t)); + if (total_head == NULL) { + free(total_stats); + goto done; + } + + total_head->dc_statentry = total_stats; + total_head->dc_next = NULL; + +done: + return (total_head); +} + +/* Aggr port statistic specific functions */ +static boolean_t +i_dlstat_aggr_port_match(void *arg1, void *arg2) +{ + aggr_port_stat_entry_t *s1 = arg1; + aggr_port_stat_entry_t *s2 = arg2; + + return (s1->ape_portlinkid == s2->ape_portlinkid); +} + +static void * +i_dlstat_aggr_port_stat_entry_diff(void *arg1, void *arg2) +{ + aggr_port_stat_entry_t *s1 = arg1; + aggr_port_stat_entry_t *s2 = arg2; + aggr_port_stat_entry_t *diff_entry; + + diff_entry = malloc(sizeof (aggr_port_stat_entry_t)); + if (diff_entry == NULL) + goto done; + + diff_entry->ape_portlinkid = s1->ape_portlinkid; + + DLSTAT_DIFF_STAT(s1, s2, diff_entry, ape_stats, aggr_port_stats_list, + AGGR_PORT_STAT_SIZE); + +done: + return (diff_entry); +} + +/* + * Query dls stats for the aggr port. This results in query for stats into + * the corresponding device driver. 
+ */
+/*
+ * Returns a newly allocated entry tagged with 'linkid', or NULL if the port
+ * name cannot be parsed, kstat cannot be opened, the "mac" kstat for the
+ * device is not found, or memory runs out. The caller frees the entry.
+ */
+static aggr_port_stat_entry_t *
+i_dlstat_single_port_stats(const char *portname, datalink_id_t linkid)
+{
+	kstat_ctl_t		*kcp;
+	kstat_t			*ksp;
+	char			module[DLPI_LINKNAME_MAX];
+	uint_t			instance;
+	aggr_port_stat_entry_t	*aggr_port_stat_entry = NULL;
+
+	/*
+	 * Nothing has been opened yet, so return directly: jumping to
+	 * 'done' here would hand an uninitialized kcp to kstat_close().
+	 */
+	if (dladm_parselink(portname, module, &instance) != DLADM_STATUS_OK)
+		return (NULL);
+
+	if ((kcp = kstat_open()) == NULL) {
+		warn("kstat open operation failed");
+		return (NULL);
+	}
+
+	ksp = dladm_kstat_lookup(kcp, module, instance, "mac", NULL);
+	if (ksp == NULL)
+		goto done;
+
+	aggr_port_stat_entry = calloc(1, sizeof (aggr_port_stat_entry_t));
+	if (aggr_port_stat_entry == NULL)
+		goto done;
+
+	/* Save port's linkid */
+	aggr_port_stat_entry->ape_portlinkid = linkid;
+
+	i_dlstat_get_stats(kcp, ksp, &aggr_port_stat_entry->ape_stats,
+	    aggr_port_stats_list, AGGR_PORT_STAT_SIZE);
+done:
+	(void) kstat_close(kcp);
+	return (aggr_port_stat_entry);
+}
+
+void *
+dlstat_aggr_port_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	dladm_aggr_grp_attr_t	ginfo;
+	int			i;
+	dladm_aggr_port_attr_t	 *portp;
+	dladm_phys_attr_t	dpa;
+	aggr_port_stat_entry_t	*aggr_port_stat_entry;
+	dladm_stat_chain_t	*head = NULL, *prev = NULL, *curr;
+	dladm_stat_chain_t	*total_stats;
+
+	/* Get aggr info */
+	bzero(&ginfo, sizeof (dladm_aggr_grp_attr_t));
+	if (dladm_aggr_info(dh, linkid, &ginfo, DLADM_OPT_ACTIVE)
+	    != DLADM_STATUS_OK)
+		goto done;
+	/* For every port that is member of this aggr do */
+	for (i = 0; i < ginfo.lg_nports; i++) {
+		portp = &(ginfo.lg_ports[i]);
+		if (dladm_phys_info(dh, portp->lp_linkid, &dpa,
+		    DLADM_OPT_ACTIVE) != DLADM_STATUS_OK) {
+			goto done;
+		}
+
+		aggr_port_stat_entry = i_dlstat_single_port_stats(dpa.dp_dev,
+		    portp->lp_linkid);
+		/*
+		 * Never chain a node without a stat entry: the summation
+		 * in dlstat_aggr_total_stats() dereferences every entry.
+		 * Treat this like the other failures in this loop.
+		 */
+		if (aggr_port_stat_entry == NULL)
+			goto done;
+
+		/* Create dladm_stat_chain_t object for this stat */
+		curr = malloc(sizeof (dladm_stat_chain_t));
+		if (curr == NULL) {
+			free(aggr_port_stat_entry);
+			goto done;
+		}
+		(void) strlcpy(curr->dc_statheader, dpa.dp_dev,
+		    sizeof (curr->dc_statheader));
+
curr->dc_statentry = aggr_port_stat_entry;
+		curr->dc_next = NULL;
+
+		/* Chain this aggr port stat entry */
+		/* head of the stat list */
+		if (prev == NULL)
+			head = curr;
+		else
+			prev->dc_next = curr;
+		prev = curr;
+	}
+
+	/*
+	 * Prepend the stat list with cumulative aggr stats i.e. summed over all
+	 * component ports
+	 */
+	total_stats = dlstat_aggr_total_stats(head);
+	if (total_stats != NULL) {
+		total_stats->dc_next = head;
+		head = total_stats;
+	}
+
+done:
+	free(ginfo.lg_ports);
+	return (head);
+}
+
+/* Misc stat specific functions */
+void *
+dlstat_misc_stats(dladm_handle_t dh, datalink_id_t linkid)
+{
+	misc_stat_entry_t	*misc_stat_entry;
+	dladm_stat_chain_t	*head = NULL;
+	char			linkname[MAXLINKNAMELEN];
+
+	if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname,
+	    DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) {
+		goto done;
+	}
+
+	misc_stat_entry = i_dlstat_misc_stats(linkname);
+	if (misc_stat_entry == NULL)
+		goto done;
+
+	head = malloc(sizeof (dladm_stat_chain_t));
+	if (head == NULL) {
+		free(misc_stat_entry);
+		goto done;
+	}
+
+	head->dc_statentry = misc_stat_entry;
+	(void) strlcpy(head->dc_statheader, "mac_misc_stat",
+	    sizeof (head->dc_statheader));
+	head->dc_next = NULL;
+
+done:
+	return (head);
+}
+
+/* Exported functions */
+/*
+ * Query the stats of type 'stattype' for the link by dispatching to the
+ * matching ds_querystat handler in dladm_stat_table. Returns whatever the
+ * handler returns, or NULL when 'stattype' is not a valid stat type.
+ */
+dladm_stat_chain_t *
+dladm_link_stat_query(dladm_handle_t dh, datalink_id_t linkid,
+    dladm_stat_type_t stattype)
+{
+	/*
+	 * stattype comes from the caller and is used as a table index.
+	 * NOTE(review): assumes dladm_stat_table has DLADM_STAT_NUM_STATS
+	 * entries, one per dladm_stat_type_t value -- confirm.
+	 */
+	if ((uint_t)stattype >= DLADM_STAT_NUM_STATS)
+		return (NULL);
+
+	return (dladm_stat_table[stattype].ds_querystat(dh, linkid));
+}
+
+dladm_stat_chain_t *
+dladm_link_stat_diffchain(dladm_stat_chain_t *op1, dladm_stat_chain_t *op2,
+    dladm_stat_type_t stattype)
+{
+	dladm_stat_chain_t	*op1_curr, *op2_curr;
+	dladm_stat_chain_t	*diff_curr;
+	dladm_stat_chain_t	*diff_prev = NULL, *diff_head = NULL;
+
+	/* Perform op1 - op2, store result in diff */
+	for (op1_curr = op1; op1_curr != NULL; op1_curr = op1_curr->dc_next) {
+		for (op2_curr = op2; op2_curr != NULL;
+		    op2_curr = op2_curr->dc_next) {
+			if (dlstat_match_stats(op1_curr->dc_statentry,
+
op2_curr->dc_statentry, stattype)) { + break; + } + } + diff_curr = malloc(sizeof (dladm_stat_chain_t)); + if (diff_curr == NULL) + goto done; + + diff_curr->dc_next = NULL; + + if (op2_curr == NULL) { + /* prev iteration did not have this stat entry */ + diff_curr->dc_statentry = + dlstat_diff_stats(op1_curr->dc_statentry, + NULL, stattype); + } else { + diff_curr->dc_statentry = + dlstat_diff_stats(op1_curr->dc_statentry, + op2_curr->dc_statentry, stattype); + } + + if (diff_curr->dc_statentry == NULL) { + free(diff_curr); + goto done; + } + + if (diff_prev == NULL) /* head of the diff stat list */ + diff_head = diff_curr; + else + diff_prev->dc_next = diff_curr; + diff_prev = diff_curr; + } +done: + return (diff_head); +} + +void +dladm_link_stat_free(dladm_stat_chain_t *curr) +{ + while (curr != NULL) { + dladm_stat_chain_t *tofree = curr; + + curr = curr->dc_next; + free(tofree->dc_statentry); + free(tofree); + } +} + +/* Query all link stats */ +static name_value_stat_t * +i_dlstat_convert_stats(void *stats, stat_info_t stats_list[], uint_t size) +{ + int i; + name_value_stat_t *head_stat = NULL, *prev_stat = NULL; + name_value_stat_t *curr_stat; + + for (i = 0; i < size; i++) { + uint64_t *val = (void *) + ((uchar_t *)stats + stats_list[i].si_offset); + + curr_stat = calloc(1, sizeof (name_value_stat_t)); + if (curr_stat == NULL) + break; + + (void) strlcpy(curr_stat->nv_statname, stats_list[i].si_name, + sizeof (curr_stat->nv_statname)); + curr_stat->nv_statval = *val; + curr_stat->nv_nextstat = NULL; + + if (head_stat == NULL) /* First node */ + head_stat = curr_stat; + else + prev_stat->nv_nextstat = curr_stat; + + prev_stat = curr_stat; + } + return (head_stat); +} + +void * +build_nvs_entry(char *statheader, void *statentry, dladm_stat_type_t stattype) +{ + name_value_stat_entry_t *name_value_stat_entry; + dladm_stat_desc_t *stattbl_ptr; + void *statfields; + + stattbl_ptr = &dladm_stat_table[stattype]; + + /* Allocate memory for query all stat entry */ 
+ name_value_stat_entry = calloc(1, sizeof (name_value_stat_entry_t)); + if (name_value_stat_entry == NULL) + goto done; + + /* Header for these stat fields */ + (void) strlcpy(name_value_stat_entry->nve_header, statheader, + sizeof (name_value_stat_entry->nve_header)); + + /* Extract stat fields from the statentry */ + statfields = (uchar_t *)statentry + + dladm_stat_table[stattype].ds_offset; + + /* Convert curr_stat to <statname, statval> pair */ + name_value_stat_entry->nve_stats = + i_dlstat_convert_stats(statfields, + stattbl_ptr->ds_statlist, stattbl_ptr->ds_statsize); +done: + return (name_value_stat_entry); +} + +void * +i_walk_dlstat_chain(dladm_stat_chain_t *stat_head, dladm_stat_type_t stattype) +{ + dladm_stat_chain_t *curr; + dladm_stat_chain_t *nvstat_head = NULL, *nvstat_prev = NULL; + dladm_stat_chain_t *nvstat_curr; + + /* + * For every stat in the chain, build header and convert all + * its stat fields + */ + for (curr = stat_head; curr != NULL; curr = curr->dc_next) { + nvstat_curr = malloc(sizeof (dladm_stat_chain_t)); + if (nvstat_curr == NULL) + break; + + nvstat_curr->dc_statentry = build_nvs_entry(curr->dc_statheader, + curr->dc_statentry, stattype); + + if (nvstat_curr->dc_statentry == NULL) { + free(nvstat_curr); + break; + } + + nvstat_curr->dc_next = NULL; + + if (nvstat_head == NULL) /* First node */ + nvstat_head = nvstat_curr; + else + nvstat_prev->dc_next = nvstat_curr; + + nvstat_prev = nvstat_curr; + } +done: + return (nvstat_head); +} + +dladm_stat_chain_t * +dladm_link_stat_query_all(dladm_handle_t dh, datalink_id_t linkid, + dladm_stat_type_t stattype) +{ + dladm_stat_chain_t *stat_head; + dladm_stat_chain_t *nvstat_head = NULL; + + /* Query the requested stat */ + stat_head = dladm_link_stat_query(dh, linkid, stattype); + if (stat_head == NULL) + goto done; + + /* + * Convert every statfield in every stat-entry of stat chain to + * <statname, statval> pair + */ + nvstat_head = i_walk_dlstat_chain(stat_head, stattype); + + /* 
Free stat_head */ + dladm_link_stat_free(stat_head); + +done: + return (nvstat_head); +} + +void +dladm_link_stat_query_all_free(dladm_stat_chain_t *curr) +{ + while (curr != NULL) { + dladm_stat_chain_t *tofree = curr; + name_value_stat_entry_t *nv_entry = curr->dc_statentry; + name_value_stat_t *nv_curr = nv_entry->nve_stats; + + while (nv_curr != NULL) { + name_value_stat_t *nv_tofree = nv_curr; + + nv_curr = nv_curr->nv_nextstat; + free(nv_tofree); + } + + curr = curr->dc_next; + free(nv_entry); + free(tofree); + } +} + +/* flow stats specific routines */ +flow_stat_t * +dladm_flow_stat_query(const char *flowname) +{ + kstat_ctl_t *kcp; + kstat_t *ksp; + flow_stat_t *flow_stat = NULL; + + if ((kcp = kstat_open()) == NULL) + return (NULL); + + flow_stat = calloc(1, sizeof (flow_stat_t)); + if (flow_stat == NULL) + goto done; + + ksp = dladm_kstat_lookup(kcp, NULL, -1, flowname, "flow"); + + if (ksp != NULL) { + i_dlstat_get_stats(kcp, ksp, flow_stat, flow_stats_list, + FLOW_STAT_SIZE); + } + +done: + (void) kstat_close(kcp); + return (flow_stat); +} + +flow_stat_t * +dladm_flow_stat_diff(flow_stat_t *op1, flow_stat_t *op2) +{ + flow_stat_t *diff_stat; + + diff_stat = calloc(1, sizeof (flow_stat_t)); + if (diff_stat == NULL) + goto done; + + if (op2 == NULL) { + bcopy(op1, diff_stat, sizeof (flow_stat_t)); + } else { + i_dlstat_diff_stats(diff_stat, op1, op2, flow_stats_list, + FLOW_STAT_SIZE); + } +done: + return (diff_stat); +} + +void +dladm_flow_stat_free(flow_stat_t *curr) +{ + free(curr); +} + +/* Query all flow stats */ +name_value_stat_entry_t * +dladm_flow_stat_query_all(const char *flowname) +{ + flow_stat_t *flow_stat; + name_value_stat_entry_t *name_value_stat_entry = NULL; + + /* Query flow stats */ + flow_stat = dladm_flow_stat_query(flowname); + if (flow_stat == NULL) + goto done; + + /* Allocate memory for query all stat entry */ + name_value_stat_entry = calloc(1, sizeof (name_value_stat_entry_t)); + if (name_value_stat_entry == NULL) { + 
dladm_flow_stat_free(flow_stat); + goto done; + } + + /* Header for these stat fields */ + (void) strncpy(name_value_stat_entry->nve_header, flowname, + MAXFLOWNAMELEN); + + /* Convert every statfield in flow_stat to <statname, statval> pair */ + name_value_stat_entry->nve_stats = + i_dlstat_convert_stats(flow_stat, flow_stats_list, FLOW_STAT_SIZE); + + /* Free flow_stat */ + dladm_flow_stat_free(flow_stat); + +done: + return (name_value_stat_entry); +} + +void +dladm_flow_stat_query_all_free(name_value_stat_entry_t *curr) +{ + name_value_stat_t *nv_curr = curr->nve_stats; + + while (nv_curr != NULL) { + name_value_stat_t *nv_tofree = nv_curr; + + nv_curr = nv_curr->nv_nextstat; + free(nv_tofree); + } +} diff --git a/usr/src/lib/libdladm/common/libdlstat.h b/usr/src/lib/libdladm/common/libdlstat.h index 51e28627a9..381dafe22d 100644 --- a/usr/src/lib/libdladm/common/libdlstat.h +++ b/usr/src/lib/libdladm/common/libdlstat.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +41,216 @@ extern "C" { #define LINK_REPORT 1 #define FLOW_REPORT 2 +#define DLSTAT_INVALID_ENTRY -1 +#define MAXSTATNAMELEN 256 +/* + * Definitions common to all stats + */ +typedef struct dladm_stat_chain_s { + char dc_statheader[MAXSTATNAMELEN]; + void *dc_statentry; + struct dladm_stat_chain_s *dc_next; +} dladm_stat_chain_t; + +typedef enum { + DLADM_STAT_RX_LANE = 0, /* Per lane rx stats */ + DLADM_STAT_TX_LANE, /* Per lane tx stats */ + DLADM_STAT_RX_LANE_TOTAL, /* Stats summed across all rx lanes */ + DLADM_STAT_TX_LANE_TOTAL, /* Stats summed across all tx lanes */ + DLADM_STAT_RX_LANE_FOUT, /* Per fanout (rx lane) stats */ + DLADM_STAT_RX_RING, /* Per ring rx stats */ + DLADM_STAT_TX_RING, /* Per ring tx stats */ + DLADM_STAT_RX_RING_TOTAL, /* Stats summed across all rx rings */ + DLADM_STAT_TX_RING_TOTAL, /* Stats summed across all tx rings */ + DLADM_STAT_TOTAL, /* Summary view */ + DLADM_STAT_AGGR_PORT, /* Aggr port stats */ + DLADM_STAT_MISC, /* Misc stats */ + DLADM_STAT_NUM_STATS /* This must always be the last entry */ +} dladm_stat_type_t; + +/* + * Definitions for rx lane stats + */ +typedef struct rx_lane_stat_s { + uint64_t rl_ipackets; + uint64_t rl_rbytes; + uint64_t rl_lclpackets; + uint64_t rl_lclbytes; + uint64_t rl_intrs; + uint64_t rl_intrbytes; + uint64_t rl_pollbytes; + uint64_t rl_polls; + uint64_t rl_sdrops; + uint64_t rl_chl10; + uint64_t rl_ch10_50; + uint64_t rl_chg50; +} rx_lane_stat_t; + +typedef enum { + L_HWLANE, + L_SWLANE, + L_LOCAL, + L_LCLSWLANE, + L_BCAST, + L_DFNCT +} lane_type_t; + +typedef struct rx_lane_stat_entry_s { + int64_t rle_index; + lane_type_t rle_id; + rx_lane_stat_t rle_stats; +} rx_lane_stat_entry_t; + +/* + * Definitions for tx lane stats + */ +typedef struct tx_lane_stat_s { + uint64_t tl_opackets; + uint64_t tl_obytes; + uint64_t tl_blockcnt; + uint64_t tl_unblockcnt; + uint64_t tl_sdrops; +} tx_lane_stat_t; + +typedef struct tx_lane_stat_entry_s { + int64_t tle_index; + lane_type_t tle_id; + 
tx_lane_stat_t tle_stats; +} tx_lane_stat_entry_t; + +/* + * Definitions for tx/rx misc stats + */ +typedef struct misc_stat_s { + uint64_t ms_multircv; + uint64_t ms_brdcstrcv; + uint64_t ms_multixmt; + uint64_t ms_brdcstxmt; + uint64_t ms_multircvbytes; + uint64_t ms_brdcstrcvbytes; + uint64_t ms_multixmtbytes; + uint64_t ms_brdcstxmtbytes; + uint64_t ms_txerrors; + uint64_t ms_macspoofed; + uint64_t ms_ipspoofed; + uint64_t ms_dhcpspoofed; + uint64_t ms_restricted; + uint64_t ms_dhcpdropped; + uint64_t ms_ipackets; + uint64_t ms_rbytes; + uint64_t ms_local; + uint64_t ms_localbytes; + uint64_t ms_intrs; + uint64_t ms_intrbytes; + uint64_t ms_polls; + uint64_t ms_pollbytes; + uint64_t ms_rxsdrops; + uint64_t ms_chainunder10; + uint64_t ms_chain10to50; + uint64_t ms_chainover50; + uint64_t ms_obytes; + uint64_t ms_opackets; + uint64_t ms_blockcnt; + uint64_t ms_unblockcnt; + uint64_t ms_txsdrops; +} misc_stat_t; + +/* + * To be consistent with other stat entries, misc stat + * is wrapped in stat entry + */ +typedef struct misc_stat_entry_s { + misc_stat_t mse_stats; +} misc_stat_entry_t; + +/* + * Definitions for ring stats: used by rx as well as tx + */ +typedef struct ring_stat_s { + uint64_t r_packets; + uint64_t r_bytes; +} ring_stat_t; + +typedef struct ring_stat_entry_s { + int64_t re_index; + ring_stat_t re_stats; +} ring_stat_entry_t; + +/* + * Definitions for fanout stats + */ +typedef struct fanout_stat_s { + uint64_t f_ipackets; + uint64_t f_rbytes; +} fanout_stat_t; + +typedef struct fanout_stat_entry_s { + int64_t fe_index; + lane_type_t fe_id; /* hw, sw, local */ + int64_t fe_foutindex; /* fanout index */ + fanout_stat_t fe_stats; +} fanout_stat_entry_t; + +/* + * Definitions for total stats + */ +typedef struct total_stat_s { + uint64_t ts_ipackets; + uint64_t ts_rbytes; + uint64_t ts_opackets; + uint64_t ts_obytes; +} total_stat_t; + +/* + * To be consistent with other stat entries, total stat + * is wrapped in stat entry + */ +typedef struct 
total_stat_entry_s { + total_stat_t tse_stats; +} total_stat_entry_t; + +/* + * Definitions for aggr stats + */ +typedef struct aggr_port_stat_s { + uint64_t ap_ipackets; + uint64_t ap_rbytes; + uint64_t ap_opackets; + uint64_t ap_obytes; +} aggr_port_stat_t; + +typedef struct aggr_port_stat_entry_s { + datalink_id_t ape_portlinkid; + aggr_port_stat_t ape_stats; +} aggr_port_stat_entry_t; + +/* + * Definitions for query all stats + */ +typedef struct name_value_stat_s { + char nv_statname[MAXSTATNAMELEN]; + uint64_t nv_statval; + struct name_value_stat_s *nv_nextstat; +} name_value_stat_t; + +typedef struct name_value_stat_entry_s { + char nve_header[MAXSTATNAMELEN]; + name_value_stat_t *nve_stats; +} name_value_stat_entry_t; + +/* + * Definitions for flow stats + */ +typedef struct flow_stat_s { + uint64_t fl_ipackets; + uint64_t fl_rbytes; + uint64_t fl_ierrors; + uint64_t fl_opackets; + uint64_t fl_obytes; + uint64_t fl_oerrors; + uint64_t fl_sdrops; +} flow_stat_t; + typedef struct pktsum_s { hrtime_t snaptime; uint64_t ipackets; @@ -65,6 +275,18 @@ extern dladm_status_t dladm_get_single_mac_stat(dladm_handle_t, datalink_id_t, extern void dladm_stats_total(pktsum_t *, pktsum_t *, pktsum_t *); extern void dladm_stats_diff(pktsum_t *, pktsum_t *, pktsum_t *); +extern dladm_stat_chain_t *dladm_link_stat_query(dladm_handle_t, + datalink_id_t, dladm_stat_type_t); +extern dladm_stat_chain_t *dladm_link_stat_diffchain(dladm_stat_chain_t *, + dladm_stat_chain_t *, dladm_stat_type_t); +extern dladm_stat_chain_t *dladm_link_stat_query_all(dladm_handle_t, + datalink_id_t, dladm_stat_type_t); + +extern flow_stat_t *dladm_flow_stat_query(const char *); +extern flow_stat_t *dladm_flow_stat_diff(flow_stat_t *, + flow_stat_t *); +extern name_value_stat_entry_t *dladm_flow_stat_query_all(const char *); + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libdladm/common/libdlvnic.c b/usr/src/lib/libdladm/common/libdlvnic.c index a49f51a6ca..7ff9563e1a 100644 --- 
a/usr/src/lib/libdladm/common/libdlvnic.c +++ b/usr/src/lib/libdladm/common/libdlvnic.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -117,7 +117,6 @@ i_dladm_vnic_create_sys(dladm_handle_t handle, dladm_vnic_attr_t *attr) ioc.vc_vrid = attr->va_vrid; ioc.vc_af = attr->va_af; ioc.vc_flags = attr->va_force ? VNIC_IOC_CREATE_FORCE : 0; - ioc.vc_flags |= attr->va_hwrings ? VNIC_IOC_CREATE_REQ_HWRINGS : 0; if (attr->va_mac_len > 0 || ioc.vc_mac_prefix_len > 0) bcopy(attr->va_mac_addr, ioc.vc_mac_addr, MAXMACADDRLEN); @@ -218,14 +217,6 @@ i_dladm_vnic_info_persist(dladm_handle_t handle, datalink_id_t linkid, goto done; } - status = dladm_get_conf_field(handle, conf, FHWRINGS, - &attrp->va_hwrings, sizeof (boolean_t)); - - if (status != DLADM_STATUS_OK && status != DLADM_STATUS_NOTFOUND) - goto done; - if (status == DLADM_STATUS_NOTFOUND) - attrp->va_hwrings = B_FALSE; - if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, NULL, 0)) != DLADM_STATUS_OK) goto done; @@ -521,7 +512,7 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid, /* Extract resource_ctl and cpu_list from proplist */ if (proplist != NULL) { status = dladm_link_proplist_extract(handle, proplist, - &attr.va_resource_props); + &attr.va_resource_props, 0); if (status != DLADM_STATUS_OK) goto done; } @@ -541,7 +532,6 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid, attr.va_vrid = vrid; attr.va_af = af; attr.va_force = (flags & DLADM_OPT_FORCE) != 0; - attr.va_hwrings = (flags & DLADM_OPT_HWRINGS) != 0; status = i_dladm_vnic_create_sys(handle, &attr); if (status != DLADM_STATUS_OK) @@ -745,14 +735,6 @@ dladm_vnic_persist_conf(dladm_handle_t handle, const char *name, goto done; } - if (attrp->va_hwrings) { - boolean_t hwrings = attrp->va_hwrings; - status = 
dladm_set_conf_field(handle, conf, FHWRINGS, - DLADM_TYPE_BOOLEAN, &hwrings); - if (status != DLADM_STATUS_OK) - goto done; - } - if (attrp->va_vid != 0) { u64 = attrp->va_vid; status = dladm_set_conf_field(handle, conf, FVLANID, @@ -776,9 +758,6 @@ typedef struct dladm_vnic_up_arg_s { dladm_status_t status; } dladm_vnic_up_arg_t; -#define DLADM_VNIC_UP_FIRST_WALK 0x1 -#define DLADM_VNIC_UP_SECOND_WALK 0x2 - static int i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) { @@ -786,7 +765,6 @@ i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) dladm_vnic_attr_t attr; dladm_status_t status; dladm_arg_list_t *proplist; - uint32_t flags = ((dladm_vnic_up_arg_t *)arg)->flags; bzero(&attr, sizeof (attr)); @@ -794,14 +772,6 @@ i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) if (status != DLADM_STATUS_OK) goto done; - /* - * Create the vnics that request hardware group first - * Create the vnics that don't request hardware group in the second walk - */ - if ((flags == DLADM_VNIC_UP_FIRST_WALK && !attr.va_hwrings) || - (flags == DLADM_VNIC_UP_SECOND_WALK && attr.va_hwrings)) - goto done; - /* Get all properties for this vnic */ status = dladm_link_get_proplist(handle, linkid, &proplist); if (status != DLADM_STATUS_OK) @@ -809,7 +779,7 @@ i_dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, void *arg) if (proplist != NULL) { status = dladm_link_proplist_extract(handle, proplist, - &attr.va_resource_props); + &attr.va_resource_props, DLADM_OPT_BOOT); } status = i_dladm_vnic_create_sys(handle, &attr); @@ -834,11 +804,6 @@ dladm_vnic_up(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags) (DATALINK_CLASS_VNIC | DATALINK_CLASS_ETHERSTUB); if (linkid == DATALINK_ALL_LINKID) { - vnic_arg.flags = DLADM_VNIC_UP_FIRST_WALK; - (void) dladm_walk_datalink_id(i_dladm_vnic_up, handle, - &vnic_arg, class, DATALINK_ANY_MEDIATYPE, - DLADM_OPT_PERSIST); - vnic_arg.flags = DLADM_VNIC_UP_SECOND_WALK; (void) 
dladm_walk_datalink_id(i_dladm_vnic_up, handle, &vnic_arg, class, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST); diff --git a/usr/src/lib/libdladm/common/libdlvnic.h b/usr/src/lib/libdladm/common/libdlvnic.h index 8b859f9e04..94b656aadf 100644 --- a/usr/src/lib/libdladm/common/libdlvnic.h +++ b/usr/src/lib/libdladm/common/libdlvnic.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,7 +47,6 @@ typedef struct dladm_vnic_attr { uint_t va_mac_prefix_len; uint16_t va_vid; boolean_t va_force; - boolean_t va_hwrings; vrid_t va_vrid; int va_af; mac_resource_props_t va_resource_props; diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c index bd4190b159..50e79616c0 100644 --- a/usr/src/lib/libdladm/common/linkprop.c +++ b/usr/src/lib/libdladm/common/linkprop.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -59,6 +59,11 @@ #include <sys/vlan.h> #include <libdlbridge.h> #include <stp_in.h> +#include <netinet/dhcp.h> +#include <netinet/dhcp6.h> +#include <net/if_types.h> +#include <libinetutil.h> +#include <pool.h> /* * The linkprop get() callback. 
@@ -106,7 +111,8 @@ typedef dladm_status_t pd_setf_t(dladm_handle_t, prop_desc_t *, datalink_id_t, */ typedef dladm_status_t pd_checkf_t(dladm_handle_t, prop_desc_t *pdp, datalink_id_t, char **propstrp, uint_t cnt, - val_desc_t *propval, datalink_media_t); + uint_t flags, val_desc_t *propval, + datalink_media_t); typedef struct link_attr_s { mac_prop_id_t pp_id; @@ -114,50 +120,47 @@ typedef struct link_attr_s { char *pp_name; } link_attr_t; +typedef struct dladm_linkprop_args_s { + dladm_status_t dla_status; + uint_t dla_flags; +} dladm_linkprop_args_t; + static dld_ioc_macprop_t *i_dladm_buf_alloc_by_name(size_t, datalink_id_t, const char *, uint_t, dladm_status_t *); static dld_ioc_macprop_t *i_dladm_buf_alloc_by_id(size_t, datalink_id_t, mac_prop_id_t, uint_t, dladm_status_t *); -static dld_ioc_macprop_t *i_dladm_get_public_prop(dladm_handle_t, datalink_id_t, - char *, uint_t, dladm_status_t *, uint_t *); +static dladm_status_t i_dladm_get_public_prop(dladm_handle_t, datalink_id_t, + char *, uint_t, uint_t *, void *, size_t); -static dladm_status_t i_dladm_set_private_prop(dladm_handle_t, datalink_id_t, +static dladm_status_t i_dladm_set_private_prop(dladm_handle_t, datalink_id_t, const char *, char **, uint_t, uint_t); -static dladm_status_t i_dladm_get_priv_prop(dladm_handle_t, datalink_id_t, +static dladm_status_t i_dladm_get_priv_prop(dladm_handle_t, datalink_id_t, const char *, char **, uint_t *, dladm_prop_type_t, uint_t); -static link_attr_t *dladm_name2prop(const char *); -static link_attr_t *dladm_id2prop(mac_prop_id_t); - -static pd_getf_t do_get_zone, do_get_autopush, do_get_rate_mod, - do_get_rate_prop, do_get_channel_prop, - do_get_powermode_prop, do_get_radio_prop, - i_dladm_duplex_get, i_dladm_status_get, - i_dladm_binary_get, i_dladm_uint32_get, - i_dladm_flowctl_get, i_dladm_maxbw_get, - i_dladm_cpus_get, i_dladm_priority_get, - i_dladm_tagmode_get, i_dladm_range_get, - get_stp_prop, get_bridge_forward, - get_bridge_pvid, - /* the above need to 
be renamed to "do_get_xxx" */ - do_get_protection; - -static pd_setf_t do_set_zone, do_set_rate_prop, - do_set_powermode_prop, do_set_radio_prop, - i_dladm_set_public_prop, do_set_res, do_set_cpus, - set_stp_prop, set_bridge_forward, set_bridge_pvid, - do_set_protection; - -static pd_checkf_t do_check_zone, do_check_autopush, do_check_rate, - do_check_hoplimit, do_check_encaplim, - i_dladm_uint32_check, do_check_maxbw, do_check_cpus, - do_check_priority, check_stp_prop, check_bridge_pvid, - do_check_allowedips, do_check_prop; - -static dladm_status_t i_dladm_speed_get(dladm_handle_t, prop_desc_t *, - datalink_id_t, char **, uint_t *, uint_t, uint_t *); static dladm_status_t i_dladm_macprop(dladm_handle_t, void *, boolean_t); static const char *dladm_perm2str(uint_t, char *); +static link_attr_t *dladm_name2prop(const char *); +static link_attr_t *dladm_id2prop(mac_prop_id_t); + +static pd_getf_t get_zone, get_autopush, get_rate_mod, get_rate, + get_speed, get_channel, get_powermode, get_radio, + get_duplex, get_link_state, get_binary, get_uint32, + get_flowctl, get_maxbw, get_cpus, get_priority, + get_tagmode, get_range, get_stp, get_bridge_forward, + get_bridge_pvid, get_protection, get_rxrings, + get_txrings, get_cntavail, + get_allowedips, get_allowedcids, get_pool, + get_rings_range; + +static pd_setf_t set_zone, set_rate, set_powermode, set_radio, + set_public_prop, set_resource, set_stp_prop, + set_bridge_forward, set_bridge_pvid; + +static pd_checkf_t check_zone, check_autopush, check_rate, check_hoplimit, + check_encaplim, check_uint32, check_maxbw, check_cpus, + check_stp_prop, check_bridge_pvid, check_allowedips, + check_allowedcids, check_rings, + check_pool, check_prop; struct prop_desc { /* @@ -326,12 +329,6 @@ static link_attr_t link_attr[] = { { MAC_PROP_WL_MLME, sizeof (wl_mlme_t), "mlme"}, - { MAC_PROP_MAXBW, sizeof (mac_resource_props_t), "maxbw"}, - - { MAC_PROP_PRIO, sizeof (mac_resource_props_t), "priority"}, - - { MAC_PROP_BIND_CPU, sizeof 
(mac_resource_props_t), "cpus"}, - { MAC_PROP_TAGMODE, sizeof (link_tagmode_t), "tagmode"}, { MAC_PROP_IPTUN_HOPLIMIT, sizeof (uint32_t), "hoplimit"}, @@ -344,7 +341,24 @@ static link_attr_t link_attr[] = { { MAC_PROP_LDECAY, sizeof (uint32_t), "learn_decay"}, - { MAC_PROP_PROTECT, sizeof (mac_resource_props_t), "protection"}, + { MAC_PROP_RESOURCE, sizeof (mac_resource_props_t), "resource"}, + + { MAC_PROP_RESOURCE_EFF, sizeof (mac_resource_props_t), + "resource-effective"}, + + { MAC_PROP_RXRINGSRANGE, sizeof (mac_propval_range_t), "rxrings"}, + + { MAC_PROP_TXRINGSRANGE, sizeof (mac_propval_range_t), "txrings"}, + + { MAC_PROP_MAX_TX_RINGS_AVAIL, sizeof (uint_t), + "txrings-available"}, + + { MAC_PROP_MAX_RX_RINGS_AVAIL, sizeof (uint_t), + "rxrings-available"}, + + { MAC_PROP_MAX_RXHWCLNT_AVAIL, sizeof (uint_t), "rxhwclnt-available"}, + + { MAC_PROP_MAX_TXHWCLNT_AVAIL, sizeof (uint_t), "txhwclnt-available"}, { MAC_PROP_PRIVATE, 0, "driver-private"} }; @@ -395,8 +409,9 @@ static val_desc_t link_tagmode_vals[] = { static val_desc_t link_protect_vals[] = { { "mac-nospoof", MPT_MACNOSPOOF }, + { "restricted", MPT_RESTRICTED }, { "ip-nospoof", MPT_IPNOSPOOF }, - { "restricted", MPT_RESTRICTED } + { "dhcp-nospoof", MPT_DHCPNOSPOOF }, }; static val_desc_t dladm_wlan_radio_vals[] = { @@ -418,164 +433,180 @@ static val_desc_t stp_p2p_vals[] = { #define VALCNT(vals) (sizeof ((vals)) / sizeof (val_desc_t)) #define RESET_VAL ((uintptr_t)-1) +#define UNSPEC_VAL ((uintptr_t)-2) static prop_desc_t prop_table[] = { { "channel", { NULL, 0 }, NULL, 0, NULL, NULL, - do_get_channel_prop, NULL, 0, + get_channel, NULL, 0, DATALINK_CLASS_PHYS, DL_WIFI }, { "powermode", { "off", DLADM_WLAN_PM_OFF }, dladm_wlan_powermode_vals, VALCNT(dladm_wlan_powermode_vals), - do_set_powermode_prop, NULL, - do_get_powermode_prop, NULL, 0, + set_powermode, NULL, + get_powermode, NULL, 0, DATALINK_CLASS_PHYS, DL_WIFI }, { "radio", { "on", DLADM_WLAN_RADIO_ON }, dladm_wlan_radio_vals, 
VALCNT(dladm_wlan_radio_vals), - do_set_radio_prop, NULL, - do_get_radio_prop, NULL, 0, + set_radio, NULL, + get_radio, NULL, 0, DATALINK_CLASS_PHYS, DL_WIFI }, { "speed", { "", 0 }, NULL, 0, - do_set_rate_prop, do_get_rate_mod, - do_get_rate_prop, do_check_rate, 0, + set_rate, get_rate_mod, + get_rate, check_rate, 0, DATALINK_CLASS_PHYS, DATALINK_ANY_MEDIATYPE }, { "autopush", { "", 0 }, NULL, 0, - i_dladm_set_public_prop, NULL, - do_get_autopush, do_check_autopush, PD_CHECK_ALLOC, + set_public_prop, NULL, + get_autopush, check_autopush, PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "zone", { "", 0 }, NULL, 0, - do_set_zone, NULL, - do_get_zone, do_check_zone, PD_TEMPONLY|PD_CHECK_ALLOC, + set_zone, NULL, + get_zone, check_zone, PD_TEMPONLY|PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "duplex", { "", 0 }, link_duplex_vals, VALCNT(link_duplex_vals), - NULL, NULL, i_dladm_duplex_get, NULL, + NULL, NULL, get_duplex, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "state", { "up", LINK_STATE_UP }, link_status_vals, VALCNT(link_status_vals), - NULL, NULL, i_dladm_status_get, NULL, + NULL, NULL, get_link_state, NULL, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "adv_autoneg_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "mtu", { "", 0 }, NULL, 0, - i_dladm_set_public_prop, i_dladm_range_get, - i_dladm_uint32_get, i_dladm_uint32_check, 0, DATALINK_CLASS_ALL, + set_public_prop, get_range, + get_uint32, check_uint32, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "flowctrl", { "", 0 }, link_flow_vals, VALCNT(link_flow_vals), - i_dladm_set_public_prop, NULL, i_dladm_flowctl_get, NULL, + set_public_prop, NULL, get_flowctl, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_10gfdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, 
get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_10gfdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_1000fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_1000fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_1000hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_1000hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_100fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_100fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_100hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_100hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_10fdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_10fdx_cap", { "", 0 }, link_01_vals, 
VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "adv_10hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - NULL, NULL, i_dladm_binary_get, NULL, + NULL, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "en_10hdx_cap", { "", 0 }, link_01_vals, VALCNT(link_01_vals), - i_dladm_set_public_prop, NULL, i_dladm_binary_get, NULL, + set_public_prop, NULL, get_binary, NULL, 0, DATALINK_CLASS_PHYS, DL_ETHER }, { "maxbw", { "--", RESET_VAL }, NULL, 0, - do_set_res, NULL, - i_dladm_maxbw_get, do_check_maxbw, PD_CHECK_ALLOC, + set_resource, NULL, + get_maxbw, check_maxbw, PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "cpus", { "--", RESET_VAL }, NULL, 0, - do_set_cpus, NULL, - i_dladm_cpus_get, do_check_cpus, 0, + set_resource, NULL, + get_cpus, check_cpus, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, - { "priority", { "high", RESET_VAL }, - link_priority_vals, VALCNT(link_priority_vals), do_set_res, NULL, - i_dladm_priority_get, do_check_priority, PD_CHECK_ALLOC, + { "cpus-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_cpus, 0, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "pool", { "--", RESET_VAL }, NULL, 0, + set_resource, NULL, + get_pool, check_pool, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "pool-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_pool, 0, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "priority", { "high", MPL_RESET }, + link_priority_vals, VALCNT(link_priority_vals), set_resource, + NULL, get_priority, check_prop, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "tagmode", { "vlanonly", LINK_TAGMODE_VLANONLY }, link_tagmode_vals, VALCNT(link_tagmode_vals), - i_dladm_set_public_prop, NULL, i_dladm_tagmode_get, + set_public_prop, NULL, get_tagmode, NULL, 0, DATALINK_CLASS_PHYS | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC, DL_ETHER }, { "hoplimit", { "", 0 
}, NULL, 0, - i_dladm_set_public_prop, i_dladm_range_get, i_dladm_uint32_get, - do_check_hoplimit, 0, DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE}, + set_public_prop, get_range, get_uint32, + check_hoplimit, 0, DATALINK_CLASS_IPTUN, DATALINK_ANY_MEDIATYPE}, { "encaplimit", { "", 0 }, NULL, 0, - i_dladm_set_public_prop, i_dladm_range_get, i_dladm_uint32_get, - do_check_encaplim, 0, DATALINK_CLASS_IPTUN, DL_IPV6}, + set_public_prop, get_range, get_uint32, + check_encaplim, 0, DATALINK_CLASS_IPTUN, DL_IPV6}, { "forward", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), @@ -588,70 +619,118 @@ static prop_desc_t prop_table[] = { DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "learn_limit", { "1000", 1000 }, NULL, 0, - i_dladm_set_public_prop, NULL, i_dladm_uint32_get, - i_dladm_uint32_check, 0, + set_public_prop, NULL, get_uint32, + check_uint32, 0, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "learn_decay", { "200", 200 }, NULL, 0, - i_dladm_set_public_prop, NULL, i_dladm_uint32_get, - i_dladm_uint32_check, 0, + set_public_prop, NULL, get_uint32, + check_uint32, 0, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), - set_stp_prop, NULL, get_stp_prop, NULL, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, NULL, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_priority", { "128", 128 }, NULL, 0, - set_stp_prop, NULL, get_stp_prop, check_stp_prop, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, check_stp_prop, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_cost", { "auto", 0 }, NULL, 0, - set_stp_prop, NULL, get_stp_prop, check_stp_prop, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, check_stp_prop, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| 
DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_edge", { "1", 1 }, link_01_vals, VALCNT(link_01_vals), - set_stp_prop, NULL, get_stp_prop, NULL, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, NULL, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_p2p", { "auto", P2P_AUTO }, stp_p2p_vals, VALCNT(stp_p2p_vals), - set_stp_prop, NULL, get_stp_prop, NULL, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, NULL, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "stp_mcheck", { "0", 0 }, link_01_vals, VALCNT(link_01_vals), - set_stp_prop, NULL, get_stp_prop, check_stp_prop, PD_AFTER_PERM, + set_stp_prop, NULL, get_stp, check_stp_prop, PD_AFTER_PERM, DATALINK_CLASS_PHYS|DATALINK_CLASS_AGGR| DATALINK_CLASS_ETHERSTUB|DATALINK_CLASS_SIMNET, DL_ETHER }, { "protection", { "--", RESET_VAL }, link_protect_vals, VALCNT(link_protect_vals), - do_set_protection, NULL, do_get_protection, do_check_prop, 0, + set_resource, NULL, get_protection, check_prop, 0, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, { "allowed-ips", { "--", 0 }, - NULL, 0, do_set_protection, NULL, - do_get_protection, do_check_allowedips, 0, + NULL, 0, set_resource, NULL, + get_allowedips, check_allowedips, PD_CHECK_ALLOC, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "allowed-dhcp-cids", { "--", 0 }, + NULL, 0, set_resource, NULL, + get_allowedcids, check_allowedcids, PD_CHECK_ALLOC, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxrings", { "--", RESET_VAL }, NULL, 0, + set_resource, get_rings_range, get_rxrings, check_rings, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxrings-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_rxrings, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "txrings", { "--", RESET_VAL }, NULL, 0, + set_resource, get_rings_range, get_txrings, check_rings, 0, + DATALINK_CLASS_ALL, 
DATALINK_ANY_MEDIATYPE }, + + { "txrings-effective", { "--", 0 }, + NULL, 0, NULL, NULL, + get_txrings, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "txrings-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxrings-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "rxhwclnt-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + + { "txhwclnt-available", { "", 0 }, NULL, 0, + NULL, NULL, get_cntavail, NULL, 0, + DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE }, + }; #define DLADM_MAX_PROPS (sizeof (prop_table) / sizeof (prop_desc_t)) static resource_prop_t rsrc_prop_table[] = { - {"maxbw", do_extract_maxbw}, - {"priority", do_extract_priority}, - {"cpus", do_extract_cpus}, - {"protection", do_extract_protection}, - {"allowed-ips", do_extract_allowedips} + {"maxbw", extract_maxbw}, + {"priority", extract_priority}, + {"cpus", extract_cpus}, + {"cpus-effective", extract_cpus}, + {"pool", extract_pool}, + {"pool-effective", extract_pool}, + {"protection", extract_protection}, + {"allowed-ips", extract_allowedips}, + {"allowed-dhcp-cids", extract_allowedcids}, + {"rxrings", extract_rxrings}, + {"rxrings-effective", extract_rxrings}, + {"txrings", extract_txrings}, + {"txrings-effective", extract_txrings} }; #define DLADM_MAX_RSRC_PROP (sizeof (rsrc_prop_table) / \ sizeof (resource_prop_t)) @@ -691,8 +770,9 @@ static dladm_status_t i_dladm_getset_defval(dladm_handle_t, prop_desc_t *, /* ARGSUSED */ static dladm_status_t -do_check_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { int i, j; @@ 
-737,17 +817,17 @@ i_dladm_set_single_prop(dladm_handle_t handle, datalink_id_t linkid, return (DLADM_STATUS_PROPRDONLY); if (prop_val != NULL) { - vdp = malloc(sizeof (val_desc_t) * val_cnt); + vdp = calloc(val_cnt, sizeof (val_desc_t)); if (vdp == NULL) return (DLADM_STATUS_NOMEM); if (pdp->pd_check != NULL) { needfree = ((pdp->pd_flags & PD_CHECK_ALLOC) != 0); status = pdp->pd_check(handle, pdp, linkid, prop_val, - val_cnt, vdp, media); + val_cnt, flags, vdp, media); } else if (pdp->pd_optval != NULL) { - status = do_check_prop(handle, pdp, linkid, prop_val, - val_cnt, vdp, media); + status = check_prop(handle, pdp, linkid, prop_val, + val_cnt, flags, vdp, media); } else { status = DLADM_STATUS_BADARG; } @@ -765,7 +845,7 @@ i_dladm_set_single_prop(dladm_handle_t handle, datalink_id_t linkid, cnt = 1; defval = (strlen(pdp->pd_defval.vd_name) > 0); if ((pdp->pd_flags & PD_CHECK_ALLOC) != 0 || defval) { - if ((vdp = malloc(sizeof (val_desc_t))) == NULL) + if ((vdp = calloc(1, sizeof (val_desc_t))) == NULL) return (DLADM_STATUS_NOMEM); if (defval) { @@ -773,7 +853,7 @@ i_dladm_set_single_prop(dladm_handle_t handle, datalink_id_t linkid, sizeof (val_desc_t)); } else if (pdp->pd_check != NULL) { status = pdp->pd_check(handle, pdp, linkid, - prop_val, cnt, vdp, media); + prop_val, cnt, flags, vdp, media); if (status != DLADM_STATUS_OK) goto done; } @@ -842,7 +922,6 @@ i_dladm_set_linkprop(dladm_handle_t handle, datalink_id_t linkid, status = DLADM_STATUS_NOTFOUND; } } - return (status); } @@ -955,9 +1034,9 @@ dladm_get_linkprop(dladm_handle_t handle, datalink_id_t linkid, uint_t perm_flags; if (type == DLADM_PROP_VAL_DEFAULT) - dld_flags |= MAC_PROP_DEFAULT; + dld_flags |= DLD_PROP_DEFAULT; else if (type == DLADM_PROP_VAL_MODIFIABLE) - dld_flags |= MAC_PROP_POSSIBLE; + dld_flags |= DLD_PROP_POSSIBLE; if (linkid == DATALINK_INVALID_LINKID || prop_name == NULL || prop_val == NULL || val_cntp == NULL || *val_cntp == 0) @@ -1124,7 +1203,7 @@ 
dladm_get_linkprop_values(dladm_handle_t handle, datalink_id_t linkid, prop_val[valc] = (char *)(prop_val + *val_cntp) + valc * DLADM_PROP_VAL_MAX; - dld_flags = (type == DLADM_PROP_VAL_DEFAULT) ? MAC_PROP_DEFAULT : 0; + dld_flags = (type == DLADM_PROP_VAL_DEFAULT) ? DLD_PROP_DEFAULT : 0; switch (type) { case DLADM_PROP_VAL_CURRENT: @@ -1177,7 +1256,7 @@ dladm_get_linkprop_values(dladm_handle_t handle, datalink_id_t linkid, status = DLADM_STATUS_NOMEM; else status = pdp->pd_check(handle, pdp, linkid, - prop_val, *val_cntp, vdp, media); + prop_val, *val_cntp, 0, vdp, media); if (status == DLADM_STATUS_OK) { for (valc = 0; valc < *val_cntp; valc++) ret_val[valc] = vdp[valc].vd_val; @@ -1211,9 +1290,10 @@ static int i_dladm_init_one_prop(dladm_handle_t handle, datalink_id_t linkid, const char *prop_name, void *arg) { - char *buf, **propvals; - uint_t i, valcnt = DLADM_MAX_PROP_VALCNT; - dladm_status_t status, *retval = arg; + char *buf, **propvals; + uint_t i, valcnt = DLADM_MAX_PROP_VALCNT; + dladm_status_t status; + dladm_linkprop_args_t *dla = arg; if ((buf = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) * DLADM_MAX_PROP_VALCNT)) == NULL) { @@ -1233,9 +1313,10 @@ i_dladm_init_one_prop(dladm_handle_t handle, datalink_id_t linkid, } status = dladm_set_linkprop(handle, linkid, prop_name, propvals, - valcnt, DLADM_OPT_ACTIVE); + valcnt, dla->dla_flags | DLADM_OPT_ACTIVE); + if (status != DLADM_STATUS_OK) - *retval = status; + dla->dla_status = status; done: if (buf != NULL) @@ -1269,9 +1350,16 @@ dladm_init_linkprop(dladm_handle_t handle, datalink_id_t linkid, dladm_status_t status = DLADM_STATUS_OK; datalink_media_t dmedia; uint32_t media; + dladm_linkprop_args_t *dla; dmedia = any_media ? 
DATALINK_ANY_MEDIATYPE : DL_WIFI; + dla = malloc(sizeof (dladm_linkprop_args_t)); + if (dla == NULL) + return (DLADM_STATUS_NOMEM); + dla->dla_flags = DLADM_OPT_BOOT; + dla->dla_status = DLADM_STATUS_OK; + if (linkid == DATALINK_ALL_LINKID) { (void) dladm_walk_datalink_id(i_dladm_init_linkprop, handle, NULL, DATALINK_CLASS_ALL, dmedia, DLADM_OPT_PERSIST); @@ -1279,36 +1367,32 @@ dladm_init_linkprop(dladm_handle_t handle, datalink_id_t linkid, ((dladm_datalink_id2info(handle, linkid, NULL, NULL, &media, NULL, 0) == DLADM_STATUS_OK) && DATALINK_MEDIA_ACCEPTED(dmedia, media))) { - (void) dladm_walk_linkprop(handle, linkid, &status, + (void) dladm_walk_linkprop(handle, linkid, (void *)dla, i_dladm_init_one_prop); + status = dla->dla_status; } + free(dla); return (status); } /* ARGSUSED */ static dladm_status_t -do_get_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { char zone_name[ZONENAME_MAX]; zoneid_t zid; dladm_status_t status; - char *cp; - dld_ioc_macprop_t *dip; if (flags != 0) return (DLADM_STATUS_NOTSUP); - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &zid, sizeof (zid)); if (status != DLADM_STATUS_OK) return (status); - cp = dip->pr_val; - (void) memcpy(&zid, cp, sizeof (zid)); - free(dip); - *val_cnt = 1; if (zid != GLOBAL_ZONEID) { if (getzonenamebyid(zid, zone_name, sizeof (zone_name)) < 0) { @@ -1393,13 +1477,11 @@ cleanup: /* ARGSUSED */ static dladm_status_t -do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { dladm_status_t status = DLADM_STATUS_OK; zoneid_t zid_old, 
zid_new; - char *cp; - dld_ioc_macprop_t *dip; dld_ioc_zid_t *dzp; if (val_cnt != 1) @@ -1407,20 +1489,16 @@ do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, dzp = (dld_ioc_zid_t *)vdp->vd_val; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, NULL); + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + NULL, &zid_old, sizeof (zid_old)); if (status != DLADM_STATUS_OK) return (status); - cp = dip->pr_val; - (void) memcpy(&zid_old, cp, sizeof (zid_old)); - free(dip); - zid_new = dzp->diz_zid; if (zid_new == zid_old) return (DLADM_STATUS_OK); - if ((status = i_dladm_set_public_prop(handle, pdp, linkid, vdp, val_cnt, + if ((status = set_public_prop(handle, pdp, linkid, vdp, val_cnt, flags, media)) != DLADM_STATUS_OK) return (status); @@ -1440,8 +1518,9 @@ do_set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { char *zone_name; zoneid_t zoneid; @@ -1490,35 +1569,33 @@ done: /* ARGSUSED */ static dladm_status_t -i_dladm_maxbw_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; mac_resource_props_t mrp; dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) return (status); - bcopy(dip->pr_val, &mrp, sizeof 
(mac_resource_props_t)); - free(dip); - if ((mrp.mrp_mask & MRP_MAXBW) == 0) { - (*prop_val)[0] = '\0'; - } else { - (void) dladm_bw2str(mrp.mrp_maxbw, prop_val[0]); + *val_cnt = 0; + return (DLADM_STATUS_OK); } + + (void) dladm_bw2str(mrp.mrp_maxbw, prop_val[0]); *val_cnt = 1; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_check_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { uint64_t *maxbw; dladm_status_t status = DLADM_STATUS_OK; @@ -1547,11 +1624,15 @@ do_check_maxbw(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ dladm_status_t -do_extract_maxbw(val_desc_t *vdp, uint_t cnt, void *arg) +extract_maxbw(val_desc_t *vdp, uint_t cnt, void *arg) { mac_resource_props_t *mrp = arg; - bcopy((char *)vdp->vd_val, &mrp->mrp_maxbw, sizeof (uint64_t)); + if (vdp->vd_val == RESET_VAL) { + mrp->mrp_maxbw = MRP_MAXBW_RESETVAL; + } else { + bcopy((char *)vdp->vd_val, &mrp->mrp_maxbw, sizeof (uint64_t)); + } mrp->mrp_mask |= MRP_MAXBW; return (DLADM_STATUS_OK); @@ -1559,34 +1640,33 @@ do_extract_maxbw(val_desc_t *vdp, uint_t cnt, void *arg) /* ARGSUSED */ static dladm_status_t -i_dladm_cpus_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; + dladm_status_t status; mac_resource_props_t mrp; int i; uint32_t ncpus; - uchar_t *cp; - dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) - return (status); + if (strcmp(pdp->pd_name, "cpus-effective") == 0) { + status = 
i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + status = i_dladm_get_public_prop(handle, linkid, + "resource", flags, perm_flags, &mrp, sizeof (mrp)); + } - cp = (uchar_t *)dip->pr_val; - (void) memcpy(&mrp, cp, sizeof (mac_resource_props_t)); - free(dip); + if (status != DLADM_STATUS_OK) + return (status); ncpus = mrp.mrp_ncpus; - if (ncpus > *val_cnt) return (DLADM_STATUS_TOOSMALL); if (ncpus == 0) { - (*prop_val)[0] = '\0'; - *val_cnt = 1; + *val_cnt = 0; return (DLADM_STATUS_OK); } @@ -1600,194 +1680,215 @@ i_dladm_cpus_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_res(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) +check_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { + uint32_t cpuid; + int i, j, rc; + char *endp; + long nproc = sysconf(_SC_NPROCESSORS_CONF); mac_resource_props_t mrp; - dladm_status_t status = DLADM_STATUS_OK; - dld_ioc_macprop_t *dip; + dladm_status_t status; + uint_t perm_flags; + + /* Get the current pool property */ + status = i_dladm_get_public_prop(handle, linkid, "resource", 0, + &perm_flags, &mrp, sizeof (mrp)); + + if (status == DLADM_STATUS_OK) { + /* Can't set cpus if a pool is set */ + if (strlen(mrp.mrp_pool) != 0) + return (DLADM_STATUS_POOLCPU); + } bzero(&mrp, sizeof (mac_resource_props_t)); - dip = i_dladm_buf_alloc_by_name(0, linkid, pdp->pd_name, - flags, &status); - if (dip == NULL) - return (status); + for (i = 0; i < val_cnt; i++) { + errno = 0; + cpuid = strtol(prop_val[i], &endp, 10); + if (errno != 0 || *endp != '\0') + return (DLADM_STATUS_BADVAL); - if (vdp->vd_val == RESET_VAL) { - switch (dip->pr_num) { - case MAC_PROP_MAXBW: - mrp.mrp_maxbw = MRP_MAXBW_RESETVAL; - mrp.mrp_mask = 
MRP_MAXBW; - break; - case MAC_PROP_PRIO: - mrp.mrp_priority = MPL_RESET; - mrp.mrp_mask = MRP_PRIORITY; - break; - default: - free(dip); - return (DLADM_STATUS_BADARG); - } - } else { - switch (dip->pr_num) { - case MAC_PROP_MAXBW: - bcopy((void *)vdp->vd_val, &mrp.mrp_maxbw, - sizeof (uint64_t)); - mrp.mrp_mask = MRP_MAXBW; - break; - case MAC_PROP_PRIO: - bcopy((void *)vdp->vd_val, &mrp.mrp_priority, - sizeof (mac_priority_level_t)); - mrp.mrp_mask = MRP_PRIORITY; - break; - default: - free(dip); - return (DLADM_STATUS_BADARG); + if (cpuid >= nproc) + return (DLADM_STATUS_CPUMAX); + + rc = p_online(cpuid, P_STATUS); + if (rc < 1) + return (DLADM_STATUS_CPUERR); + + if (rc != P_ONLINE) + return (DLADM_STATUS_CPUNOTONLINE); + + vdp[i].vd_val = (uintptr_t)cpuid; + } + + /* Check for duplicates */ + for (i = 0; i < val_cnt; i++) { + for (j = 0; j < val_cnt; j++) { + if (i != j && vdp[i].vd_val == vdp[j].vd_val) + return (DLADM_STATUS_BADVAL); } } + return (DLADM_STATUS_OK); +} - (void) memcpy(dip->pr_val, &mrp, dip->pr_valsize); - status = i_dladm_macprop(handle, dip, B_TRUE); - free(dip); - return (status); +/* ARGSUSED */ +dladm_status_t +extract_cpus(val_desc_t *vdp, uint_t cnt, void *arg) +{ + mac_resource_props_t *mrp = arg; + int i; + + if (vdp[0].vd_val == RESET_VAL) { + bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t)); + mrp->mrp_mask |= MRP_CPUS; + return (DLADM_STATUS_OK); + } + + for (i = 0; i < cnt; i++) + mrp->mrp_cpu[i] = (uint32_t)vdp[i].vd_val; + + mrp->mrp_ncpus = cnt; + mrp->mrp_mask |= (MRP_CPUS|MRP_CPUS_USERSPEC); + mrp->mrp_fanout_mode = MCM_CPUS; + mrp->mrp_rx_intr_cpu = -1; + + return (DLADM_STATUS_OK); } +/* + * Get the pool datalink property from the kernel. This is used + * for both the user specified pool and effective pool properties. 
+ */ /* ARGSUSED */ static dladm_status_t -do_set_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) +get_pool(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, + uint_t flags, uint_t *perm_flags) { mac_resource_props_t mrp; dladm_status_t status; - dld_ioc_macprop_t *dip; - datalink_class_t class; - /* - * CPU bindings can be set on VNIC and regular physical links. - * However VNICs fails the dladm_phys_info test(). So apply - * the phys_info test only on physical links. - */ - if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class, - NULL, NULL, 0)) != DLADM_STATUS_OK) { - return (status); - } - - /* - * We set intr_cpu to -1. The interrupt will be retargetted, - * if possible when the setup is complete in MAC. - */ - bzero(&mrp, sizeof (mac_resource_props_t)); - mrp.mrp_mask = MRP_CPUS; - if (vdp != NULL && vdp->vd_val != RESET_VAL) { - mac_resource_props_t *vmrp; - - vmrp = (mac_resource_props_t *)vdp->vd_val; - if (vmrp->mrp_ncpus > 0) { - bcopy(vmrp, &mrp, sizeof (mac_resource_props_t)); - mrp.mrp_mask = MRP_CPUS; - } - mrp.mrp_mask |= MRP_CPUS_USERSPEC; - mrp.mrp_fanout_mode = MCM_CPUS; - mrp.mrp_intr_cpu = -1; + if (strcmp(pdp->pd_name, "pool-effective") == 0) { + status = i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + status = i_dladm_get_public_prop(handle, linkid, + "resource", flags, perm_flags, &mrp, sizeof (mrp)); } - dip = i_dladm_buf_alloc_by_name(0, linkid, pdp->pd_name, - flags, &status); - if (dip == NULL) + if (status != DLADM_STATUS_OK) return (status); - (void) memcpy(dip->pr_val, &mrp, dip->pr_valsize); - status = i_dladm_macprop(handle, dip, B_TRUE); - free(dip); - return (status); + if (strlen(mrp.mrp_pool) == 0) { + (*prop_val)[0] = '\0'; + } else { + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, 
+ "%s", mrp.mrp_pool); + } + *val_cnt = 1; + + return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_check_cpus(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_pool(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { - uint32_t cpuid; - int i, j, rc; - long nproc = sysconf(_SC_NPROCESSORS_CONF); - mac_resource_props_t *mrp; + pool_conf_t *poolconf; + pool_t *pool; + mac_resource_props_t mrp; + dladm_status_t status; + uint_t perm_flags; + char *poolname; + + /* Get the current cpus property */ + status = i_dladm_get_public_prop(handle, linkid, "resource", 0, + &perm_flags, &mrp, sizeof (mrp)); - mrp = malloc(sizeof (mac_resource_props_t)); - if (mrp == NULL) + if (status == DLADM_STATUS_OK) { + /* Can't set pool if cpus are set */ + if (mrp.mrp_ncpus != 0) + return (DLADM_STATUS_POOLCPU); + } + + poolname = malloc(sizeof (mrp.mrp_pool)); + if (poolname == NULL) return (DLADM_STATUS_NOMEM); - for (i = 0; i < val_cnt; i++) { - errno = 0; - cpuid = strtol(prop_val[i], (char **)NULL, 10); - if (errno != 0 || cpuid >= nproc) { - free(mrp); - return (DLADM_STATUS_CPUMAX); - } - rc = p_online(cpuid, P_STATUS); - if (rc < 1) { - free(mrp); - return (DLADM_STATUS_CPUERR); - } - if (rc != P_ONLINE) { - free(mrp); - return (DLADM_STATUS_CPUNOTONLINE); + /* Check for pool's availability if not booting */ + if ((flags & DLADM_OPT_BOOT) == 0) { + + /* Allocate and open pool configuration */ + if ((poolconf = pool_conf_alloc()) == NULL) + return (DLADM_STATUS_BADVAL); + + if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) + != PO_SUCCESS) { + pool_conf_free(poolconf); + return (DLADM_STATUS_BADVAL); } - mrp->mrp_cpu[i] = cpuid; - } - mrp->mrp_ncpus = (uint32_t)val_cnt; - /* Check for duplicates */ - for (i = 0; i < val_cnt; i++) { - for (j = 0; j < 
val_cnt; j++) { - if (i != j && mrp->mrp_cpu[i] == mrp->mrp_cpu[j]) { - free(mrp); - return (DLADM_STATUS_BADARG); - } + /* Look for pool name */ + if ((pool = pool_get_pool(poolconf, *prop_val)) == NULL) { + pool_conf_free(poolconf); + return (DLADM_STATUS_BADVAL); } + + pool_conf_free(poolconf); + free(pool); } - vdp->vd_val = (uintptr_t)mrp; + + (void) strlcpy(poolname, *prop_val, sizeof (mrp.mrp_pool)); + vdp->vd_val = (uintptr_t)poolname; return (DLADM_STATUS_OK); } /* ARGSUSED */ dladm_status_t -do_extract_cpus(val_desc_t *vdp, uint_t cnt, void *arg) +extract_pool(val_desc_t *vdp, uint_t cnt, void *arg) { - mac_resource_props_t *mrp = arg; - mac_resource_props_t *vmrp = (mac_resource_props_t *)vdp->vd_val; - int i; + mac_resource_props_t *mrp = (mac_resource_props_t *)arg; - for (i = 0; i < vmrp->mrp_ncpus; i++) { - mrp->mrp_cpu[i] = vmrp->mrp_cpu[i]; + if (vdp->vd_val == RESET_VAL) { + bzero(&mrp->mrp_pool, sizeof (mrp->mrp_pool)); + mrp->mrp_mask |= MRP_POOL; + return (DLADM_STATUS_OK); } - mrp->mrp_ncpus = vmrp->mrp_ncpus; - mrp->mrp_mask |= (MRP_CPUS|MRP_CPUS_USERSPEC); + + (void) strlcpy(mrp->mrp_pool, (char *)vdp->vd_val, + sizeof (mrp->mrp_pool)); + mrp->mrp_mask |= MRP_POOL; + /* + * Use MCM_CPUS since the fanout count is not user specified + * and will be determined by the cpu list generated from the + * pool. 
+ */ mrp->mrp_fanout_mode = MCM_CPUS; - mrp->mrp_intr_cpu = -1; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -i_dladm_priority_get(dladm_handle_t handle, prop_desc_t *pdp, +get_priority(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; mac_resource_props_t mrp; mac_priority_level_t pri; dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) return (status); - bcopy(dip->pr_val, &mrp, sizeof (mac_resource_props_t)); - free(dip); - pri = ((mrp.mrp_mask & MRP_PRIORITY) == 0) ? MPL_HIGH : mrp.mrp_priority; @@ -1797,77 +1898,297 @@ i_dladm_priority_get(dladm_handle_t handle, prop_desc_t *pdp, } /* ARGSUSED */ -static dladm_status_t -do_check_priority(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +dladm_status_t +extract_priority(val_desc_t *vdp, uint_t cnt, void *arg) { - mac_priority_level_t *pri; - dladm_status_t status = DLADM_STATUS_OK; + mac_resource_props_t *mrp = arg; + + if (cnt != 1) + return (DLADM_STATUS_BADVAL); + + mrp->mrp_priority = (mac_priority_level_t)vdp->vd_val; + mrp->mrp_mask |= MRP_PRIORITY; + + return (DLADM_STATUS_OK); +} + +/* + * Determines the size of the structure that needs to be sent to drivers + * for retrieving the property range values. 
+ */ +static int +i_dladm_range_size(mac_propval_range_t *r, size_t *sz) +{ + uint_t count = r->mpr_count; + + *sz = sizeof (mac_propval_range_t); + --count; + + switch (r->mpr_type) { + case MAC_PROPVAL_UINT32: + *sz += (count * sizeof (mac_propval_uint32_range_t)); + return (0); + default: + break; + } + *sz = 0; + return (EINVAL); +} + +/* ARGSUSED */ +static dladm_status_t +check_rings(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *v, datalink_media_t media) +{ if (val_cnt != 1) - return (DLADM_STATUS_BADVALCNT); + return (DLADM_STATUS_BADVAL); + if (strncasecmp(prop_val[0], "hw", strlen("hw")) == 0) { + v->vd_val = UNSPEC_VAL; + } else if (strncasecmp(prop_val[0], "sw", strlen("sw")) == 0) { + v->vd_val = 0; + } else { + v->vd_val = strtoul(prop_val[0], NULL, 0); + if (v->vd_val == 0) + return (DLADM_STATUS_BADVAL); + } + return (DLADM_STATUS_OK); +} - pri = malloc(sizeof (mac_priority_level_t)); - if (pri == NULL) - return (DLADM_STATUS_NOMEM); +/* ARGSUSED */ +static dladm_status_t +get_rings_range(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t *val_cnt, + datalink_media_t media, uint_t flags, uint_t *perm_flags) +{ + dld_ioc_macprop_t *dip; + dladm_status_t status = DLADM_STATUS_OK; + mac_propval_range_t *rangep; + size_t sz; + mac_propval_uint32_range_t *ur; - status = dladm_str2pri(*prop_val, pri); - if (status != DLADM_STATUS_OK) { - free(pri); + sz = sizeof (mac_propval_range_t); + + if ((dip = i_dladm_buf_alloc_by_name(sz, linkid, pdp->pd_name, flags, + &status)) == NULL) + return (status); + + status = i_dladm_macprop(handle, dip, B_FALSE); + if (status != DLADM_STATUS_OK) return (status); + + rangep = (mac_propval_range_t *)(void *)&dip->pr_val; + *val_cnt = 1; + ur = &rangep->mpr_range_uint32[0]; + /* This is the case where the dev doesn't have any rings/groups */ + if (rangep->mpr_count == 0) { + (*prop_val)[0] = '\0'; + /* + * 
This is the case where the dev supports rings, but static + * grouping. + */ + } else if (ur->mpur_min == ur->mpur_max && + ur->mpur_max == 0) { + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, "sw,hw"); + /* + * This is the case where the dev supports rings and dynamic + * grouping, but has only one value (say 2 rings and 2 groups). + */ + } else if (ur->mpur_min == ur->mpur_max) { + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, "sw,hw,%d", + ur->mpur_min); + /* + * This is the case where the dev supports rings and dynamic + * grouping and has a range of rings. + */ + } else { + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, + "sw,hw,<%ld-%ld>", ur->mpur_min, ur->mpur_max); } + free(dip); + return (status); +} - if (*pri < MPL_LOW || *pri > MPL_HIGH) { - free(pri); - return (DLADM_STATUS_BADVAL); + +/* ARGSUSED */ +static dladm_status_t +get_rxrings(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, + uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + dladm_status_t status; + uint32_t nrings = 0; + + /* + * Get the number of (effective-)rings from the resource property. + */ + if (strcmp(pdp->pd_name, "rxrings-effective") == 0) { + status = i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + /* + * Get the permissions from the "rxrings" property. 
+ */ + status = i_dladm_get_public_prop(handle, linkid, "rxrings", + flags, perm_flags, NULL, 0); + if (status != DLADM_STATUS_OK) + return (status); + + status = i_dladm_get_public_prop(handle, linkid, + "resource", flags, NULL, &mrp, sizeof (mrp)); } - vdp->vd_val = (uintptr_t)pri; + if (status != DLADM_STATUS_OK) + return (status); + + if ((mrp.mrp_mask & MRP_RX_RINGS) == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + nrings = mrp.mrp_nrxrings; + *val_cnt = 1; + if (mrp.mrp_mask & MRP_RXRINGS_UNSPEC) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "hw"); + else if (nrings == 0) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "sw"); + else + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%ld", nrings); return (DLADM_STATUS_OK); } /* ARGSUSED */ dladm_status_t -do_extract_priority(val_desc_t *vdp, uint_t cnt, void *arg) +extract_rxrings(val_desc_t *vdp, uint_t cnt, void *arg) { - mac_resource_props_t *mrp = arg; + mac_resource_props_t *mrp = (mac_resource_props_t *)arg; - bcopy((char *)vdp->vd_val, &mrp->mrp_priority, - sizeof (mac_priority_level_t)); - mrp->mrp_mask |= MRP_PRIORITY; + mrp->mrp_nrxrings = 0; + if (vdp->vd_val == RESET_VAL) + mrp->mrp_mask = MRP_RINGS_RESET; + else if (vdp->vd_val == UNSPEC_VAL) + mrp->mrp_mask = MRP_RXRINGS_UNSPEC; + else + mrp->mrp_nrxrings = vdp->vd_val; + mrp->mrp_mask |= MRP_RX_RINGS; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_set_protection(dladm_handle_t handle, prop_desc_t *pdp, +get_txrings(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, + uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + dladm_status_t status; + uint32_t nrings = 0; + + + /* + * Get the number of (effective-)rings from the resource property. 
+ */ + if (strcmp(pdp->pd_name, "txrings-effective") == 0) { + status = i_dladm_get_public_prop(handle, linkid, + "resource-effective", flags, perm_flags, &mrp, + sizeof (mrp)); + } else { + /* + * Get the permissions from the "txrings" property. + */ + status = i_dladm_get_public_prop(handle, linkid, "txrings", + flags, perm_flags, NULL, 0); + if (status != DLADM_STATUS_OK) + return (status); + + /* + * Get the number of rings from the "resource" property. + */ + status = i_dladm_get_public_prop(handle, linkid, "resource", + flags, NULL, &mrp, sizeof (mrp)); + } + + if (status != DLADM_STATUS_OK) + return (status); + + if ((mrp.mrp_mask & MRP_TX_RINGS) == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + nrings = mrp.mrp_ntxrings; + *val_cnt = 1; + if (mrp.mrp_mask & MRP_TXRINGS_UNSPEC) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "hw"); + else if (nrings == 0) + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "sw"); + else + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%ld", nrings); + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +dladm_status_t +extract_txrings(val_desc_t *vdp, uint_t cnt, void *arg) +{ + mac_resource_props_t *mrp = (mac_resource_props_t *)arg; + + mrp->mrp_ntxrings = 0; + if (vdp->vd_val == RESET_VAL) + mrp->mrp_mask = MRP_RINGS_RESET; + else if (vdp->vd_val == UNSPEC_VAL) + mrp->mrp_mask = MRP_TXRINGS_UNSPEC; + else + mrp->mrp_ntxrings = vdp->vd_val; + mrp->mrp_mask |= MRP_TX_RINGS; + + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +static dladm_status_t +get_cntavail(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, + uint_t *perm_flags) +{ + if (flags & DLD_PROP_DEFAULT) + return (DLADM_STATUS_NOTDEFINED); + + return (get_uint32(handle, pdp, linkid, prop_val, val_cnt, media, + flags, perm_flags)); +} + +/* ARGSUSED */ +static dladm_status_t +set_resource(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t 
val_cnt, uint_t flags, datalink_media_t media) { mac_resource_props_t mrp; dladm_status_t status = DLADM_STATUS_OK; dld_ioc_macprop_t *dip; + int i; bzero(&mrp, sizeof (mac_resource_props_t)); - dip = i_dladm_buf_alloc_by_name(0, linkid, "protection", + dip = i_dladm_buf_alloc_by_name(0, linkid, "resource", flags, &status); if (dip == NULL) return (status); - if (strcmp(pdp->pd_name, "protection") == 0) { - status = do_extract_protection(vdp, val_cnt, &mrp); - if (status != DLADM_STATUS_OK) - goto done; + for (i = 0; i < DLADM_MAX_RSRC_PROP; i++) { + resource_prop_t *rp = &rsrc_prop_table[i]; + + if (strcmp(pdp->pd_name, rp->rp_name) != 0) + continue; - } else if (strcmp(pdp->pd_name, "allowed-ips") == 0) { - status = do_extract_allowedips(vdp, val_cnt, &mrp); + status = rp->rp_extract(vdp, val_cnt, &mrp); if (status != DLADM_STATUS_OK) goto done; - } else { - status = DLADM_STATUS_BADARG; - goto done; + + break; } (void) memcpy(dip->pr_val, &mrp, dip->pr_valsize); @@ -1880,59 +2201,77 @@ done: /* ARGSUSED */ static dladm_status_t -do_get_protection(dladm_handle_t handle, prop_desc_t *pdp, +get_protection(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; mac_resource_props_t mrp; mac_protect_t *p; dladm_status_t status; - int i; + uint32_t i, cnt = 0, setbits[32]; - dip = i_dladm_get_public_prop(handle, linkid, "protection", flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) return (status); - bcopy(dip->pr_val, &mrp, sizeof (mac_resource_props_t)); - free(dip); - p = &mrp.mrp_protect; - if ((mrp.mrp_mask & MRP_PROTECT) != 0 && - strcmp(pdp->pd_name, "protection") == 0) { - uint32_t cnt = 0, setbits[32]; + if ((mrp.mrp_mask & MRP_PROTECT) == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + 
dladm_find_setbits32(p->mp_types, setbits, &cnt); + if (cnt > *val_cnt) + return (DLADM_STATUS_BADVALCNT); - dladm_find_setbits32(p->mp_types, setbits, &cnt); - if (cnt > *val_cnt) - return (DLADM_STATUS_BADVALCNT); + for (i = 0; i < cnt; i++) + (void) dladm_protect2str(setbits[i], prop_val[i]); - for (i = 0; i < cnt; i++) - (void) dladm_protect2str(setbits[i], prop_val[i]); + *val_cnt = cnt; + return (DLADM_STATUS_OK); +} - *val_cnt = cnt; +/* ARGSUSED */ +static dladm_status_t +get_allowedips(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t *val_cnt, + datalink_media_t media, uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + mac_protect_t *p; + dladm_status_t status; + int i; + + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) + return (status); + + p = &mrp.mrp_protect; + if (p->mp_ipaddrcnt == 0) { + *val_cnt = 0; return (DLADM_STATUS_OK); } + if (p->mp_ipaddrcnt > *val_cnt) + return (DLADM_STATUS_BADVALCNT); - if (p->mp_ipaddrcnt > 0 && - strcmp(pdp->pd_name, "allowed-ips") == 0) { - if (p->mp_ipaddrcnt > *val_cnt) - return (DLADM_STATUS_BADVALCNT); + for (i = 0; i < p->mp_ipaddrcnt; i++) { + if (p->mp_ipaddrs[i].ip_version == IPV4_VERSION) { + ipaddr_t v4addr; - for (i = 0; i < p->mp_ipaddrcnt; i++) { - (void) dladm_ipv4addr2str(&p->mp_ipaddrs[i], + v4addr = V4_PART_OF_V6(p->mp_ipaddrs[i].ip_addr); + (void) dladm_ipv4addr2str(&v4addr, prop_val[i]); + } else { + (void) dladm_ipv6addr2str(&p->mp_ipaddrs[i].ip_addr, prop_val[i]); } - *val_cnt = p->mp_ipaddrcnt; - return (DLADM_STATUS_OK); } - - *val_cnt = 0; + *val_cnt = p->mp_ipaddrcnt; return (DLADM_STATUS_OK); } dladm_status_t -do_extract_protection(val_desc_t *vdp, uint_t cnt, void *arg) +extract_protection(val_desc_t *vdp, uint_t cnt, void *arg) { mac_resource_props_t *mrp = arg; uint32_t types = 0; @@ -1947,7 +2286,7 @@ do_extract_protection(val_desc_t *vdp, 
uint_t cnt, void *arg) } dladm_status_t -do_extract_allowedips(val_desc_t *vdp, uint_t cnt, void *arg) +extract_allowedips(val_desc_t *vdp, uint_t cnt, void *arg) { mac_resource_props_t *mrp = arg; mac_protect_t *p = &mrp->mrp_protect; @@ -1956,63 +2295,441 @@ do_extract_allowedips(val_desc_t *vdp, uint_t cnt, void *arg) if (vdp->vd_val == 0) { cnt = (uint_t)-1; } else { - for (i = 0; i < cnt; i++) - p->mp_ipaddrs[i] = (ipaddr_t)vdp[i].vd_val; + for (i = 0; i < cnt; i++) { + bcopy((void *)vdp[i].vd_val, &p->mp_ipaddrs[i], + sizeof (mac_ipaddr_t)); + } } p->mp_ipaddrcnt = cnt; mrp->mrp_mask |= MRP_PROTECT; return (DLADM_STATUS_OK); } +static dladm_status_t +check_single_ip(char *buf, mac_ipaddr_t *addr) +{ + dladm_status_t status; + ipaddr_t v4addr; + in6_addr_t v6addr; + boolean_t isv4 = B_TRUE; + + status = dladm_str2ipv4addr(buf, &v4addr); + if (status == DLADM_STATUS_INVALID_IP) { + status = dladm_str2ipv6addr(buf, &v6addr); + if (status == DLADM_STATUS_OK) + isv4 = B_FALSE; + } + if (status != DLADM_STATUS_OK) + return (status); + + if (isv4) { + if (v4addr == INADDR_ANY) + return (DLADM_STATUS_INVALID_IP); + + IN6_IPADDR_TO_V4MAPPED(v4addr, &addr->ip_addr); + addr->ip_version = IPV4_VERSION; + } else { + if (IN6_IS_ADDR_UNSPECIFIED(&v6addr)) + return (DLADM_STATUS_INVALID_IP); + + addr->ip_addr = v6addr; + addr->ip_version = IPV6_VERSION; + } + return (DLADM_STATUS_OK); +} + /* ARGSUSED */ static dladm_status_t -do_check_allowedips(dladm_handle_t handle, prop_desc_t *pdp, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, +check_allowedips(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, datalink_media_t media) { dladm_status_t status; - ipaddr_t addr; + mac_ipaddr_t *addr; int i; if (val_cnt > MPT_MAXIPADDR) return (DLADM_STATUS_BADVALCNT); for (i = 0; i < val_cnt; i++) { - status = dladm_str2ipv4addr(prop_val[i], &addr); + if ((addr = calloc(1, sizeof (mac_ipaddr_t))) == 
NULL) { + status = DLADM_STATUS_NOMEM; + goto fail; + } + vdp[i].vd_val = (uintptr_t)addr; + + status = check_single_ip(prop_val[i], addr); if (status != DLADM_STATUS_OK) - return (status); + goto fail; + } + return (DLADM_STATUS_OK); - if (addr == 0) - return (DLADM_STATUS_BADVAL); +fail: + for (i = 0; i < val_cnt; i++) { + free((void *)vdp[i].vd_val); + vdp[i].vd_val = NULL; + } + return (status); +} - vdp[i].vd_val = (uintptr_t)addr; +static void +dladm_cid2str(mac_dhcpcid_t *cid, char *buf) +{ + char tmp_buf[DLADM_STRSIZE]; + uint_t hexlen; + + switch (cid->dc_form) { + case CIDFORM_TYPED: { + uint16_t duidtype, hwtype; + uint32_t timestamp, ennum; + char *lladdr; + + if (cid->dc_len < sizeof (duidtype)) + goto fail; + + bcopy(cid->dc_id, &duidtype, sizeof (duidtype)); + duidtype = ntohs(duidtype); + switch (duidtype) { + case DHCPV6_DUID_LLT: { + duid_llt_t llt; + + if (cid->dc_len < sizeof (llt)) + goto fail; + + bcopy(cid->dc_id, &llt, sizeof (llt)); + hwtype = ntohs(llt.dllt_hwtype); + timestamp = ntohl(llt.dllt_time); + lladdr = _link_ntoa(cid->dc_id + sizeof (llt), + NULL, cid->dc_len - sizeof (llt), IFT_OTHER); + if (lladdr == NULL) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "%d.%d.%d.%s", + duidtype, hwtype, timestamp, lladdr); + free(lladdr); + break; + } + case DHCPV6_DUID_EN: { + duid_en_t en; + + if (cid->dc_len < sizeof (en)) + goto fail; + + bcopy(cid->dc_id, &en, sizeof (en)); + ennum = DHCPV6_GET_ENTNUM(&en); + hexlen = sizeof (tmp_buf); + if (octet_to_hexascii(cid->dc_id + sizeof (en), + cid->dc_len - sizeof (en), tmp_buf, &hexlen) != 0) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "%d.%d.%s", + duidtype, ennum, tmp_buf); + break; + } + case DHCPV6_DUID_LL: { + duid_ll_t ll; + + if (cid->dc_len < sizeof (ll)) + goto fail; + + bcopy(cid->dc_id, &ll, sizeof (ll)); + hwtype = ntohs(ll.dll_hwtype); + lladdr = _link_ntoa(cid->dc_id + sizeof (ll), + NULL, cid->dc_len - sizeof (ll), IFT_OTHER); + if (lladdr == NULL) + goto fail; + + 
(void) snprintf(buf, DLADM_STRSIZE, "%d.%d.%s", + duidtype, hwtype, lladdr); + free(lladdr); + break; + } + default: { + hexlen = sizeof (tmp_buf); + if (octet_to_hexascii(cid->dc_id + sizeof (duidtype), + cid->dc_len - sizeof (duidtype), + tmp_buf, &hexlen) != 0) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "%d.%s", + duidtype, tmp_buf); + } + } + break; + } + case CIDFORM_HEX: { + hexlen = sizeof (tmp_buf); + if (octet_to_hexascii(cid->dc_id, cid->dc_len, + tmp_buf, &hexlen) != 0) + goto fail; + + (void) snprintf(buf, DLADM_STRSIZE, "0x%s", tmp_buf); + break; } + case CIDFORM_STR: { + int i; + + for (i = 0; i < cid->dc_len; i++) { + if (!isprint(cid->dc_id[i])) + goto fail; + } + (void) snprintf(buf, DLADM_STRSIZE, "%s", cid->dc_id); + break; + } + default: + goto fail; + } + return; + +fail: + (void) snprintf(buf, DLADM_STRSIZE, "<unknown>"); +} + +static dladm_status_t +dladm_str2cid(char *buf, mac_dhcpcid_t *cid) +{ + char *ptr = buf; + char tmp_buf[DLADM_STRSIZE]; + uint_t hexlen, cidlen; + + bzero(cid, sizeof (*cid)); + if (isdigit(*ptr) && + ptr[strspn(ptr, "0123456789")] == '.') { + char *cp; + ulong_t duidtype; + ulong_t subtype; + ulong_t timestamp; + uchar_t *lladdr; + int addrlen; + + errno = 0; + duidtype = strtoul(ptr, &cp, 0); + if (ptr == cp || errno != 0 || *cp != '.' 
|| + duidtype > USHRT_MAX) + return (DLADM_STATUS_BADARG); + ptr = cp + 1; + + if (duidtype != 0 && duidtype <= DHCPV6_DUID_LL) { + errno = 0; + subtype = strtoul(ptr, &cp, 0); + if (ptr == cp || errno != 0 || *cp != '.') + return (DLADM_STATUS_BADARG); + ptr = cp + 1; + } + switch (duidtype) { + case DHCPV6_DUID_LLT: { + duid_llt_t llt; + + errno = 0; + timestamp = strtoul(ptr, &cp, 0); + if (ptr == cp || errno != 0 || *cp != '.') + return (DLADM_STATUS_BADARG); + + ptr = cp + 1; + lladdr = _link_aton(ptr, &addrlen); + if (lladdr == NULL) + return (DLADM_STATUS_BADARG); + + cidlen = sizeof (llt) + addrlen; + if (cidlen > sizeof (cid->dc_id)) { + free(lladdr); + return (DLADM_STATUS_TOOSMALL); + } + llt.dllt_dutype = htons(duidtype); + llt.dllt_hwtype = htons(subtype); + llt.dllt_time = htonl(timestamp); + bcopy(&llt, cid->dc_id, sizeof (llt)); + bcopy(lladdr, cid->dc_id + sizeof (llt), addrlen); + free(lladdr); + break; + } + case DHCPV6_DUID_LL: { + duid_ll_t ll; + + lladdr = _link_aton(ptr, &addrlen); + if (lladdr == NULL) + return (DLADM_STATUS_BADARG); + + cidlen = sizeof (ll) + addrlen; + if (cidlen > sizeof (cid->dc_id)) { + free(lladdr); + return (DLADM_STATUS_TOOSMALL); + } + ll.dll_dutype = htons(duidtype); + ll.dll_hwtype = htons(subtype); + bcopy(&ll, cid->dc_id, sizeof (ll)); + bcopy(lladdr, cid->dc_id + sizeof (ll), addrlen); + free(lladdr); + break; + } + default: { + hexlen = sizeof (tmp_buf); + if (hexascii_to_octet(ptr, strlen(ptr), + tmp_buf, &hexlen) != 0) + return (DLADM_STATUS_BADARG); + + if (duidtype == DHCPV6_DUID_EN) { + duid_en_t en; + + en.den_dutype = htons(duidtype); + DHCPV6_SET_ENTNUM(&en, subtype); + + cidlen = sizeof (en) + hexlen; + if (cidlen > sizeof (cid->dc_id)) + return (DLADM_STATUS_TOOSMALL); + + bcopy(&en, cid->dc_id, sizeof (en)); + bcopy(tmp_buf, cid->dc_id + sizeof (en), + hexlen); + } else { + uint16_t dutype = htons(duidtype); + + cidlen = sizeof (dutype) + hexlen; + if (cidlen > sizeof (cid->dc_id)) + return 
(DLADM_STATUS_TOOSMALL); + + bcopy(&dutype, cid->dc_id, sizeof (dutype)); + bcopy(tmp_buf, cid->dc_id + sizeof (dutype), + hexlen); + } + break; + } + } + cid->dc_form = CIDFORM_TYPED; + } else if (strncasecmp("0x", ptr, 2) == 0 && ptr[2] != '\0') { + ptr += 2; + hexlen = sizeof (tmp_buf); + if (hexascii_to_octet(ptr, strlen(ptr), tmp_buf, + &hexlen) != 0) { + return (DLADM_STATUS_BADARG); + } + cidlen = hexlen; + if (cidlen > sizeof (cid->dc_id)) + return (DLADM_STATUS_TOOSMALL); + + bcopy(tmp_buf, cid->dc_id, cidlen); + cid->dc_form = CIDFORM_HEX; + } else { + cidlen = strlen(ptr); + if (cidlen > sizeof (cid->dc_id)) + return (DLADM_STATUS_TOOSMALL); + + bcopy(ptr, cid->dc_id, cidlen); + cid->dc_form = CIDFORM_STR; + } + cid->dc_len = cidlen; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_get_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_allowedcids(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t *val_cnt, + datalink_media_t media, uint_t flags, uint_t *perm_flags) +{ + mac_resource_props_t mrp; + mac_protect_t *p; + dladm_status_t status; + int i; + + status = i_dladm_get_public_prop(handle, linkid, "resource", flags, + perm_flags, &mrp, sizeof (mrp)); + if (status != DLADM_STATUS_OK) + return (status); + + p = &mrp.mrp_protect; + if (p->mp_cidcnt == 0) { + *val_cnt = 0; + return (DLADM_STATUS_OK); + } + if (p->mp_cidcnt > *val_cnt) + return (DLADM_STATUS_BADVALCNT); + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *cid = &p->mp_cids[i]; + + dladm_cid2str(cid, prop_val[i]); + } + *val_cnt = p->mp_cidcnt; + return (DLADM_STATUS_OK); +} + +dladm_status_t +extract_allowedcids(val_desc_t *vdp, uint_t cnt, void *arg) +{ + mac_resource_props_t *mrp = arg; + mac_protect_t *p = &mrp->mrp_protect; + int i; + + if (vdp->vd_val == 0) { + cnt = (uint_t)-1; + } else { + for (i = 0; i < cnt; i++) { + bcopy((void *)vdp[i].vd_val, &p->mp_cids[i], + sizeof (mac_dhcpcid_t)); + } 
+ } + p->mp_cidcnt = cnt; + mrp->mrp_mask |= MRP_PROTECT; + return (DLADM_STATUS_OK); +} + +/* ARGSUSED */ +static dladm_status_t +check_allowedcids(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, + uint_t flags, val_desc_t *vdp, datalink_media_t media) +{ + dladm_status_t status; + mac_dhcpcid_t *cid; + int i; + + if (val_cnt > MPT_MAXCID) + return (DLADM_STATUS_BADVALCNT); + + for (i = 0; i < val_cnt; i++) { + if ((cid = calloc(1, sizeof (mac_dhcpcid_t))) == NULL) { + status = DLADM_STATUS_NOMEM; + goto fail; + } + vdp[i].vd_val = (uintptr_t)cid; + + status = dladm_str2cid(prop_val[i], cid); + if (status != DLADM_STATUS_OK) + goto fail; + } + return (DLADM_STATUS_OK); + +fail: + for (i = 0; i < val_cnt; i++) { + free((void *)vdp[i].vd_val); + vdp[i].vd_val = NULL; + } + return (status); +} + +/* ARGSUSED */ +static dladm_status_t +get_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { struct dlautopush dlap; int i, len; dladm_status_t status; - dld_ioc_macprop_t *dip; - if (flags & MAC_PROP_DEFAULT) + if (flags & DLD_PROP_DEFAULT) return (DLADM_STATUS_NOTDEFINED); - *val_cnt = 1; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) { - (*prop_val)[0] = '\0'; + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &dlap, sizeof (dlap)); + if (status != DLADM_STATUS_OK) + return (status); + + if (dlap.dap_npush == 0) { + *val_cnt = 0; return (DLADM_STATUS_OK); } - (void) memcpy(&dlap, dip->pr_val, sizeof (dlap)); - for (i = 0, len = 0; i < dlap.dap_npush; i++) { if (i != 0) { (void) snprintf(*prop_val + len, @@ -2029,8 +2746,7 @@ do_get_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, len += (strlen(AP_ANCHOR) + 1); } } - free(dip); -done: + *val_cnt = 1; return (DLADM_STATUS_OK); } @@ -2073,8 
+2789,9 @@ i_dladm_add_ap_module(const char *module, struct dlautopush *dlap) */ /* ARGSUSED */ static dladm_status_t -do_check_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { char *module; struct dlautopush *dlap; @@ -2112,7 +2829,7 @@ do_check_autopush(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_get_rate_common(dladm_handle_t handle, prop_desc_t *pdp, +get_rate_common(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, uint_t id, uint_t *perm_flags) { @@ -2154,22 +2871,22 @@ done: } static dladm_status_t -do_get_rate_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { if (media != DL_WIFI) { - return (i_dladm_speed_get(handle, pdp, linkid, prop_val, - val_cnt, flags, perm_flags)); + return (get_speed(handle, pdp, linkid, prop_val, + val_cnt, media, flags, perm_flags)); } - return (do_get_rate_common(handle, pdp, linkid, prop_val, val_cnt, + return (get_rate_common(handle, pdp, linkid, prop_val, val_cnt, MAC_PROP_WL_DESIRED_RATES, perm_flags)); } /* ARGSUSED */ static dladm_status_t -do_get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -2182,7 +2899,7 @@ do_get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, return (DLADM_STATUS_NOTSUP); case DL_WIFI: - return (do_get_rate_common(handle, pdp, 
linkid, prop_val, + return (get_rate_common(handle, pdp, linkid, prop_val, val_cnt, MAC_PROP_WL_SUPPORTED_RATES, perm_flags)); default: return (DLADM_STATUS_BADARG); @@ -2190,7 +2907,7 @@ do_get_rate_mod(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, } static dladm_status_t -do_set_rate(dladm_handle_t handle, datalink_id_t linkid, +set_wlan_rate(dladm_handle_t handle, datalink_id_t linkid, dladm_wlan_rates_t *rates) { int i; @@ -2218,7 +2935,7 @@ do_set_rate(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_rate_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +set_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { dladm_wlan_rates_t rates; @@ -2236,16 +2953,16 @@ do_set_rate_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, rates.wr_cnt = 1; rates.wr_rates[0] = vdp[0].vd_val; - status = do_set_rate(handle, linkid, &rates); + status = set_wlan_rate(handle, linkid, &rates); -done: return (status); } /* ARGSUSED */ static dladm_status_t -do_check_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { int i; uint_t modval_cnt = MAX_SUPPORT_RATES; @@ -2269,7 +2986,7 @@ do_check_rate(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, i * DLADM_STRSIZE; } - status = do_get_rate_mod(handle, NULL, linkid, modval, &modval_cnt, + status = get_rate_mod(handle, NULL, linkid, modval, &modval_cnt, media, 0, &perm_flags); if (status != DLADM_STATUS_OK) goto done; @@ -2290,7 +3007,7 @@ done: } static dladm_status_t -do_get_phyconf(dladm_handle_t handle, datalink_id_t linkid, void *buf, +get_phyconf(dladm_handle_t handle, datalink_id_t 
linkid, void *buf, int buflen) { return (i_dladm_wlan_param(handle, linkid, buf, MAC_PROP_WL_PHY_CONFIG, @@ -2299,54 +3016,43 @@ do_get_phyconf(dladm_handle_t handle, datalink_id_t linkid, void *buf, /* ARGSUSED */ static dladm_status_t -do_get_channel_prop(dladm_handle_t handle, prop_desc_t *pdp, +get_channel(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { uint32_t channel; char buf[WLDP_BUFSIZE]; - dladm_status_t status = DLADM_STATUS_OK; + dladm_status_t status; wl_phy_conf_t wl_phy_conf; - if ((status = do_get_phyconf(handle, linkid, buf, sizeof (buf))) + if ((status = get_phyconf(handle, linkid, buf, sizeof (buf))) != DLADM_STATUS_OK) - goto done; + return (status); (void) memcpy(&wl_phy_conf, buf, sizeof (wl_phy_conf)); - if (!i_dladm_wlan_convert_chan(&wl_phy_conf, &channel)) { - status = DLADM_STATUS_NOTFOUND; - goto done; - } + if (!i_dladm_wlan_convert_chan(&wl_phy_conf, &channel)) + return (DLADM_STATUS_NOTFOUND); (void) snprintf(*prop_val, DLADM_STRSIZE, "%u", channel); *val_cnt = 1; *perm_flags = MAC_PROP_PERM_READ; -done: - return (status); -} - -static dladm_status_t -do_get_powermode(dladm_handle_t handle, datalink_id_t linkid, void *buf, - int buflen) -{ - return (i_dladm_wlan_param(handle, linkid, buf, MAC_PROP_WL_POWER_MODE, - buflen, B_FALSE)); + return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -do_get_powermode_prop(dladm_handle_t handle, prop_desc_t *pdp, +get_powermode(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { wl_ps_mode_t mode; const char *s; char buf[WLDP_BUFSIZE]; - dladm_status_t status = DLADM_STATUS_OK; + dladm_status_t status; - if ((status = do_get_powermode(handle, linkid, buf, sizeof (buf))) - != DLADM_STATUS_OK) - goto done; + if ((status = i_dladm_wlan_param(handle, linkid, buf, + 
MAC_PROP_WL_POWER_MODE, sizeof (buf), B_FALSE)) != DLADM_STATUS_OK) + return (status); (void) memcpy(&mode, buf, sizeof (mode)); switch (mode.wl_ps_mode) { @@ -2360,25 +3066,29 @@ do_get_powermode_prop(dladm_handle_t handle, prop_desc_t *pdp, s = "fast"; break; default: - status = DLADM_STATUS_NOTFOUND; - goto done; + return (DLADM_STATUS_NOTFOUND); } (void) snprintf(*prop_val, DLADM_STRSIZE, "%s", s); *val_cnt = 1; *perm_flags = MAC_PROP_PERM_RW; -done: - return (status); + return (DLADM_STATUS_OK); } +/* ARGSUSED */ static dladm_status_t -do_set_powermode(dladm_handle_t handle, datalink_id_t linkid, - dladm_wlan_powermode_t *pm) +set_powermode(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, + datalink_media_t media) { - wl_ps_mode_t ps_mode; + dladm_wlan_powermode_t powermode = vdp->vd_val; + wl_ps_mode_t ps_mode; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); (void) memset(&ps_mode, 0xff, sizeof (ps_mode)); - switch (*pm) { + switch (powermode) { case DLADM_WLAN_PM_OFF: ps_mode.wl_ps_mode = WL_PM_AM; break; @@ -2397,42 +3107,18 @@ do_set_powermode(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_powermode_prop(dladm_handle_t handle, prop_desc_t *pdp, - datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, - datalink_media_t media) -{ - dladm_wlan_powermode_t powermode = (dladm_wlan_powermode_t)vdp->vd_val; - dladm_status_t status; - - if (val_cnt != 1) - return (DLADM_STATUS_BADVALCNT); - - status = do_set_powermode(handle, linkid, &powermode); - - return (status); -} - -static dladm_status_t -do_get_radio(dladm_handle_t handle, datalink_id_t linkid, void *buf, int buflen) -{ - return (i_dladm_wlan_param(handle, linkid, buf, MAC_PROP_WL_RADIO, - buflen, B_FALSE)); -} - -/* ARGSUSED */ -static dladm_status_t -do_get_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, +get_radio(dladm_handle_t handle, prop_desc_t 
*pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { wl_radio_t radio; const char *s; char buf[WLDP_BUFSIZE]; - dladm_status_t status = DLADM_STATUS_OK; + dladm_status_t status; - if ((status = do_get_radio(handle, linkid, buf, sizeof (buf))) - != DLADM_STATUS_OK) - goto done; + if ((status = i_dladm_wlan_param(handle, linkid, buf, + MAC_PROP_WL_RADIO, sizeof (buf), B_FALSE)) != DLADM_STATUS_OK) + return (status); (void) memcpy(&radio, buf, sizeof (radio)); switch (radio) { @@ -2443,23 +3129,26 @@ do_get_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, s = "off"; break; default: - status = DLADM_STATUS_NOTFOUND; - goto done; + return (DLADM_STATUS_NOTFOUND); } (void) snprintf(*prop_val, DLADM_STRSIZE, "%s", s); *val_cnt = 1; *perm_flags = MAC_PROP_PERM_RW; -done: - return (status); + return (DLADM_STATUS_OK); } +/* ARGSUSED */ static dladm_status_t -do_set_radio(dladm_handle_t handle, datalink_id_t linkid, - dladm_wlan_radio_t *radio) +set_radio(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { - wl_radio_t r; + dladm_wlan_radio_t radio = vdp->vd_val; + wl_radio_t r; + + if (val_cnt != 1) + return (DLADM_STATUS_BADVALCNT); - switch (*radio) { + switch (radio) { case DLADM_WLAN_RADIO_ON: r = B_TRUE; break; @@ -2475,25 +3164,9 @@ do_set_radio(dladm_handle_t handle, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -do_set_radio_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) -{ - dladm_wlan_radio_t radio = (dladm_wlan_radio_t)vdp->vd_val; - dladm_status_t status; - - if (val_cnt != 1) - return (DLADM_STATUS_BADVALCNT); - - status = do_set_radio(handle, linkid, &radio); - - return (status); -} - -/* ARGSUSED */ -static dladm_status_t -do_check_hoplimit(dladm_handle_t handle, prop_desc_t 
*pdp, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, - datalink_media_t media) +check_hoplimit(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *vdp, datalink_media_t media) { int32_t hlim; char *ep; @@ -2512,8 +3185,9 @@ do_check_hoplimit(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -do_check_encaplim(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t val_cnt, val_desc_t *vdp, datalink_media_t media) +check_encaplim(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t val_cnt, uint_t flags, val_desc_t *vdp, + datalink_media_t media) { int32_t elim; char *ep; @@ -2685,7 +3359,6 @@ i_dladm_buf_alloc_impl(size_t valsize, datalink_id_t linkid, bzero(dip, dsize); dip->pr_valsize = valsize; (void) strlcpy(dip->pr_name, prop_name, sizeof (dip->pr_name)); - dip->pr_version = MAC_PROP_VERSION; dip->pr_linkid = linkid; dip->pr_num = propid; dip->pr_flags = flags; @@ -2718,7 +3391,7 @@ i_dladm_buf_alloc_by_id(size_t valsize, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -i_dladm_set_public_prop(dladm_handle_t handle, prop_desc_t *pdp, +set_public_prop(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags, datalink_media_t media) { @@ -2783,32 +3456,37 @@ i_dladm_macprop(dladm_handle_t handle, void *dip, boolean_t set) return (status); } -static dld_ioc_macprop_t * +static dladm_status_t i_dladm_get_public_prop(dladm_handle_t handle, datalink_id_t linkid, - char *prop_name, uint_t flags, dladm_status_t *status, uint_t *perm_flags) + char *prop_name, uint_t flags, uint_t *perm_flags, void *arg, size_t size) { - dld_ioc_macprop_t *dip = NULL; + dld_ioc_macprop_t *dip; + dladm_status_t status; - dip = i_dladm_buf_alloc_by_name(0, linkid, prop_name, flags, status); + dip = i_dladm_buf_alloc_by_name(0, 
linkid, prop_name, flags, &status); if (dip == NULL) - return (NULL); + return (DLADM_STATUS_NOMEM); - *status = i_dladm_macprop(handle, dip, B_FALSE); - if (*status != DLADM_STATUS_OK) { + status = i_dladm_macprop(handle, dip, B_FALSE); + if (status != DLADM_STATUS_OK) { free(dip); - return (NULL); + return (status); } + if (perm_flags != NULL) *perm_flags = dip->pr_perm_flags; - return (dip); + if (arg != NULL) + (void) memcpy(arg, dip->pr_val, size); + free(dip); + return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -i_dladm_uint32_check(dladm_handle_t handle, prop_desc_t *pdp, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *v, - datalink_media_t media) +check_uint32(dladm_handle_t handle, prop_desc_t *pdp, + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *v, datalink_media_t media) { if (val_cnt != 1) return (DLADM_STATUS_BADVAL); @@ -2818,7 +3496,7 @@ i_dladm_uint32_check(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_duplex_get(dladm_handle_t handle, prop_desc_t *pdp, +get_duplex(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -2846,8 +3524,9 @@ i_dladm_duplex_get(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_speed_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, - char **prop_val, uint_t *val_cnt, uint_t flags, uint_t *perm_flags) +get_speed(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, + char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, + uint_t *perm_flags) { uint64_t ifspeed = 0; dladm_status_t status; @@ -2870,14 +3549,14 @@ i_dladm_speed_get(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t -i_dladm_status_get(dladm_handle_t handle, prop_desc_t *pdp, +get_link_state(dladm_handle_t handle, 
prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { link_state_t link_state; dladm_status_t status; - status = i_dladm_get_state(handle, linkid, &link_state); + status = dladm_get_state(handle, linkid, &link_state); if (status != DLADM_STATUS_OK) return (status); @@ -2899,74 +3578,45 @@ i_dladm_status_get(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_binary_get(dladm_handle_t handle, prop_desc_t *pdp, +get_binary(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; - dladm_status_t status; + dladm_status_t status; + uint_t v = 0; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &v, sizeof (v)); + if (status != DLADM_STATUS_OK) return (status); - (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%x", dip->pr_val[0]); - free(dip); + (void) snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%d", (uint_t)(v > 0)); *val_cnt = 1; return (DLADM_STATUS_OK); } /* ARGSUSED */ static dladm_status_t -i_dladm_uint32_get(dladm_handle_t handle, prop_desc_t *pdp, +get_uint32(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; - uint32_t v = 0; - uchar_t *cp; - dladm_status_t status; + dladm_status_t status; + uint32_t v = 0; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &v, sizeof (v)); + if (status != DLADM_STATUS_OK) return (status); - cp = (uchar_t *)dip->pr_val; - (void) memcpy(&v, cp, sizeof (v)); (void) 
snprintf(*prop_val, DLADM_PROP_VAL_MAX, "%ld", v); - free(dip); *val_cnt = 1; return (DLADM_STATUS_OK); } -/* - * Determines the size of the structure that needs to be sent to drivers - * for retrieving the property range values. - */ -static int -i_dladm_range_size(mac_propval_range_t *r, size_t *sz) -{ - uint_t count = r->mpr_count; - - *sz = sizeof (mac_propval_range_t); - --count; - - switch (r->mpr_type) { - case MAC_PROPVAL_UINT32: - *sz += (count * sizeof (mac_propval_uint32_range_t)); - return (0); - default: - break; - } - *sz = 0; - return (EINVAL); -} - /* ARGSUSED */ static dladm_status_t -i_dladm_range_get(dladm_handle_t handle, prop_desc_t *pdp, +get_range(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -3004,14 +3654,20 @@ retry: free(dip); return (status); } + rangep = (mac_propval_range_t *)(void *)&dip->pr_val; + if (rangep->mpr_count == 0) { + *val_cnt = 1; + (void) snprintf(prop_val[0], DLADM_PROP_VAL_MAX, "--"); + goto done; + } switch (rangep->mpr_type) { case MAC_PROPVAL_UINT32: { mac_propval_uint32_range_t *ur; uint_t count = rangep->mpr_count, i; - ur = &rangep->range_uint32[0]; + ur = &rangep->mpr_range_uint32[0]; for (i = 0; i < count; i++, ur++) { if (ur->mpur_min == ur->mpur_max) { @@ -3029,26 +3685,24 @@ retry: status = DLADM_STATUS_BADARG; break; } +done: free(dip); return (status); } /* ARGSUSED */ static dladm_status_t -i_dladm_tagmode_get(dladm_handle_t handle, prop_desc_t *pdp, +get_tagmode(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; link_tagmode_t mode; dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &mode, sizeof 
(mode)); + if (status != DLADM_STATUS_OK) return (status); - (void) memcpy(&mode, dip->pr_val, sizeof (mode)); - free(dip); switch (mode) { case LINK_TAGMODE_NORMAL: @@ -3066,22 +3720,18 @@ i_dladm_tagmode_get(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -i_dladm_flowctl_get(dladm_handle_t handle, prop_desc_t *pdp, +get_flowctl(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { - dld_ioc_macprop_t *dip; - link_flowctrl_t v; - dladm_status_t status; - uchar_t *cp; + link_flowctrl_t v; + dladm_status_t status; - dip = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, - &status, perm_flags); - if (dip == NULL) + status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags, + perm_flags, &v, sizeof (v)); + if (status != DLADM_STATUS_OK) return (status); - cp = (uchar_t *)dip->pr_val; - (void) memcpy(&v, cp, sizeof (v)); switch (v) { case LINK_FLOWCTRL_NONE: (void) sprintf(*prop_val, "no"); @@ -3096,7 +3746,6 @@ i_dladm_flowctl_get(dladm_handle_t handle, prop_desc_t *pdp, (void) sprintf(*prop_val, "bi"); break; } - free(dip); *val_cnt = 1; return (DLADM_STATUS_OK); } @@ -3141,7 +3790,7 @@ i_dladm_set_private_prop(dladm_handle_t handle, datalink_id_t linkid, } dip = i_dladm_buf_alloc_by_name(bufsize + 1, linkid, prop_name, - (prop_val != NULL ? 0 : MAC_PROP_DEFAULT), &status); + (prop_val != NULL ? 0 : DLD_PROP_DEFAULT), &status); if (dip == NULL) return (status); @@ -3255,7 +3904,7 @@ i_dladm_getset_defval(dladm_handle_t handle, prop_desc_t *pdp, * a setprop to reset the value to default. 
*/ status = pdp->pd_get(handle, pdp, linkid, prop_vals, &cnt, media, - MAC_PROP_DEFAULT, &perm_flags); + DLD_PROP_DEFAULT, &perm_flags); if (status == DLADM_STATUS_OK) { if (perm_flags == MAC_PROP_PERM_RW) { status = i_dladm_set_single_prop(handle, linkid, @@ -3270,7 +3919,7 @@ i_dladm_getset_defval(dladm_handle_t handle, prop_desc_t *pdp, /* ARGSUSED */ static dladm_status_t -get_stp_prop(dladm_handle_t handle, struct prop_desc *pd, datalink_id_t linkid, +get_stp(dladm_handle_t handle, struct prop_desc *pd, datalink_id_t linkid, char **prop_val, uint_t *val_cnt, datalink_media_t media, uint_t flags, uint_t *perm_flags) { @@ -3336,8 +3985,8 @@ set_stp_prop(dladm_handle_t handle, prop_desc_t *pd, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t check_stp_prop(dladm_handle_t handle, struct prop_desc *pd, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, - datalink_media_t media) + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *vdp, datalink_media_t media) { char *cp; boolean_t iscost; @@ -3488,8 +4137,8 @@ set_bridge_pvid(dladm_handle_t handle, prop_desc_t *pd, datalink_id_t linkid, /* ARGSUSED */ static dladm_status_t check_bridge_pvid(dladm_handle_t handle, struct prop_desc *pd, - datalink_id_t linkid, char **prop_val, uint_t val_cnt, val_desc_t *vdp, - datalink_media_t media) + datalink_id_t linkid, char **prop_val, uint_t val_cnt, uint_t flags, + val_desc_t *vdp, datalink_media_t media) { char *cp; @@ -3613,7 +4262,7 @@ dladm_link_get_proplist(dladm_handle_t handle, datalink_id_t linkid, */ static dladm_status_t i_dladm_link_proplist_extract_one(dladm_handle_t handle, - dladm_arg_list_t *proplist, const char *name, void *arg) + dladm_arg_list_t *proplist, const char *name, uint_t flags, void *arg) { dladm_status_t status; dladm_arg_info_t *aip = NULL; @@ -3647,7 +4296,7 @@ i_dladm_link_proplist_extract_one(dladm_handle_t handle, /* Check property value */ if (pdp->pd_check != NULL) { status = 
pdp->pd_check(handle, pdp, 0, aip->ai_val, - aip->ai_count, vdp, 0); + aip->ai_count, flags, vdp, 0); } else { status = DLADM_STATUS_BADARG; } @@ -3684,14 +4333,14 @@ i_dladm_link_proplist_extract_one(dladm_handle_t handle, */ dladm_status_t dladm_link_proplist_extract(dladm_handle_t handle, dladm_arg_list_t *proplist, - mac_resource_props_t *mrp) + mac_resource_props_t *mrp, uint_t flags) { dladm_status_t status; int i; for (i = 0; i < DLADM_MAX_RSRC_PROP; i++) { status = i_dladm_link_proplist_extract_one(handle, - proplist, rsrc_prop_table[i].rp_name, mrp); + proplist, rsrc_prop_table[i].rp_name, flags, mrp); if (status != DLADM_STATUS_OK) return (status); } @@ -3708,20 +4357,13 @@ dladm_perm2str(uint_t perm, char *buf) } dladm_status_t -i_dladm_get_state(dladm_handle_t handle, datalink_id_t linkid, +dladm_get_state(dladm_handle_t handle, datalink_id_t linkid, link_state_t *state) { - dld_ioc_macprop_t *dip; - dladm_status_t status; uint_t perms; - dip = i_dladm_get_public_prop(handle, linkid, "state", 0, &status, - &perms); - if (status != DLADM_STATUS_OK) - return (status); - (void) memcpy(state, dip->pr_val, sizeof (*state)); - free(dip); - return (status); + return (i_dladm_get_public_prop(handle, linkid, "state", 0, + &perms, state, sizeof (*state))); } boolean_t @@ -3752,3 +4394,45 @@ dladm_attr_is_linkprop(const char *name) return (!is_nonprop); } + +dladm_status_t +dladm_linkprop_is_set(dladm_handle_t handle, datalink_id_t linkid, + dladm_prop_type_t type, const char *prop_name, boolean_t *is_set) +{ + char *buf, **propvals; + uint_t valcnt = DLADM_MAX_PROP_VALCNT; + int i; + dladm_status_t status = DLADM_STATUS_OK; + + *is_set = B_FALSE; + + if ((buf = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) * + DLADM_MAX_PROP_VALCNT)) == NULL) + return (DLADM_STATUS_NOMEM); + + propvals = (char **)(void *)buf; + for (i = 0; i < valcnt; i++) { + propvals[i] = buf + + sizeof (char *) * DLADM_MAX_PROP_VALCNT + + i * DLADM_PROP_VAL_MAX; + } + + if 
(dladm_get_linkprop(handle, linkid, type, prop_name, propvals, + &valcnt) != DLADM_STATUS_OK) { + goto done; + } + + if ((strcmp(prop_name, "pool") == 0) && (strlen(*propvals) != 0)) { + *is_set = B_TRUE; + } else if ((strcmp(prop_name, "cpus") == 0) && (valcnt != 0)) { + *is_set = B_TRUE; + } else if ((strcmp(prop_name, "_softmac") == 0) && (valcnt != 0) && + (strcmp(propvals[0], "true") == 0)) { + *is_set = B_TRUE; + } + +done: + if (buf != NULL) + free(buf); + return (status); +} diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers index f64b2d3cd1..429355e6a0 100644 --- a/usr/src/lib/libdladm/common/mapfile-vers +++ b/usr/src/lib/libdladm/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -57,6 +57,7 @@ SUNWprivate_1.1 { dladm_set_linkprop; dladm_walk_linkprop; dladm_attr_is_linkprop; + dladm_linkprop_is_set; dladm_valid_secobj_name; dladm_init_secobj; dladm_get_secobj; @@ -161,6 +162,8 @@ SUNWprivate_1.1 { dladm_str2protect; dladm_ipv4addr2str; dladm_str2ipv4addr; + dladm_ipv6addr2str; + dladm_str2ipv6addr; dladm_start_usagelog; dladm_stop_usagelog; dladm_walk_usage_res; @@ -241,6 +244,18 @@ SUNWprivate_1.1 { dladm_bridge_get_nick; dladm_bridge_set_nick; dladm_bridge_get_privprop; + + dladm_link_stat_query; + dladm_link_stat_diffchain; + dladm_link_stat_free; + dladm_link_stat_query_all; + dladm_link_stat_query_all_free; + + dladm_flow_stat_query; + dladm_flow_stat_diff; + dladm_flow_stat_free; + dladm_flow_stat_query_all; + dladm_flow_stat_query_all_free; local: *; }; diff --git a/usr/src/lib/libdladm/common/usage.c b/usr/src/lib/libdladm/common/usage.c index 82a13e4f5f..a74e81ee59 100644 --- a/usr/src/lib/libdladm/common/usage.c +++ b/usr/src/lib/libdladm/common/usage.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1333,8 +1333,10 @@ dladm_usage_summary(int (*fn)(dladm_usage_t *, void *), int logtype, ns = ne->net_entry_tstats; nd = ne->net_entry_desc; - if (ns->net_stat_ibytes + ns->net_stat_obytes == 0) + if (ns->net_stat_ibytes + ns->net_stat_obytes == 0) { + ne = ne->net_entry_next; continue; + } bcopy(&nd->net_desc_name, &usage.du_name, sizeof (usage.du_name)); usage.du_duration = ne->net_entry_ttime; diff --git a/usr/src/lib/libinetutil/common/ofmt.c b/usr/src/lib/libinetutil/common/ofmt.c index c0fce1839e..9d5ad12e53 100644 --- a/usr/src/lib/libinetutil/common/ofmt.c +++ b/usr/src/lib/libinetutil/common/ofmt.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include <errno.h> @@ -62,11 +62,9 @@ typedef struct ofmt_state_s { uint_t os_overflow; struct winsize os_winsize; int os_nrow; - boolean_t os_parsable; - boolean_t os_wrap; + uint_t os_flags; int os_nbad; char **os_badfields; - boolean_t os_multiline; int os_maxnamelen; /* longest name (f. 
multiline) */ } ofmt_state_t; /* @@ -187,8 +185,8 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, ofmt_state_t *os; int nfields = 0; ofmt_status_t err = OFMT_SUCCESS; - boolean_t parsable = ((flags & OFMT_PARSABLE) != 0); - boolean_t wrap = ((flags & OFMT_WRAP) != 0); + boolean_t parsable = (flags & OFMT_PARSABLE); + boolean_t wrap = (flags & OFMT_WRAP); boolean_t multiline = (flags & OFMT_MULTILINE); *ofmt = NULL; @@ -231,10 +229,8 @@ ofmt_open(const char *str, const ofmt_field_t *template, uint_t flags, goto nomem; *ofmt = os; os->os_fields = (ofmt_field_t *)&os[1]; - os->os_parsable = parsable; - os->os_wrap = wrap; + os->os_flags = flags; - os->os_multiline = multiline; of = os->os_fields; of_index = 0; /* @@ -321,13 +317,16 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, uint_t width = ofp->of_width; uint_t valwidth; uint_t compress; + boolean_t parsable = (os->os_flags & OFMT_PARSABLE); + boolean_t multiline = (os->os_flags & OFMT_MULTILINE); + boolean_t rightjust = (os->os_flags & OFMT_RIGHTJUST); char c; /* * Parsable fields are separated by ':'. If such a field contains * a ':' or '\', this character is prefixed by a '\'. 
*/ - if (os->os_parsable) { + if (parsable) { if (os->os_nfields == 1) { (void) printf("%s", value); return; @@ -339,7 +338,7 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, } if (!os->os_lastfield) (void) putchar(':'); - } else if (os->os_multiline) { + } else if (multiline) { if (value[0] == '\0') value = OFMT_VAL_UNDEF; (void) printf("%*.*s: %s", os->os_maxnamelen, @@ -348,7 +347,10 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, (void) putchar('\n'); } else { if (os->os_lastfield) { - (void) printf("%s", value); + if (rightjust) + (void) printf("%*s", width, value); + else + (void) printf("%s", value); os->os_overflow = 0; return; } @@ -356,7 +358,10 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, valwidth = strlen(value); if (valwidth + os->os_overflow >= width) { os->os_overflow += valwidth - width + 1; - (void) printf("%s ", value); + if (rightjust) + (void) printf("%*s ", width, value); + else + (void) printf("%s ", value); return; } @@ -365,7 +370,10 @@ ofmt_print_field(ofmt_state_t *os, ofmt_field_t *ofp, const char *value, os->os_overflow -= compress; width -= compress; } - (void) printf("%-*s", width, value); + if (rightjust) + (void) printf("%*s ", width, value); + else + (void) printf("%-*s", width, value); } } @@ -417,20 +425,23 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) boolean_t escsep, more_rows; ofmt_arg_t ofarg; split_t **sp = NULL; + boolean_t parsable = (os->os_flags & OFMT_PARSABLE); + boolean_t multiline = (os->os_flags & OFMT_MULTILINE); + boolean_t wrap = (os->os_flags & OFMT_WRAP); - if (os->os_wrap) { + if (wrap) { sp = calloc(sizeof (split_t *), os->os_nfields); if (sp == NULL) return; } - if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && !os->os_parsable && - !os->os_multiline) { + if ((os->os_nrow++ % os->os_winsize.ws_row) == 0 && + !parsable && !multiline) { ofmt_print_header(os); os->os_nrow++; } - if (os->os_multiline && os->os_nrow > 1) + if 
(multiline && os->os_nrow > 1) (void) putchar('\n'); of = os->os_fields; @@ -443,7 +454,7 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) ofarg.ofmt_cbarg = arg; if ((*of[i].of_cb)(&ofarg, value, sizeof (value))) { - if (os->os_wrap) { + if (wrap) { /* * 'value' will be split at comma boundaries * and stored into sp[i]. @@ -454,8 +465,9 @@ ofmt_print(ofmt_handle_t ofmt, void *arg) sp[i]->s_currfield < sp[i]->s_nfields) more_rows = B_TRUE; } + ofmt_print_field(os, &of[i], - (*value == '\0' && !os->os_parsable) ? + (*value == '\0' && !parsable) ? OFMT_VAL_UNDEF : value, escsep); } else { ofmt_print_field(os, &of[i], OFMT_VAL_UNKNOWN, escsep); @@ -531,6 +543,7 @@ ofmt_strerror(ofmt_handle_t ofmt, ofmt_status_t err, char *buf, uint_t bufsize) int i; const char *s; char ebuf[OFMT_BUFSIZE]; + boolean_t parsable; /* * ebuf is intended for optional error-specific data to be appended @@ -547,7 +560,8 @@ ofmt_strerror(ofmt_handle_t ofmt, ofmt_status_t err, char *buf, uint_t bufsize) * Enumerate the singular/plural version of the warning * and error to simplify and improve localization. */ - if (!os->os_parsable) { + parsable = (os->os_flags & OFMT_PARSABLE); + if (!parsable) { if (os->os_nbad > 1) s = "ignoring unknown output fields:"; else diff --git a/usr/src/lib/libinetutil/common/ofmt.h b/usr/src/lib/libinetutil/common/ofmt.h index 81693ae325..e69d43e20a 100644 --- a/usr/src/lib/libinetutil/common/ofmt.h +++ b/usr/src/lib/libinetutil/common/ofmt.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -152,6 +152,8 @@ typedef enum { */ typedef struct ofmt_arg_s { uint_t ofmt_id; + uint_t ofmt_width; + uint_t ofmt_index; void *ofmt_cbarg; } ofmt_arg_t; @@ -178,6 +180,7 @@ extern ofmt_status_t ofmt_open(const char *, const ofmt_field_t *, uint_t, #define OFMT_PARSABLE 0x00000001 /* machine parsable mode */ #define OFMT_WRAP 0x00000002 /* wrap output if field width is exceeded */ #define OFMT_MULTILINE 0x00000004 /* "long" output: "name: value" lines */ +#define OFMT_RIGHTJUST 0x00000008 /* right justified output */ /* * ofmt_close() must be called to free resources associated diff --git a/usr/src/lib/libsecdb/exec_attr.txt b/usr/src/lib/libsecdb/exec_attr.txt index d6258a1e2c..db47b15bd0 100644 --- a/usr/src/lib/libsecdb/exec_attr.txt +++ b/usr/src/lib/libsecdb/exec_attr.txt @@ -171,8 +171,10 @@ Network Management:solaris:cmd:::/sbin/routeadm:euid=0;\ privs=proc_chroot,proc_owner,sys_ip_config Network Management:solaris:cmd:::/sbin/dladm:euid=dladm;egid=netadm;\ privs=sys_dl_config,net_rawaccess,proc_audit -Network Management:solaris:cmd:::/sbin/flowadm:euid=dladm;egid=netadm;\ +Network Management:solaris:cmd:::/sbin/dlstat:euid=dladm;egid=sys; +Network Management:solaris:cmd:::/sbin/flowadm:euid=dladm;egid=sys;\ privs=sys_dl_config,net_rawaccess,proc_audit +Network Management:solaris:cmd:::/sbin/flowstat:euid=dladm;egid=sys; Network Management:suser:cmd:::/usr/bin/netstat:uid=0 Network Management:suser:cmd:::/usr/bin/rup:euid=0 Network Management:suser:cmd:::/usr/bin/ruptime:euid=0 @@ -189,6 +191,7 @@ Network Management:suser:cmd:::/usr/sbin/spray:euid=0 Network Observability:solaris:cmd:::/usr/sbin/snoop:privs=net_observability Network Link Security:solaris:cmd:::/sbin/dladm:euid=dladm;egid=sys;\ privs=sys_dl_config,net_rawaccess,proc_audit +Network Link Security:solaris:cmd:::/sbin/dlstat:euid=dladm;egid=sys; Network IPsec Management:solaris:cmd:::/usr/lib/inet/certdb:euid=0;privs=none Network IPsec 
Management:solaris:cmd:::/usr/lib/inet/certlocal:euid=0;privs=none Network IPsec Management:solaris:cmd:::/usr/lib/inet/certrldb:euid=0;privs=none diff --git a/usr/src/lib/libzonecfg/Makefile.com b/usr/src/lib/libzonecfg/Makefile.com index f2b2bd03b9..b0ab5e7e61 100644 --- a/usr/src/lib/libzonecfg/Makefile.com +++ b/usr/src/lib/libzonecfg/Makefile.com @@ -19,11 +19,9 @@ # CDDL HEADER END # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# LIBRARY= libzonecfg.a VERS= .1 diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c index ab0f4c498d..bdd63a31e8 100644 --- a/usr/src/lib/libzonecfg/common/libzonecfg.c +++ b/usr/src/lib/libzonecfg/common/libzonecfg.c @@ -4321,6 +4321,26 @@ zonecfg_bind_pool(zone_dochandle_t handle, zoneid_t zoneid, char *pool_err, return (Z_OK); } +int +zonecfg_get_poolname(zone_dochandle_t handle, char *zone, char *pool, + size_t poolsize) +{ + int err; + struct zone_psettab pset_tab; + + err = zonecfg_lookup_pset(handle, &pset_tab); + if ((err != Z_NO_ENTRY) && (err != Z_OK)) + return (err); + + /* pset was found so a temporary pool was created */ + if (err == Z_OK) { + (void) snprintf(pool, poolsize, TMP_POOL_NAME, zone); + return (Z_OK); + } + + /* lookup the poolname in zonecfg */ + return (zonecfg_get_pool(handle, pool, poolsize)); +} static boolean_t svc_enabled(char *svc_name) diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers index dbc66657b3..e8e3021648 100644 --- a/usr/src/lib/libzonecfg/common/mapfile-vers +++ b/usr/src/lib/libzonecfg/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# @@ -129,6 +129,7 @@ SUNWprivate_1.1 { zonecfg_getnwifent; zonecfg_getpkgdata; zonecfg_get_pool; + zonecfg_get_poolname; zonecfg_get_privset; zonecfg_getpsetent; zonecfg_getrctlent; diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 004d8894b8..19a5ec6bfb 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -2649,10 +2649,12 @@ link path=usr/sbin/cryptoadm target=../../sbin/cryptoadm link path=usr/sbin/dcopy target=./clri link path=usr/sbin/devnm target=./df link path=usr/sbin/dladm target=../../sbin/dladm +link path=usr/sbin/dlstat target=../../sbin/dlstat link path=usr/sbin/edquota target=../lib/fs/ufs/edquota link path=usr/sbin/fdisk target=../../sbin/fdisk link path=usr/sbin/fiocompress target=../../sbin/fiocompress link path=usr/sbin/flowadm target=../../sbin/flowadm +link path=usr/sbin/flowstat target=../../sbin/flowstat link path=usr/sbin/fsdb target=./clri link path=usr/sbin/fsirand target=../lib/fs/ufs/fsirand link path=usr/sbin/fssnap target=./clri diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 633e04cb85..828c6eaa42 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1162,6 +1162,7 @@ file path=usr/include/sys/lwp_timer_impl.h file path=usr/include/sys/lwp_upimutex_impl.h file path=usr/include/sys/mac.h file path=usr/include/sys/mac_flow.h +file path=usr/include/sys/mac_provider.h file path=usr/include/sys/machelf.h file path=usr/include/sys/machlock.h file path=usr/include/sys/machsig.h diff --git a/usr/src/pkg/manifests/system-network.mf b/usr/src/pkg/manifests/system-network.mf index 1384abb2ec..a0f2b9af96 100644 --- a/usr/src/pkg/manifests/system-network.mf +++ b/usr/src/pkg/manifests/system-network.mf @@ -77,7 +77,9 @@ file path=etc/nwam/loc/NoNet/ipf.conf.dfl group=netadm owner=netadm \ file path=etc/nwam/loc/NoNet/ipf6.conf.dfl group=netadm owner=netadm \ preserve=true file 
path=sbin/dladm mode=0555 +file path=sbin/dlstat mode=0555 file path=sbin/flowadm mode=0555 +file path=sbin/flowstat mode=0555 legacy pkg=SUNWcnetr arch=$(ARCH) category=system \ desc="core software for network infrastructure configuration" \ hotline="Please contact your local service provider" \ diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c index d596c313c5..8f305114d1 100644 --- a/usr/src/uts/common/inet/ip/ip6_input.c +++ b/usr/src/uts/common/inet/ip/ip6_input.c @@ -1910,6 +1910,13 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, hck_flags = DB_CKSUMFLAGS(mp); + if (hck_flags & HCK_FULLCKSUM_OK) { + /* + * Hardware has already verified the checksum. + */ + return (B_TRUE); + } + if (hck_flags & HCK_FULLCKSUM) { /* * Full checksum has been computed by the hardware @@ -1918,9 +1925,6 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, * order to protect against faulty hardware, compare * it against -0 (0xFFFF) to see if it's valid. */ - if (hck_flags & HCK_FULLCKSUM_OK) - return (B_TRUE); - cksum = DB_CKSUM16(mp); if (cksum == 0xFFFF) return (B_TRUE); diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c index a54b2e8737..0781560daf 100644 --- a/usr/src/uts/common/inet/ip/ip_input.c +++ b/usr/src/uts/common/inet/ip/ip_input.c @@ -2260,6 +2260,13 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, hck_flags = DB_CKSUMFLAGS(mp); + if (hck_flags & HCK_FULLCKSUM_OK) { + /* + * Hardware has already verified the checksum. + */ + return (B_TRUE); + } + if (hck_flags & HCK_FULLCKSUM) { /* * Full checksum has been computed by the hardware @@ -2268,9 +2275,6 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, * order to protect against faulty hardware, compare * it against -0 (0xFFFF) to see if it's valid. 
*/ - if (hck_flags & HCK_FULLCKSUM_OK) - return (B_TRUE); - cksum = DB_CKSUM16(mp); if (cksum == 0xFFFF) return (B_TRUE); diff --git a/usr/src/uts/common/inet/ip/ip_netinfo.c b/usr/src/uts/common/inet/ip/ip_netinfo.c index 0d0d943676..3849d1fe06 100644 --- a/usr/src/uts/common/inet/ip/ip_netinfo.c +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c @@ -1175,10 +1175,10 @@ ip_isvalidchecksum(net_handle_t neti, mblk_t *mp) ASSERT(mp != NULL); if (dohwcksum && - DB_CKSUM16(mp) != 0xFFFF && - (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) && - (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) && - (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) + ((DB_CKSUM16(mp) != 0xFFFF && + (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM)) || + (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK)) && + (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM_OK)) return (1); hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2; diff --git a/usr/src/uts/common/inet/iptun/iptun.c b/usr/src/uts/common/inet/iptun/iptun.c index 099a14fc2e..215221241d 100644 --- a/usr/src/uts/common/inet/iptun/iptun.c +++ b/usr/src/uts/common/inet/iptun/iptun.c @@ -379,92 +379,58 @@ iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, /* ARGSUSED */ static int iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { iptun_t *iptun = barg; - mac_propval_range_t range; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); int err; if ((err = iptun_enter(iptun)) != 0) return (err); - if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { + switch (pr_num) { + case MAC_PROP_IPTUN_HOPLIMIT: + ASSERT(pr_valsize >= sizeof (uint32_t)); + *(uint32_t *)pr_val = iptun->iptun_hoplimit; + break; + + case MAC_PROP_IPTUN_ENCAPLIMIT: + *(uint32_t *)pr_val = iptun->iptun_encaplimit; + break; + default: err = ENOTSUP; - goto done; - } - if (is_default && is_possible) { - err = EINVAL; - goto done; 
} +done: + iptun_exit(iptun); + return (err); +} - *perm = MAC_PROP_PERM_RW; - - if (is_possible) { - if (pr_valsize < sizeof (mac_propval_range_t)) { - err = EINVAL; - goto done; - } - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - } else if (pr_valsize < sizeof (uint32_t)) { - err = EINVAL; - goto done; - } +/* ARGSUSED */ +static void +iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + iptun_t *iptun = barg; switch (pr_num) { case MAC_PROP_IPTUN_HOPLIMIT: - if (is_possible) { - range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; - range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; - } else if (is_default) { - *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; - } else { - *(uint32_t *)pr_val = iptun->iptun_hoplimit; - } + mac_prop_info_set_range_uint32(prh, + IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); + mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); break; + case MAC_PROP_IPTUN_ENCAPLIMIT: - if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { - err = ENOTSUP; - goto done; - } - if (is_possible) { - range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; - range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; - } else if (is_default) { - *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; - } else { - *(uint32_t *)pr_val = iptun->iptun_encaplimit; - } + if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) + break; + mac_prop_info_set_range_uint32(prh, + IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); + mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); break; - case MAC_PROP_MTU: { - uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); - - if (is_possible) { - range.range_uint32[0].mpur_min = - iptun->iptun_typeinfo->iti_minmtu; - range.range_uint32[0].mpur_max = maxmtu; - } else { - /* - * The MAC module knows the current value and should - * never call us for it. There is also no default - * MTU, as by default, it is a dynamic property. 
- */ - err = ENOTSUP; - goto done; - } + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + iptun->iptun_typeinfo->iti_minmtu, + iptun_get_maxmtu(iptun, NULL, 0)); break; } - default: - err = EINVAL; - goto done; - } - if (is_possible) - bcopy(&range, pr_val, sizeof (range)); -done: - iptun_exit(iptun); - return (err); } uint_t @@ -3514,7 +3480,7 @@ iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) } static mac_callbacks_t iptun_m_callbacks = { - .mc_callbacks = (MC_SETPROP | MC_GETPROP), + .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), .mc_getstat = iptun_m_getstat, .mc_start = iptun_m_start, .mc_stop = iptun_m_stop, @@ -3522,6 +3488,8 @@ static mac_callbacks_t iptun_m_callbacks = { .mc_multicst = iptun_m_multicst, .mc_unicst = iptun_m_unicst, .mc_tx = iptun_m_tx, + .mc_reserved = NULL, .mc_setprop = iptun_m_setprop, - .mc_getprop = iptun_m_getprop + .mc_getprop = iptun_m_getprop, + .mc_propinfo = iptun_m_propinfo }; diff --git a/usr/src/uts/common/io/afe/afe.c b/usr/src/uts/common/io/afe/afe.c index ca67e753b9..a80775c502 100644 --- a/usr/src/uts/common/io/afe/afe.c +++ b/usr/src/uts/common/io/afe/afe.c @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -119,9 +119,11 @@ static int afe_m_stat(void *, uint_t, uint64_t *); static int afe_m_start(void *); static void afe_m_stop(void *); static int afe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int afe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void afe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static unsigned afe_intr(caddr_t); static void afe_startmac(afe_t *); static void afe_stopmac(afe_t *); @@ -173,7 +175,7 @@ static mii_ops_t afe_mii_ops = { }; static mac_callbacks_t afe_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, afe_m_stat, afe_m_start, afe_m_stop, @@ -181,12 +183,14 @@ static mac_callbacks_t afe_m_callbacks = { afe_m_multicst, afe_m_unicst, afe_m_tx, + NULL, afe_m_ioctl, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ afe_m_setprop, afe_m_getprop, + afe_m_propinfo }; @@ -2372,12 +2376,12 @@ afe_m_stat(void *arg, uint_t stat, uint64_t *val) } int -afe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +afe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { afe_t *afep = arg; - return (mii_m_getprop(afep->afe_mii, name, num, flags, sz, val, perm)); + return (mii_m_getprop(afep->afe_mii, name, num, sz, val)); } int @@ -2389,6 +2393,15 @@ afe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (mii_m_setprop(afep->afe_mii, name, num, sz, val)); } +static void +afe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + afe_t *afep = arg; + + mii_m_propinfo(afep->afe_mii, name, num, prh); +} + /* * Debugging and error reporting. 
*/ diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 32ce4dfd08..eac04f2087 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +33,38 @@ * aggregation group. * * A set of MAC ports are associated with each association group. + * + * Aggr pseudo TX rings + * -------------------- + * The underlying ports (NICs) in an aggregation can have TX rings. To + * enhance aggr's performance, these TX rings are made available to the + * aggr layer as pseudo TX rings. The concept of pseudo rings is not new. + * They are already present and implemented on the RX side, where they are + * called pseudo RX rings. The same concept is extended to the TX side where + * each TX ring of an underlying port is reflected in aggr as a pseudo + * TX ring. Thus each pseudo TX ring will map to a specific hardware TX + * ring. Even in the case of a NIC that does not have a TX ring, a pseudo + * TX ring is given to the aggregation layer. + * + * With this change, the outgoing stack depth looks much better: + * + * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() -> + * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx() + * + * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings: + * SRS_TX_AGGR and SRS_TX_BW_AGGR. + * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX + * ring belonging to a port on which the packet has to be sent. + * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4 + * policy and then uses the fanout_hint passed to it to pick a TX ring from + * the selected port.
+ * + * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where + * bandwidth limit is applied first on the outgoing packet and the packets + * allowed to go out would call mac_tx_aggr_mode() to send the packet on a + * particular TX ring. */ #include <sys/types.h> @@ -71,9 +103,8 @@ static void aggr_m_ioctl(void *, queue_t *, mblk_t *); static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); - +static void aggr_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, @@ -113,7 +144,7 @@ static id_space_t *key_ids; static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; #define AGGR_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) static mac_callbacks_t aggr_m_callbacks = { AGGR_M_CALLBACK_FLAGS, @@ -123,13 +154,15 @@ static mac_callbacks_t aggr_m_callbacks = { aggr_m_promisc, aggr_m_multicst, NULL, - aggr_m_tx, + NULL, + NULL, aggr_m_ioctl, aggr_m_capab_get, NULL, NULL, aggr_m_setprop, - aggr_m_getprop + NULL, + aggr_m_propinfo }; /*ARGSUSED*/ @@ -144,6 +177,8 @@ aggr_grp_constructor(void *buf, void *arg, int kmflag) rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL); grp->lg_link_state = LINK_STATE_UNKNOWN; return (0); } @@ -164,6 +199,8 @@ aggr_grp_destructor(void *buf, void *arg) mutex_destroy(&grp->lg_port_lock); cv_destroy(&grp->lg_port_cv); rw_destroy(&grp->lg_tx_lock); + 
mutex_destroy(&grp->lg_tx_flowctl_lock); + cv_destroy(&grp->lg_tx_flowctl_cv); } void @@ -536,7 +573,7 @@ aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, } /* - * Add a pseudo Rx ring for the given HW ring handle. + * Add a pseudo RX ring for the given HW ring handle. */ static int aggr_add_pseudo_rx_ring(aggr_port_t *port, @@ -553,7 +590,7 @@ aggr_add_pseudo_rx_ring(aggr_port_t *port, } /* - * No slot for this new Rx ring. + * No slot for this new RX ring. */ if (j == MAX_RINGS_PER_GROUP) return (EIO); @@ -567,19 +604,20 @@ aggr_add_pseudo_rx_ring(aggr_port_t *port, * The group is already registered, dynamically add a new ring to the * mac group. */ - mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring); if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; ring->arr_hw_rh = NULL; ring->arr_port = NULL; rx_grp->arg_ring_cnt--; - mac_hwring_teardown(hw_rh); + } else { + mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, + mac_find_ring(rx_grp->arg_gh, j)); } return (err); } /* - * Remove the pseudo Rx ring of the given HW ring handle. + * Remove the pseudo RX ring of the given HW ring handle. */ static void aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) @@ -632,8 +670,8 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) /* * Get the list the the underlying HW rings. 
*/ - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, - MAC_RING_TYPE_RX); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); if (port->lp_hwgh != NULL) { /* @@ -671,7 +709,7 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) port->lp_hwgh = NULL; } } else { - port->lp_grp_added = B_TRUE; + port->lp_rx_grp_added = B_TRUE; } done: mac_perim_exit(pmph); @@ -695,12 +733,12 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) ASSERT(MAC_PERIM_HELD(grp->lg_mh)); mac_perim_enter_by_mh(port->lp_mh, &pmph); - if (!port->lp_grp_added) + if (!port->lp_rx_grp_added) goto done; ASSERT(rx_grp->arg_gh != NULL); - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, - MAC_RING_TYPE_RX); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + &hwgh, hw_rh, MAC_RING_TYPE_RX); /* * If hw_rh_cnt is 0, it means that the underlying port does not @@ -725,7 +763,196 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) mac_rx_client_restart(port->lp_mch); } - port->lp_grp_added = B_FALSE; + port->lp_rx_grp_added = B_FALSE; +done: + mac_perim_exit(pmph); +} + +/* + * Add a pseudo TX ring for the given HW ring handle. + */ +static int +aggr_add_pseudo_tx_ring(aggr_port_t *port, + aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh, + mac_ring_handle_t *pseudo_rh) +{ + aggr_pseudo_tx_ring_t *ring; + int err; + int i; + + ASSERT(MAC_PERIM_HELD(port->lp_mh)); + for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { + ring = tx_grp->atg_rings + i; + if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE)) + break; + } + /* + * No slot for this new TX ring. + */ + if (i == MAX_RINGS_PER_GROUP) + return (EIO); + /* + * The following 4 statements needs to be done before + * calling mac_group_add_ring(). Otherwise it will + * result in an assertion failure in mac_init_ring(). 
+ */ + ring->atr_flags |= MAC_PSEUDO_RING_INUSE; + ring->atr_hw_rh = hw_rh; + ring->atr_port = port; + tx_grp->atg_ring_cnt++; + + /* + * The TX side has no concept of ring groups unlike RX groups. + * There is just a single group which stores all the TX rings. + * This group will be used to store aggr's pseudo TX rings. + */ + if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) { + ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; + ring->atr_hw_rh = NULL; + ring->atr_port = NULL; + tx_grp->atg_ring_cnt--; + } else { + *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i); + if (hw_rh != NULL) { + mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, + mac_find_ring(tx_grp->atg_gh, i)); + } + } + return (err); +} + +/* + * Remove the pseudo TX ring of the given HW ring handle. + */ +static void +aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp, + mac_ring_handle_t pseudo_hw_rh) +{ + aggr_pseudo_tx_ring_t *ring; + int i; + + for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { + ring = tx_grp->atg_rings + i; + if (ring->atr_rh != pseudo_hw_rh) + continue; + + ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE); + mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh); + ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; + mac_hwring_teardown(ring->atr_hw_rh); + ring->atr_hw_rh = NULL; + ring->atr_port = NULL; + tx_grp->atg_ring_cnt--; + break; + } +} + +/* + * This function is called to create pseudo rings over hardware rings of + * the underlying device. There is a 1:1 mapping between the pseudo TX + * rings of the aggr and the hardware rings of the underlying port. + */ +static int +aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) +{ + aggr_grp_t *grp = port->lp_grp; + mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh; + mac_perim_handle_t pmph; + int hw_rh_cnt, i = 0, j; + int err = 0; + + ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + /* + * Get the list the the underlying HW rings. 
+ */ + hw_rh_cnt = mac_hwrings_get(port->lp_mch, + NULL, hw_rh, MAC_RING_TYPE_TX); + + /* + * Even if the underlying NIC does not have TX rings, we + * still make a psuedo TX ring for that NIC with NULL as + * the ring handle. + */ + if (hw_rh_cnt == 0) + port->lp_tx_ring_cnt = 1; + else + port->lp_tx_ring_cnt = hw_rh_cnt; + + port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + port->lp_tx_ring_cnt), KM_SLEEP); + port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + port->lp_tx_ring_cnt), KM_SLEEP); + + if (hw_rh_cnt == 0) { + if ((err = aggr_add_pseudo_tx_ring(port, tx_grp, + NULL, &pseudo_rh)) == 0) { + port->lp_tx_rings[0] = NULL; + port->lp_pseudo_tx_rings[0] = pseudo_rh; + } + } else { + for (i = 0; err == 0 && i < hw_rh_cnt; i++) { + err = aggr_add_pseudo_tx_ring(port, + tx_grp, hw_rh[i], &pseudo_rh); + if (err != 0) + break; + port->lp_tx_rings[i] = hw_rh[i]; + port->lp_pseudo_tx_rings[i] = pseudo_rh; + } + } + + if (err != 0) { + if (hw_rh_cnt != 0) { + for (j = 0; j < i; j++) { + aggr_rem_pseudo_tx_ring(tx_grp, + port->lp_pseudo_tx_rings[j]); + } + } + kmem_free(port->lp_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + kmem_free(port->lp_pseudo_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + port->lp_tx_ring_cnt = 0; + } else { + port->lp_tx_grp_added = B_TRUE; + port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch, + aggr_tx_ring_update, port); + } + mac_perim_exit(pmph); + return (err); +} + +/* + * This function is called by aggr to remove pseudo TX rings over the + * HW rings of the underlying port. 
+ */ +static void +aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) +{ + aggr_grp_t *grp = port->lp_grp; + mac_perim_handle_t pmph; + int i; + + ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + mac_perim_enter_by_mh(port->lp_mh, &pmph); + + if (!port->lp_tx_grp_added) + goto done; + + ASSERT(tx_grp->atg_gh != NULL); + + for (i = 0; i < port->lp_tx_ring_cnt; i++) + aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]); + + kmem_free(port->lp_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + kmem_free(port->lp_pseudo_tx_rings, + (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); + + port->lp_tx_ring_cnt = 0; + (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh); + port->lp_tx_grp_added = B_FALSE; done: mac_perim_exit(pmph); } @@ -813,6 +1040,9 @@ aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, * Create the pseudo ring for each HW ring of the underlying * port. */ + rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group); + if (rc != 0) + goto bail; rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); if (rc != 0) goto bail; @@ -877,6 +1107,7 @@ bail: aggr_port_stop(port); mac_perim_exit(pmph); } + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); (void) aggr_grp_rem_port(grp, port, NULL, NULL); } @@ -1001,6 +1232,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, mac_perim_handle_t mph; int err; int i; + kt_did_t tid = 0; /* need at least one port */ if (nports == 0) @@ -1029,10 +1261,17 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_started = B_FALSE; grp->lg_promisc = B_FALSE; grp->lg_lacp_done = B_FALSE; + grp->lg_tx_notify_done = B_FALSE; grp->lg_lacp_head = grp->lg_lacp_tail = NULL; grp->lg_lacp_rx_thread = thread_create(NULL, 0, aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); + grp->lg_tx_notify_thread = thread_create(NULL, 0, + aggr_tx_notify_thread, grp, 0, 
&p0, TS_RUN, minclsyspri); + grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * + MAX_RINGS_PER_GROUP), KM_SLEEP); + grp->lg_tx_blocked_cnt = 0; bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); + bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); aggr_lacp_init_grp(grp); /* add MAC ports to group */ @@ -1127,6 +1366,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, * port. Note that this is done after the aggr registers the * mac. */ + VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0); VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); if (aggr_port_notify_link(grp, port)) link_state_changed = B_TRUE; @@ -1172,7 +1412,21 @@ bail: while (grp->lg_lacp_rx_thread != NULL) cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); mutex_exit(&grp->lg_lacp_lock); - + /* + * Inform the tx_notify thread to exit. + */ + mutex_enter(&grp->lg_tx_flowctl_lock); + if (grp->lg_tx_notify_thread != NULL) { + tid = grp->lg_tx_notify_thread->t_did; + grp->lg_tx_notify_done = B_TRUE; + cv_signal(&grp->lg_tx_flowctl_cv); + } + mutex_exit(&grp->lg_tx_flowctl_lock); + if (tid != 0) + thread_join(tid); + + kmem_free(grp->lg_tx_blocked_rings, + (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); rw_exit(&aggr_grp_lock); AGGR_GRP_REFRELE(grp); return (err); @@ -1272,6 +1526,7 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, grp->lg_nports--; mac_perim_exit(mph); + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_port_delete(port); /* @@ -1378,7 +1633,20 @@ aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) mac_perim_exit(pmph); } + /* + * aggr_rem_pseudo_tx_group() is not called here. Instead + * it is called from inside aggr_grp_rem_port() after the + * port has been detached. 
The reason is that + * aggr_rem_pseudo_tx_group() removes one ring at a time + * and if there is still traffic going on, then there + * is the possibility of aggr_find_tx_ring() returning a + * removed ring for transmission. Once the port has been + * detached, that port will not be used and + * aggr_find_tx_ring() will not return any rings + * belonging to it. + */ aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); + /* remove port from group */ rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, &link_state_changed); @@ -1408,6 +1676,7 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) mod_hash_val_t val; mac_perim_handle_t mph, pmph; int err; + kt_did_t tid = 0; rw_enter(&aggr_grp_lock, RW_WRITER); @@ -1455,6 +1724,18 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) while (grp->lg_lacp_rx_thread != NULL) cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); mutex_exit(&grp->lg_lacp_lock); + /* + * Inform the tx_notify_thread to exit. + */ + mutex_enter(&grp->lg_tx_flowctl_lock); + if (grp->lg_tx_notify_thread != NULL) { + tid = grp->lg_tx_notify_thread->t_did; + grp->lg_tx_notify_done = B_TRUE; + cv_signal(&grp->lg_tx_flowctl_cv); + } + mutex_exit(&grp->lg_tx_flowctl_lock); + if (tid != 0) + thread_join(tid); mac_perim_enter_by_mh(grp->lg_mh, &mph); @@ -1468,6 +1749,7 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) aggr_port_stop(port); (void) aggr_grp_detach_port(grp, port); mac_perim_exit(pmph); + aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); aggr_port_delete(port); port = cport; @@ -1475,6 +1757,8 @@ aggr_grp_delete(datalink_id_t linkid, cred_t *cred) mac_perim_exit(mph); + kmem_free(grp->lg_tx_blocked_rings, + (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); /* * Wait for the port's lacp timer thread and its notification callback * to exit before calling mac_unregister() since both needs to access @@ -1600,6 +1884,37 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) return 
(0); } +int +aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver; + + if (rx_ring->arr_hw_rh != NULL) { + *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat); + } else { + aggr_port_t *port = rx_ring->arr_port; + + *val = mac_stat_get(port->lp_mh, stat); + + } + return (0); +} + +int +aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver; + + if (tx_ring->atr_hw_rh != NULL) { + *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat); + } else { + aggr_port_t *port = tx_ring->atr_port; + + *val = mac_stat_get(port->lp_mh, stat); + } + return (0); +} + static int aggr_m_stat(void *arg, uint_t stat, uint64_t *val) { @@ -1821,7 +2136,6 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) if (cap_rings->mr_type == MAC_RING_TYPE_RX) { cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; - cap_rings->mr_rget = aggr_fill_ring; /* * An aggregation advertises only one (pseudo) RX @@ -1829,12 +2143,15 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) * the underlying devices. 
*/ cap_rings->mr_gnum = 1; - cap_rings->mr_gget = aggr_fill_group; cap_rings->mr_gaddring = NULL; cap_rings->mr_gremring = NULL; } else { - return (B_FALSE); + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt; + cap_rings->mr_gnum = 0; } + cap_rings->mr_rget = aggr_fill_ring; + cap_rings->mr_gget = aggr_fill_group; break; } case MAC_CAPAB_AGGR: @@ -1845,6 +2162,8 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) aggr_cap = cap_data; aggr_cap->mca_rename_fn = aggr_grp_port_rename; aggr_cap->mca_unicst = aggr_m_unicst; + aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring; + aggr_cap->mca_arg = arg; } return (B_TRUE); } @@ -1863,18 +2182,24 @@ aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, { aggr_grp_t *grp = arg; aggr_pseudo_rx_group_t *rx_group; - - ASSERT(rtype == MAC_RING_TYPE_RX && index == 0); - rx_group = &grp->lg_rx_group; - rx_group->arg_gh = gh; - rx_group->arg_grp = grp; - - infop->mgi_driver = (mac_group_driver_t)rx_group; - infop->mgi_start = NULL; - infop->mgi_stop = NULL; - infop->mgi_addmac = aggr_addmac; - infop->mgi_remmac = aggr_remmac; - infop->mgi_count = rx_group->arg_ring_cnt; + aggr_pseudo_tx_group_t *tx_group; + + ASSERT(index == 0); + if (rtype == MAC_RING_TYPE_RX) { + rx_group = &grp->lg_rx_group; + rx_group->arg_gh = gh; + rx_group->arg_grp = grp; + + infop->mgi_driver = (mac_group_driver_t)rx_group; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = aggr_addmac; + infop->mgi_remmac = aggr_remmac; + infop->mgi_count = rx_group->arg_ring_cnt; + } else { + tx_group = &grp->lg_tx_group; + tx_group->atg_gh = gh; + } } /* @@ -1905,6 +2230,7 @@ aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; + aggr_mac_intr.mi_ddi_handle = NULL; infop->mri_driver = 
(mac_ring_driver_t)rx_ring; infop->mri_start = aggr_pseudo_start_ring; @@ -1912,6 +2238,34 @@ aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_intr = aggr_mac_intr; infop->mri_poll = aggr_rx_poll; + + infop->mri_stat = aggr_rx_ring_stat; + break; + } + case MAC_RING_TYPE_TX: { + aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group; + aggr_pseudo_tx_ring_t *tx_ring; + + ASSERT(rg_index == -1); + ASSERT(index < tx_group->atg_ring_cnt); + + tx_ring = &tx_group->atg_rings[index]; + tx_ring->atr_rh = rh; + + infop->mri_driver = (mac_ring_driver_t)tx_ring; + infop->mri_start = NULL; + infop->mri_stop = NULL; + infop->mri_tx = aggr_ring_tx; + infop->mri_stat = aggr_tx_ring_stat; + /* + * Use the hw TX ring handle to find if the ring needs + * serialization or not. For NICs that do not expose + * Tx rings, atr_hw_rh will be NULL. + */ + if (tx_ring->atr_hw_rh != NULL) { + infop->mri_flags = + mac_hwring_getinfo(tx_ring->atr_hw_rh); + } break; } default: @@ -2399,34 +2753,33 @@ aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, } int -aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range) +aggr_grp_possible_mtu_range(aggr_grp_t *grp, uint32_t *min, uint32_t *max) { mac_propval_range_t *vals; mac_propval_uint32_range_t *ur; aggr_port_t *port; mac_perim_handle_t mph; - mac_prop_t macprop; - uint_t perm, i; - uint32_t min = 0, max = (uint32_t)-1; + uint_t i; int err = 0; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); + *min = 0; + *max = (uint32_t)-1; + vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports, KM_SLEEP); - macprop.mp_id = MAC_PROP_MTU; - macprop.mp_name = "mtu"; - macprop.mp_flags = MAC_PROP_POSSIBLE; for (port = grp->lg_ports, i = 0; port != NULL; port = port->lp_next, i++) { mac_perim_enter_by_mh(port->lp_mh, &mph); - err = mac_get_prop(port->lp_mh, &macprop, vals + i, - sizeof (mac_propval_range_t), &perm); + err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, + NULL, 0, vals + i, NULL); 
mac_perim_exit(mph); if (err != 0) break; } + /* * if any of the underlying ports does not support changing MTU then * just return ENOTSUP @@ -2435,47 +2788,42 @@ aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range) ASSERT(err != 0); goto done; } - range->mpr_count = 1; - range->mpr_type = MAC_PROPVAL_UINT32; + for (i = 0; i < grp->lg_nports; i++) { - ur = &((vals + i)->range_uint32[0]); + ur = &((vals + i)->mpr_range_uint32[0]); /* * Take max of the min, for range_min; that is the minimum * MTU value for an aggregation is the maximum of the * minimum values of all the underlying ports */ - if (ur->mpur_min > min) - min = ur->mpur_min; + if (ur->mpur_min > *min) + *min = ur->mpur_min; /* Take min of the max, for range_max */ - if (ur->mpur_max < max) - max = ur->mpur_max; + if (ur->mpur_max < *max) + *max = ur->mpur_max; } - range->range_uint32[0].mpur_min = min; - range->range_uint32[0].mpur_max = max; done: kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports); + return (err); } -/*ARGSUSED*/ -static int -aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +static void +aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) { - mac_propval_range_t range; - int err = ENOTSUP; aggr_grp_t *grp = m_driver; + _NOTE(ARGUNUSED(pr_name)); + switch (pr_num) { - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0) - return (err); - bcopy(&range, pr_val, sizeof (range)); - return (0); + case MAC_PROP_MTU: { + uint32_t min, max; + + if (aggr_grp_possible_mtu_range(grp, &min, &max) != 0) + return; + mac_prop_info_set_range_uint32(prh, min, max); + break; + } } - return (err); } diff --git a/usr/src/uts/common/io/aggr/aggr_lacp.c 
b/usr/src/uts/common/io/aggr/aggr_lacp.c index 936e783e9e..2892f1438a 100644 --- a/usr/src/uts/common/io/aggr/aggr_lacp.c +++ b/usr/src/uts/common/io/aggr/aggr_lacp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -653,7 +653,10 @@ lacp_xmit_sm(aggr_port_t *portp) fill_lacp_pdu(portp, (lacp_t *)(mp->b_rptr + sizeof (struct ether_header))); - (void) mac_tx(portp->lp_mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL); + /* Send the packet over the first TX ring */ + mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp); + if (mp != NULL) + freemsg(mp); pl->NTT = B_FALSE; portp->lp_lacp_stats.LACPDUsTx++; @@ -1322,8 +1325,14 @@ lacp_selection_logic(aggr_port_t *portp) if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, &aggrp->aggr.PartnerSystem) == 0 && (tpp->lp_lacp.PartnerOperKey == - aggrp->aggr.PartnerOperAggrKey)) + aggrp->aggr.PartnerOperAggrKey)) { + /* Set aggregation Partner MAC and key */ + aggrp->aggr.PartnerSystem = + pl->PartnerOperSystem; + aggrp->aggr.PartnerOperAggrKey = + pl->PartnerOperKey; break; + } } if (tpp == NULL) { @@ -2293,7 +2302,11 @@ aggr_lacp_rx(mblk_t *dmp) if (receive_marker_pdu(portp, dmp) != 0) break; - (void) mac_tx(portp->lp_mch, dmp, 0, MAC_DROP_ON_NO_DESC, NULL); + /* Send the packet over the first TX ring */ + dmp = mac_hwring_send_priv(portp->lp_mch, + portp->lp_tx_rings[0], dmp); + if (dmp != NULL) + freemsg(dmp); mac_perim_exit(mph); AGGR_PORT_REFRELE(portp); return; diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 2c7e74131a..00545d2c03 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -71,7 +71,7 @@ aggr_port_destructor(void *buf, void *arg) ASSERT(port->lp_mnh == NULL); ASSERT(port->lp_mphp == NULL); - ASSERT(!port->lp_grp_added); + ASSERT(!port->lp_rx_grp_added && !port->lp_tx_grp_added); ASSERT(port->lp_hwgh == NULL); } @@ -111,7 +111,7 @@ aggr_port_init_callbacks(aggr_port_t *port) port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port); /* * Hold a reference of the grp and the port and this reference will - * be release when the thread exits. + * be released when the thread exits. * * The reference on the port is used for aggr_port_delete() to * continue without waiting for the thread to exit; the reference diff --git a/usr/src/uts/common/io/aggr/aggr_send.c b/usr/src/uts/common/io/aggr/aggr_send.c index bc0a19368d..7d423f267e 100644 --- a/usr/src/uts/common/io/aggr/aggr_send.c +++ b/usr/src/uts/common/io/aggr/aggr_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/conf.h> #include <sys/modctl.h> #include <sys/sunddi.h> +#include <sys/callb.h> #include <sys/vlan.h> #include <sys/strsun.h> #include <sys/strsubr.h> @@ -68,79 +69,163 @@ aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) grp->lg_mac_tx_policy = mac_policy; } +#define HASH_HINT(hint) \ + ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) + /* - * Send function invoked by the MAC service module. + * Function invoked by mac layer to find a specific TX ring on a port + * to send data. 
*/ mblk_t * -aggr_m_tx(void *arg, mblk_t *mp) +aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh) { aggr_grp_t *grp = arg; aggr_port_t *port; - mblk_t *nextp; - mac_tx_cookie_t cookie; uint64_t hash; - void *mytx_handle; - - for (;;) { - rw_enter(&grp->lg_tx_lock, RW_READER); - if (grp->lg_ntx_ports == 0) { - /* - * We could have returned from aggr_m_start() before - * the ports were actually attached. Drop the chain. - */ - rw_exit(&grp->lg_tx_lock); - freemsgchain(mp); - return (NULL); - } - - nextp = mp->b_next; - mp->b_next = NULL; - - hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, - B_TRUE); - port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; + rw_enter(&grp->lg_tx_lock, RW_READER); + if (grp->lg_ntx_ports == 0) { /* - * Bump the active Tx ref count so that the port won't - * be deleted. The reference count will be dropped in mac_tx(). + * We could have returned from aggr_m_start() before + * the ports were actually attached. Drop the chain. */ - mytx_handle = mac_tx_hold(port->lp_mch); rw_exit(&grp->lg_tx_lock); + freemsgchain(mp); + return (NULL); + } + hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE); + port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; - if (mytx_handle == NULL) { - /* - * The port is quiesced. - */ - freemsg(mp); - } else { - mblk_t *ret_mp = NULL; - - /* - * It is fine that the port state changes now. - * Set MAC_TX_NO_HOLD to inform mac_tx() not to bump - * the active Tx ref again. Use hash as the hint so - * to direct traffic to different TX rings. Note below - * bit operation is needed to get the most benefit - * from the mac_tx() hash algorithm. - */ + /* + * Use hash as the hint so to direct traffic to + * different TX rings. Note below bit operation + * is needed in case hint is 0 to get the most + * benefit from HASH_HINT() algorithm. 
+ */ + if (port->lp_tx_ring_cnt > 1) { + if (hint == 0) { hash = (hash << 24 | hash << 16 | hash); hash = (hash << 32 | hash); - cookie = mac_tx(port->lp_mch, mp, (uintptr_t)hash, - MAC_TX_NO_ENQUEUE | MAC_TX_NO_HOLD, &ret_mp); + } else { + hash = hint; + } + hash = HASH_HINT(hash); + *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt]; + } else { + *rh = port->lp_pseudo_tx_rings[0]; + } + rw_exit(&grp->lg_tx_lock); - mac_tx_rele(port->lp_mch, mytx_handle); + return (mp); +} - if (cookie != NULL) { - ret_mp->b_next = nextp; - mp = ret_mp; - break; - } +/* + * aggr_tx_notify_thread: + * + * aggr_tx_ring_update() callback function wakes up this thread when + * it gets called. This thread will call mac_tx_ring_update() to + * notify upper mac of flow control getting relieved. Note that + * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly + * because aggr_tx_ring_update() is called from lower mac with + * mi_rw_lock held. + */ +void +aggr_tx_notify_thread(void *arg) +{ + callb_cpr_t cprinfo; + aggr_grp_t *grp = (aggr_grp_t *)arg; + mac_ring_handle_t pseudo_mrh; + + CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr, + "aggr_tx_notify_thread"); + + mutex_enter(&grp->lg_tx_flowctl_lock); + while (!grp->lg_tx_notify_done) { + if ((grp->lg_tx_blocked_cnt) == 0) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&grp->lg_tx_flowctl_cv, + &grp->lg_tx_flowctl_lock); + CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock); + continue; + } + while (grp->lg_tx_blocked_cnt != 0) { + grp->lg_tx_blocked_cnt--; + pseudo_mrh = + grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt]; + mutex_exit(&grp->lg_tx_flowctl_lock); + mac_tx_ring_update(grp->lg_mh, pseudo_mrh); + mutex_enter(&grp->lg_tx_flowctl_lock); } + } + /* + * The grp is being destroyed, exit the thread. 
+ */ + grp->lg_tx_notify_thread = NULL; + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +/* + * Callback function registered with lower mac to receive wakeups from + * drivers when flow control is relieved (i.e. Tx descriptors are + * available). + */ +void +aggr_tx_ring_update(void *arg1, uintptr_t arg2) +{ + aggr_port_t *port = (aggr_port_t *)arg1; + mac_ring_handle_t mrh = (mac_ring_handle_t)arg2; + mac_ring_handle_t pseudo_mrh; + aggr_grp_t *grp = port->lp_grp; + int i = 0; - if ((mp = nextp) == NULL) - break; + if (mrh == NULL) { + /* + * If the underlying NIC does not expose TX rings, + * still as pseudo TX ring is presented to the + * aggr mac. + */ + pseudo_mrh = port->lp_pseudo_tx_rings[0]; + } else { + for (i = 0; i < port->lp_tx_ring_cnt; i++) { + if (port->lp_tx_rings[i] == mrh) + break; + } + ASSERT(i < port->lp_tx_ring_cnt); + pseudo_mrh = port->lp_pseudo_tx_rings[i]; } - return (mp); + mutex_enter(&grp->lg_tx_flowctl_lock); + /* + * It could be possible that some (broken?) device driver + * could send more than one wakeup on the same ring. In + * such a case, multiple instances of the same pseudo TX + * ring should not be saved in lg_tx_blocked_rings[] + * array. So first check if woken up ring (pseudo_mrh) is + * already in the lg_tx_blocked_rings[] array. + */ + for (i = 0; i < grp->lg_tx_blocked_cnt; i++) { + if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) { + mutex_exit(&grp->lg_tx_flowctl_lock); + return; + } + } + /* A distinct mac_ring_handle. Save and increment count */ + grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh; + grp->lg_tx_blocked_cnt++; + cv_signal(&grp->lg_tx_flowctl_cv); + mutex_exit(&grp->lg_tx_flowctl_lock); +} + +/* + * Send function invoked by the MAC service module. 
+ */ +mblk_t * +aggr_ring_tx(void *arg, mblk_t *mp) +{ + aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg; + aggr_port_t *port = pseudo_ring->atr_port; + + return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp)); } /* diff --git a/usr/src/uts/common/io/arn/arn_main.c b/usr/src/uts/common/io/arn/arn_main.c index 32f22b007a..68e61a6773 100644 --- a/usr/src/uts/common/io/arn/arn_main.c +++ b/usr/src/uts/common/io/arn/arn_main.c @@ -139,11 +139,13 @@ static void arn_m_ioctl(void *, queue_t *, mblk_t *); static int arn_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int arn_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void arn_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* MAC Callcack Functions */ static mac_callbacks_t arn_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, arn_m_stat, arn_m_start, arn_m_stop, @@ -151,12 +153,14 @@ static mac_callbacks_t arn_m_callbacks = { arn_m_multicst, arn_m_unicst, arn_m_tx, + NULL, arn_m_ioctl, NULL, NULL, NULL, arn_m_setprop, - arn_m_getprop + arn_m_getprop, + arn_m_propinfo }; /* @@ -2518,17 +2522,26 @@ arn_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int arn_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct arn_softc *sc = arg; int err = 0; err = ieee80211_getprop(&sc->sc_isc, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +arn_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct arn_softc *sc = arg; + + ieee80211_propinfo(&sc->sc_isc, pr_name, wldp_pr_num, prh); +} + /* return bus cachesize in 4B word units */ 
static void arn_pci_config_cachesize(struct arn_softc *sc) diff --git a/usr/src/uts/common/io/atge/atge.h b/usr/src/uts/common/io/atge/atge.h index 01c295047a..68a11705c9 100644 --- a/usr/src/uts/common/io/atge/atge.h +++ b/usr/src/uts/common/io/atge/atge.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +30,7 @@ extern "C" { #endif +#include <sys/ethernet.h> #include <sys/mac_provider.h> #include "atge_l1e_reg.h" diff --git a/usr/src/uts/common/io/atge/atge_main.c b/usr/src/uts/common/io/atge/atge_main.c index c368df44aa..938d0ddefb 100644 --- a/usr/src/uts/common/io/atge/atge_main.c +++ b/usr/src/uts/common/io/atge/atge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -204,16 +204,18 @@ static int atge_m_stat(void *, uint_t, uint64_t *); static int atge_m_start(void *); static void atge_m_stop(void *); static int atge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int atge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void atge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int atge_m_unicst(void *, const uint8_t *); static int atge_m_multicst(void *, boolean_t, const uint8_t *); static int atge_m_promisc(void *, boolean_t); static mblk_t *atge_m_tx(void *, mblk_t *); static mac_callbacks_t atge_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, atge_m_stat, atge_m_start, atge_m_stop, @@ -221,12 +223,14 @@ static mac_callbacks_t atge_m_callbacks = { atge_m_multicst, atge_m_unicst, atge_m_tx, + NULL, /* mc_reserved */ NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ 
atge_m_setprop, atge_m_getprop, + atge_m_propinfo }; /* @@ -1724,13 +1728,12 @@ atge_m_stat(void *arg, uint_t stat, uint64_t *val) } int -atge_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +atge_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { atge_t *atgep = arg; - return (mii_m_getprop(atgep->atge_mii, name, num, flags, sz, val, - perm)); + return (mii_m_getprop(atgep->atge_mii, name, num, sz, val)); } int @@ -1757,6 +1760,14 @@ atge_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (r); } +static void +atge_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + atge_t *atgep = arg; + + mii_m_propinfo(atgep->atge_mii, name, num, prh); +} void atge_program_ether(atge_t *atgep) diff --git a/usr/src/uts/common/io/ath/ath_main.c b/usr/src/uts/common/io/ath/ath_main.c index 451f827415..fa2a3dba24 100644 --- a/usr/src/uts/common/io/ath/ath_main.c +++ b/usr/src/uts/common/io/ath/ath_main.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -221,10 +221,12 @@ static void ath_m_ioctl(void *, queue_t *, mblk_t *); static int ath_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int ath_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void ath_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t ath_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ath_m_stat, ath_m_start, ath_m_stop, @@ -232,12 +234,14 @@ static mac_callbacks_t ath_m_callbacks = { ath_m_multicst, ath_m_unicst, ath_m_tx, + NULL, ath_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, ath_m_setprop, - ath_m_getprop + ath_m_getprop, + ath_m_propinfo }; /* @@ -1779,21 +1783,30 @@ ath_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } -/* ARGSUSED */ + static int ath_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { ath_t *asc = arg; int err = 0; err = ieee80211_getprop(&asc->asc_isc, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +ath_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + ath_t *asc = arg; + + ieee80211_propinfo(&asc->asc_isc, pr_name, wldp_pr_num, mph); +} + +static void ath_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) { ath_t *asc = arg; diff --git a/usr/src/uts/common/io/atu/atu.c b/usr/src/uts/common/io/atu/atu.c index 881d72f869..fdbb932fca 100644 --- a/usr/src/uts/common/io/atu/atu.c +++ b/usr/src/uts/common/io/atu/atu.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1511,6 +1511,26 @@ atu_m_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t len, return (0); } +static int +atu_m_getprop(void *arg, const char *name, mac_prop_id_t id, + uint_t length, void *buf) +{ + struct atu_softc *sc = (struct atu_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + return (ieee80211_getprop(ic, name, id, length, buf)); +} + +static void +atu_m_propinfo(void *arg, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t mph) +{ + struct atu_softc *sc = (struct atu_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, name, id, mph); +} + static void atu_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { @@ -1635,7 +1655,7 @@ atu_m_stat(void *arg, uint_t stat, uint64_t *val) } static mac_callbacks_t atu_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, atu_m_stat, atu_m_start, atu_m_stop, @@ -1643,10 +1663,12 @@ static mac_callbacks_t atu_m_callbacks = { atu_m_multicst, atu_m_unicst, atu_m_tx, + NULL, atu_m_ioctl, NULL, NULL, NULL, atu_m_setprop, - ieee80211_getprop + atu_m_getprop, + atu_m_propinfo }; diff --git a/usr/src/uts/common/io/bfe/bfe.c b/usr/src/uts/common/io/bfe/bfe.c index b71bcc229b..42e87bb745 100644 --- a/usr/src/uts/common/io/bfe/bfe.c +++ b/usr/src/uts/common/io/bfe/bfe.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/stream.h> @@ -161,7 +161,7 @@ static void bfe_clear_stats(bfe_t *); static void bfe_gather_stats(bfe_t *); static void bfe_error(dev_info_t *, char *, ...); static int bfe_mac_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int bfe_mac_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int bfe_tx_reclaim(bfe_ring_t *); @@ -1651,92 +1651,66 @@ bfe_mac_getstat(void *arg, uint_t stat, uint64_t *val) return (err); } -/*ARGSUSED*/ int -bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { bfe_t *bfe = (bfe_t *)arg; int err = 0; - boolean_t dfl = flags & MAC_PROP_DEFAULT; - - if (sz == 0) - return (EINVAL); - *perm = MAC_PROP_PERM_RW; switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&bfe->bfe_chip.duplex, val, - sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&bfe->bfe_chip.duplex, val, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - bcopy(&bfe->bfe_chip.speed, val, sizeof (uint64_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&bfe->bfe_chip.speed, val, sizeof (uint64_t)); break; case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_aneg : bfe->bfe_adv_aneg; + *(uint8_t *)val = bfe->bfe_adv_aneg; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100fdx : bfe->bfe_adv_100fdx; + *(uint8_t *)val = bfe->bfe_adv_100fdx; break; + case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)val = - dfl ? 
bfe->bfe_cap_100fdx : bfe->bfe_adv_100fdx; + *(uint8_t *)val = bfe->bfe_adv_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100hdx : bfe->bfe_adv_100hdx; + *(uint8_t *)val = bfe->bfe_adv_100hdx; break; + case MAC_PROP_EN_100HDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100hdx : bfe->bfe_adv_100hdx; + *(uint8_t *)val = bfe->bfe_adv_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10fdx : bfe->bfe_adv_10fdx; + *(uint8_t *)val = bfe->bfe_adv_10fdx; break; + case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10fdx : bfe->bfe_adv_10fdx; + *(uint8_t *)val = bfe->bfe_adv_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10hdx : bfe->bfe_adv_10hdx; + *(uint8_t *)val = bfe->bfe_adv_10hdx; break; + case MAC_PROP_EN_10HDX_CAP: - *(uint8_t *)val = - dfl ? bfe->bfe_cap_10hdx : bfe->bfe_adv_10hdx; + *(uint8_t *)val = bfe->bfe_adv_10hdx; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? bfe->bfe_cap_100T4 : bfe->bfe_adv_100T4; + *(uint8_t *)val = bfe->bfe_adv_100T4; break; + case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)val = - dfl ? 
bfe->bfe_cap_100T4 : bfe->bfe_adv_100T4; + *(uint8_t *)val = bfe->bfe_adv_100T4; break; default: @@ -1746,6 +1720,51 @@ bfe_mac_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (err); } + +static void +bfe_mac_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + bfe_t *bfe = (bfe_t *)arg; + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_aneg); + break; + + case MAC_PROP_EN_100FDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100fdx); + break; + + case MAC_PROP_EN_100HDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100hdx); + break; + + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_10fdx); + break; + + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_10hdx); + break; + + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_default_uint8(prh, bfe->bfe_cap_100T4); + break; + } +} + + /*ARGSUSED*/ int bfe_mac_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, @@ -2067,7 +2086,7 @@ bfe_mac_set_multicast(void *arg, boolean_t add, const uint8_t *macaddr) } static mac_callbacks_t bfe_mac_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, bfe_mac_getstat, /* gets stats */ bfe_mac_start, /* starts mac */ bfe_mac_stop, /* stops mac */ @@ -2075,12 +2094,14 @@ static mac_callbacks_t bfe_mac_callbacks = { bfe_mac_set_multicast, /* multicast implementation */ bfe_mac_set_ether_addr, /* sets ethernet address (unicast) */ bfe_mac_transmit_packet, /* transmits packet */ + NULL, NULL, /* ioctl */ NULL, /* getcap */ NULL, /* open */ NULL, /* close */ bfe_mac_setprop, bfe_mac_getprop, 
+ bfe_mac_propinfo }; static void diff --git a/usr/src/uts/common/io/bge/bge_impl.h b/usr/src/uts/common/io/bge/bge_impl.h index 86b8d3093a..350cc32ac2 100644 --- a/usr/src/uts/common/io/bge/bge_impl.h +++ b/usr/src/uts/common/io/bge/bge_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -441,7 +441,11 @@ typedef struct recv_ring { bge_rule_info_t *mac_addr_rule; uint8_t mac_addr_val[ETHERADDRL]; int poll_flag; /* Polling flag */ -} recv_ring_t; /* 0x90 (144) bytes */ + + /* Per-ring statistics */ + uint64_t rx_pkts; /* Received Packets Count */ + uint64_t rx_bytes; /* Received Bytes Count */ +} recv_ring_t; /* @@ -1196,6 +1200,7 @@ void bge_chip_msi_trig(bge_t *bgep); void bge_init_kstats(bge_t *bgep, int instance); void bge_fini_kstats(bge_t *bgep); int bge_m_stat(void *arg, uint_t stat, uint64_t *val); +int bge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* bge_log.c */ #if BGE_DEBUGGING diff --git a/usr/src/uts/common/io/bge/bge_kstats.c b/usr/src/uts/common/io/bge/bge_kstats.c index 73994cb8d3..c10a1b4601 100644 --- a/usr/src/uts/common/io/bge/bge_kstats.c +++ b/usr/src/uts/common/io/bge/bge_kstats.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1126,3 +1126,28 @@ bge_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +bge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + recv_ring_t *rx_ring = (recv_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c index 73045e9e3e..fdd38676e7 100644 --- a/usr/src/uts/common/io/bge/bge_main2.c +++ b/usr/src/uts/common/io/bge/bge_main2.c @@ -34,10 +34,6 @@ * This is the string displayed by modinfo, etc. */ static char bge_ident[] = "Broadcom Gb Ethernet"; -/* - * Make sure you keep the version ID up to date! - */ -static char bge_version[] = "Broadcom Gb Ethernet v1.14"; /* * Property names @@ -116,13 +112,18 @@ static int bge_unicst_set(void *, const uint8_t *, static int bge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int bge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void bge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int bge_set_priv_prop(bge_t *, const char *, uint_t, const void *); static int bge_get_priv_prop(bge_t *, const char *, uint_t, - uint_t, void *); + void *); +static void bge_priv_propinfo(const char *, + mac_prop_info_handle_t); -#define BGE_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +#define BGE_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t bge_m_callbacks = { BGE_M_CALLBACK_FLAGS, @@ -133,22 +134,28 @@ static mac_callbacks_t bge_m_callbacks = { bge_m_multicst, NULL, bge_m_tx, + NULL, bge_m_ioctl, bge_m_getcapab, NULL, NULL, bge_m_setprop, - 
bge_m_getprop + bge_m_getprop, + bge_m_propinfo }; -mac_priv_prop_t bge_priv_prop[] = { - {"_adv_asym_pause_cap", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_RW} +char *bge_priv_prop[] = { + "_adv_asym_pause_cap", + "_adv_pause_cap", + "_drain_max", + "_msi_cnt", + "_rx_intr_coalesce_blank_time", + "_tx_intr_coalesce_blank_time", + "_rx_intr_coalesce_pkt_cnt", + "_tx_intr_coalesce_pkt_cnt", + NULL }; -#define BGE_MAX_PRIV_PROPS \ - (sizeof (bge_priv_prop) / sizeof (mac_priv_prop_t)) - uint8_t zero_addr[6] = {0, 0, 0, 0, 0, 0}; /* * ========== Transmit and receive ring reinitialisation ========== @@ -716,7 +723,6 @@ bge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, bge_t *bgep = barg; int err = 0; uint32_t cur_mtu, new_mtu; - uint_t maxsdu; link_flowctrl_t fl; mutex_enter(bgep->genlock); @@ -819,16 +825,11 @@ reprogram: err = EINVAL; break; } - maxsdu = bgep->chipid.ethmax_size - - sizeof (struct ether_header); - err = mac_maxsdu_update(bgep->mh, maxsdu); - if (err == 0) { - bgep->bge_dma_error = B_TRUE; - bgep->manual_reset = B_TRUE; - bge_chip_stop(bgep, B_TRUE); - bge_wake_factotum(bgep); - err = 0; - } + bgep->bge_dma_error = B_TRUE; + bgep->manual_reset = B_TRUE; + bge_chip_stop(bgep, B_TRUE); + bge_wake_factotum(bgep); + err = 0; break; case MAC_PROP_FLOWCTRL: bcopy(pr_val, &fl, sizeof (fl)); @@ -887,71 +888,36 @@ reprogram: /* ARGSUSED */ static int bge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { bge_t *bgep = barg; int err = 0; - link_flowctrl_t fl; - uint64_t speed; - int flags = bgep->chipid.flags; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - - if (pr_valsize == 0) - return (EINVAL); - bzero(pr_val, pr_valsize); - - *perm = MAC_PROP_PERM_RW; - - mutex_enter(bgep->genlock); - if ((bgep->param_loop_mode != BGE_LOOP_NONE && - bge_param_locked(pr_num)) || - ((bgep->chipid.flags & CHIP_FLAG_SERDES) 
&& - ((pr_num == MAC_PROP_EN_100FDX_CAP) || - (pr_num == MAC_PROP_EN_100HDX_CAP) || - (pr_num == MAC_PROP_EN_10FDX_CAP) || - (pr_num == MAC_PROP_EN_10HDX_CAP))) || - (DEVICE_5906_SERIES_CHIPSETS(bgep) && - ((pr_num == MAC_PROP_EN_1000FDX_CAP) || - (pr_num == MAC_PROP_EN_1000HDX_CAP)))) - *perm = MAC_PROP_PERM_READ; - mutex_exit(bgep->genlock); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_duplex_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_duplex_t)); bcopy(&bgep->param_link_duplex, pr_val, sizeof (link_duplex_t)); break; - case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (speed)) - return (EINVAL); - speed = bgep->param_link_speed * 1000000ull; + case MAC_PROP_SPEED: { + uint64_t speed = bgep->param_link_speed * 1000000ull; + + ASSERT(pr_valsize >= sizeof (speed)); bcopy(&speed, pr_val, sizeof (speed)); break; + } case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_state_t)); bcopy(&bgep->link_state, pr_val, sizeof (link_state_t)); break; case MAC_PROP_AUTONEG: - if (is_default) - *(uint8_t *)pr_val = 1; - else - *(uint8_t *)pr_val = bgep->param_adv_autoneg; + *(uint8_t *)pr_val = bgep->param_adv_autoneg; break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (fl)) - return (EINVAL); - if (is_default) { - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - } + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl; + + ASSERT(pr_valsize >= sizeof (fl)); if (bgep->param_link_rx_pause && !bgep->param_link_tx_pause) @@ -970,148 +936,135 @@ bge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, fl = LINK_FLOWCTRL_BI; bcopy(&fl, pr_val, sizeof (fl)); break; + } case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - 
*(uint8_t *)pr_val = bgep->param_adv_1000fdx; + *(uint8_t *)pr_val = bgep->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_en_1000fdx; + *(uint8_t *)pr_val = bgep->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_adv_1000hdx; + *(uint8_t *)pr_val = bgep->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - if (is_default) { - if (DEVICE_5906_SERIES_CHIPSETS(bgep)) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = 1; - } - else - *(uint8_t *)pr_val = bgep->param_en_1000hdx; + *(uint8_t *)pr_val = bgep->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_100fdx; - } + *(uint8_t *)pr_val = bgep->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_100fdx; - } + *(uint8_t *)pr_val = bgep->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_100hdx; - } + *(uint8_t *)pr_val = bgep->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 
0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_100hdx; - } + *(uint8_t *)pr_val = bgep->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_10fdx; - } + *(uint8_t *)pr_val = bgep->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_10fdx; - } + *(uint8_t *)pr_val = bgep->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_adv_10hdx; - } + *(uint8_t *)pr_val = bgep->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = - ((flags & CHIP_FLAG_SERDES) ? 0 : 1); - } else { - *(uint8_t *)pr_val = bgep->param_en_10hdx; - } + *(uint8_t *)pr_val = bgep->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_PRIVATE: - err = bge_get_priv_prop(bgep, pr_name, pr_flags, + err = bge_get_priv_prop(bgep, pr_name, pr_valsize, pr_val); return (err); - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = BGE_DEFAULT_MTU; - if (!(flags & CHIP_FLAG_NO_JUMBO)) - range.range_uint32[0].mpur_max = - BGE_MAXIMUM_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } default: return (ENOTSUP); } return (0); } +static void +bge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + bge_t *bgep = 
barg; + int flags = bgep->chipid.flags; + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. + */ + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + if (DEVICE_5906_SERIES_CHIPSETS(bgep)) + mac_prop_info_set_default_uint8(prh, 0); + else + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, + (flags & CHIP_FLAG_SERDES) ? 0 : 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, BGE_DEFAULT_MTU, + (flags & CHIP_FLAG_NO_JUMBO) ? 
+ BGE_DEFAULT_MTU : BGE_MAXIMUM_MTU); + break; + + case MAC_PROP_PRIVATE: + bge_priv_propinfo(pr_name, prh); + break; + } + + mutex_enter(bgep->genlock); + if ((bgep->param_loop_mode != BGE_LOOP_NONE && + bge_param_locked(pr_num)) || + ((bgep->chipid.flags & CHIP_FLAG_SERDES) && + ((pr_num == MAC_PROP_EN_100FDX_CAP) || + (pr_num == MAC_PROP_EN_100HDX_CAP) || + (pr_num == MAC_PROP_EN_10FDX_CAP) || + (pr_num == MAC_PROP_EN_10HDX_CAP))) || + (DEVICE_5906_SERIES_CHIPSETS(bgep) && + ((pr_num == MAC_PROP_EN_1000FDX_CAP) || + (pr_num == MAC_PROP_EN_1000HDX_CAP)))) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + mutex_exit(bgep->genlock); +} + /* ARGSUSED */ static int bge_set_priv_prop(bge_t *bgep, const char *pr_name, uint_t pr_valsize, @@ -1235,53 +1188,61 @@ bge_set_priv_prop(bge_t *bgep, const char *pr_name, uint_t pr_valsize, } static int -bge_get_priv_prop(bge_t *bge, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +bge_get_priv_prop(bge_t *bge, const char *pr_name, uint_t pr_valsize, + void *pr_val) { - int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - value = (is_default? 1 : bge->param_adv_pause); - err = 0; - goto done; - } - if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - value = (is_default? 1 : bge->param_adv_asym_pause); - err = 0; - goto done; - } - if (strcmp(pr_name, "_drain_max") == 0) { - value = (is_default? 64 : bge->param_drain_max); - err = 0; - goto done; - } - if (strcmp(pr_name, "_msi_cnt") == 0) { - value = (is_default? 
0 : bge->param_msi_cnt); - err = 0; - goto done; - } + if (strcmp(pr_name, "_adv_pause_cap") == 0) + value = bge->param_adv_pause; + else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) + value = bge->param_adv_asym_pause; + else if (strcmp(pr_name, "_drain_max") == 0) + value = bge->param_drain_max; + else if (strcmp(pr_name, "_msi_cnt") == 0) + value = bge->param_msi_cnt; + else if (strcmp(pr_name, "_rx_intr_coalesce_blank_time") == 0) + value = bge->chipid.rx_ticks_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_blank_time") == 0) + value = bge->chipid.tx_ticks_norm; + else if (strcmp(pr_name, "_rx_intr_coalesce_pkt_cnt") == 0) + value = bge->chipid.rx_count_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_pkt_cnt") == 0) + value = bge->chipid.tx_count_norm; + else + return (ENOTSUP); - if (strcmp(pr_name, "_intr_coalesce_blank_time") == 0) { - value = (is_default? bge_rx_ticks_norm : - bge->chipid.rx_ticks_norm); - err = 0; - goto done; - } + (void) snprintf(pr_val, pr_valsize, "%d", value); + return (0); +} - if (strcmp(pr_name, "_intr_coalesce_pkt_cnt") == 0) { - value = (is_default? 
bge_rx_count_norm : - bge->chipid.rx_count_norm); - err = 0; - goto done; - } +static void +bge_priv_propinfo(const char *pr_name, mac_prop_info_handle_t mph) +{ + char valstr[64]; + int value; -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); - } - return (err); + if (strcmp(pr_name, "_adv_pause_cap") == 0) + value = 1; + else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) + value = 1; + else if (strcmp(pr_name, "_drain_max") == 0) + value = 64; + else if (strcmp(pr_name, "_msi_cnt") == 0) + value = 0; + else if (strcmp(pr_name, "_rx_intr_coalesce_blank_time") == 0) + value = bge_rx_ticks_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_blank_time") == 0) + value = bge_tx_ticks_norm; + else if (strcmp(pr_name, "_rx_intr_coalesce_pkt_cnt") == 0) + value = bge_rx_count_norm; + else if (strcmp(pr_name, "_tx_intr_coalesce_pkt_cnt") == 0) + value = bge_tx_count_norm; + else + return; + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(mph, valstr); } /* @@ -1682,6 +1643,7 @@ bge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = bge_ring_start; infop->mri_stop = NULL; infop->mri_poll = bge_poll_ring; + infop->mri_stat = bge_rx_ring_stat; mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; @@ -3517,7 +3479,6 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_max_sdu = cidp->ethmax_size - sizeof (struct ether_header); macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = bge_priv_prop; - macp->m_priv_prop_count = BGE_MAX_PRIV_PROPS; macp->m_v12n = MAC_VIRT_LEVEL1; /* @@ -3551,7 +3512,6 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) #endif ddi_report_dev(devinfo); - BGE_REPORT((bgep, "bge version: %s", bge_version)); return (DDI_SUCCESS); diff --git a/usr/src/uts/common/io/bge/bge_recv2.c b/usr/src/uts/common/io/bge/bge_recv2.c index fb8e1fa881..ecda51cb0a 100644 --- a/usr/src/uts/common/io/bge/bge_recv2.c +++ 
b/usr/src/uts/common/io/bge/bge_recv2.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,11 +65,12 @@ bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp) bge_mbx_put(bgep, brp->chip_mbx_reg, slot); } -static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p); +static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, + recv_ring_t *rrp); #pragma inline(bge_receive_packet) static mblk_t * -bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p) +bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, recv_ring_t *rrp) { bge_rbd_t hw_rbd; buff_ring_t *brp; @@ -237,10 +238,13 @@ bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p) if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM) pflags |= HCK_FULLCKSUM; if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM) - pflags |= HCK_IPV4_HDRCKSUM; + pflags |= HCK_IPV4_HDRCKSUM_OK; if (pflags != 0) - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, - hw_rbd.tcp_udp_cksum, pflags, 0); + mac_hcksum_set(mp, 0, 0, 0, hw_rbd.tcp_udp_cksum, pflags); + + /* Update per-ring rx statistics */ + rrp->rx_pkts++; + rrp->rx_bytes += len; refill: /* @@ -313,7 +317,8 @@ bge_receive_ring(bge_t *bgep, recv_ring_t *rrp) while ((slot != *rrp->prod_index_p) && /* Note: volatile */ (recv_cnt < BGE_MAXPKT_RCVED)) { - if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) { + if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp)) + != NULL) { *tail = mp; tail = &mp->b_next; recv_cnt++; @@ -383,7 +388,8 @@ bge_poll_ring(void *arg, int bytes_to_pickup) /* Note: volatile */ while ((slot != *rrp->prod_index_p) && (sz <= bytes_to_pickup)) { - if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) { + if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp)) + != NULL) { *tail = mp; sz += msgdsize(mp); tail = &mp->b_next; diff --git a/usr/src/uts/common/io/bge/bge_send.c 
b/usr/src/uts/common/io/bge/bge_send.c index 11f23e9f64..be3f179f31 100644 --- a/usr/src/uts/common/io/bge/bge_send.c +++ b/usr/src/uts/common/io/bge/bge_send.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -533,7 +533,7 @@ bge_ring_tx(void *arg, mblk_t *mp) /* * Retrieve checksum offloading info. */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); /* * Calculate pseudo checksum if needed. diff --git a/usr/src/uts/common/io/bridge.c b/usr/src/uts/common/io/bridge.c index ffd215c132..7b45039b62 100644 --- a/usr/src/uts/common/io/bridge.c +++ b/usr/src/uts/common/io/bridge.c @@ -492,36 +492,16 @@ bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, static int bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { bridge_mac_t *bmp = arg; int err = 0; _NOTE(ARGUNUSED(pr_name)); switch (pr_num) { - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = bmp->bm_maxsdu; - bcopy(&range, pr_val, sizeof (range)); - *perm = MAC_PROP_PERM_RW; - break; - } case MAC_PROP_STATUS: - if (pr_valsize < sizeof (bmp->bm_linkstate)) { - err = EINVAL; - } else { - bcopy(&bmp->bm_linkstate, pr_val, - sizeof (&bmp->bm_linkstate)); - *perm = MAC_PROP_PERM_READ; - } + ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate)); + bcopy(&bmp->bm_linkstate, pr_val, sizeof (&bmp->bm_linkstate)); break; default: @@ -531,8 +511,27 @@ bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 
return (err); } +static void +bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + bridge_mac_t *bmp = arg; + + _NOTE(ARGUNUSED(pr_name)); + + switch (pr_num) { + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu, + bmp->bm_maxsdu); + break; + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + } +} + static mac_callbacks_t bridge_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, bridge_m_getstat, bridge_m_start, bridge_m_stop, @@ -540,12 +539,14 @@ static mac_callbacks_t bridge_m_callbacks = { bridge_m_multicst, bridge_m_unicst, bridge_m_tx, + NULL, /* reserved */ NULL, /* ioctl */ NULL, /* getcapab */ NULL, /* open */ NULL, /* close */ bridge_m_setprop, - bridge_m_getprop + bridge_m_getprop, + bridge_m_propinfo }; /* diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index c91793723e..36d1c3d6ff 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -429,8 +429,9 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) dld_ioc_hwgrpget_t *hwgrpp = karg; dld_hwgrpinfo_t hwgrp, *hip; mac_handle_t mh = NULL; - int i, err, grpnum; + int i, err, rgrpnum, tgrpnum; uint_t bytes_left; + int totgrps = 0; zoneid_t zoneid = crgetzoneid(cred); if (zoneid != GLOBAL_ZONEID && @@ -445,8 +446,35 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) hip = (dld_hwgrpinfo_t *) ((uchar_t *)arg + sizeof (dld_ioc_hwgrpget_t)); bytes_left = hwgrpp->dih_size; - grpnum = mac_hwgrp_num(mh); - for (i = 0; i < grpnum; i++) { + + rgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_RX); + /* display the default group information first */ + if (rgrpnum > 0) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwrxgrp_info(mh, 0, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings != 0) { + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { + err = EFAULT; + goto done; + } + } + hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + tgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_TX); + /* display the default group information first */ + if (tgrpnum > 0) { if (sizeof (dld_hwgrpinfo_t) > bytes_left) { err = ENOSPC; goto done; @@ -455,15 +483,68 @@ drv_ioc_hwgrpget(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) bzero(&hwgrp, sizeof (hwgrp)); bcopy(mac_name(mh), hwgrp.dhi_link_name, sizeof (hwgrp.dhi_link_name)); - mac_get_hwgrp_info(mh, i, &hwgrp.dhi_grp_num, - &hwgrp.dhi_n_rings, &hwgrp.dhi_grp_type, + mac_get_hwtxgrp_info(mh, tgrpnum - 1, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings != 0) { + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) 
{ + err = EFAULT; + goto done; + } + } + hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + /* Rest of the rx groups */ + for (i = 1; i < rgrpnum; i++) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwrxgrp_info(mh, i, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings == 0) + continue; if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { err = EFAULT; goto done; } hip++; + totgrps++; + bytes_left -= sizeof (dld_hwgrpinfo_t); + } + + /* Rest of the tx group */ + tgrpnum = mac_hwgrp_num(mh, MAC_RING_TYPE_TX); + for (i = 0; i < tgrpnum - 1; i++) { + if (sizeof (dld_hwgrpinfo_t) > bytes_left) { + err = ENOSPC; + goto done; + } + + bzero(&hwgrp, sizeof (hwgrp)); + bcopy(mac_name(mh), hwgrp.dhi_link_name, + sizeof (hwgrp.dhi_link_name)); + mac_get_hwtxgrp_info(mh, i, &hwgrp.dhi_grp_num, + &hwgrp.dhi_n_rings, hwgrp.dhi_rings, &hwgrp.dhi_grp_type, + &hwgrp.dhi_n_clnts, hwgrp.dhi_clnts); + if (hwgrp.dhi_n_rings == 0) + continue; + if (copyout(&hwgrp, hip, sizeof (hwgrp)) != 0) { + err = EFAULT; + goto done; + } + + hip++; + totgrps++; bytes_left -= sizeof (dld_hwgrpinfo_t); } @@ -471,7 +552,7 @@ done: if (mh != NULL) dld_mac_close(mh); if (err == 0) - hwgrpp->dih_n_groups = grpnum; + hwgrpp->dih_n_groups = totgrps; return (err); } @@ -542,7 +623,7 @@ done: } /* - * DLDIOC_SET/GETPROP + * DLDIOC_SET/GETMACPROP */ static int drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, @@ -552,7 +633,6 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, dls_dl_handle_t dlh = NULL; dls_link_t *dlp = NULL; mac_perim_handle_t mph = NULL; - mac_prop_t macprop; dld_ioc_macprop_t *kprop; datalink_id_t linkid; datalink_class_t class; @@ -606,6 +686,12 @@ 
drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, goto done; } + if (!mac_prop_check_size(kprop->pr_num, kprop->pr_valsize, + kprop->pr_flags & DLD_PROP_POSSIBLE)) { + err = ENOBUFS; + goto done; + } + switch (kprop->pr_num) { case MAC_PROP_ZONE: if (set) { @@ -630,6 +716,9 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, else err = drv_ioc_clrap(linkid); } else { + if (kprop->pr_valsize == 0) + return (ENOBUFS); + kprop->pr_perm_flags = MAC_PROP_PERM_RW; err = drv_ioc_getap(linkid, dlap); } @@ -652,19 +741,51 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, err = 0; } break; - default: - macprop.mp_name = kprop->pr_name; - macprop.mp_id = kprop->pr_num; - macprop.mp_flags = kprop->pr_flags; - + default: { + mac_propval_range_t range, *rangep = NULL; + void *default_val = NULL; + uint_t default_size = 0; + void *val = kprop->pr_val; + uint_t val_size = kprop->pr_valsize; + + /* set a property value */ if (set) { - err = mac_set_prop(dlp->dl_mh, &macprop, kprop->pr_val, - kprop->pr_valsize); - } else { - kprop->pr_perm_flags = MAC_PROP_PERM_RW; - err = mac_get_prop(dlp->dl_mh, &macprop, kprop->pr_val, - kprop->pr_valsize, &kprop->pr_perm_flags); + err = mac_set_prop(dlp->dl_mh, kprop->pr_num, + kprop->pr_name, kprop->pr_val, kprop->pr_valsize); + break; + } + + /* + * Get the property value, default, or possible value + * depending on flags passed from the user. + */ + + /* a property has RW permissions by default */ + kprop->pr_perm_flags = MAC_PROP_PERM_RW; + + if (kprop->pr_flags & DLD_PROP_POSSIBLE) { + rangep = &range; + } else if (kprop->pr_flags & DLD_PROP_DEFAULT) { + default_val = val; + default_size = val_size; } + + /* + * Always return the permissions, and optionally return + * the default value or possible values range. 
+ */ + mac_prop_info(dlp->dl_mh, kprop->pr_num, kprop->pr_name, + default_val, default_size, rangep, &kprop->pr_perm_flags); + err = 0; + + if (default_val == NULL && rangep == NULL) { + err = mac_get_prop(dlp->dl_mh, kprop->pr_num, + kprop->pr_name, kprop->pr_val, kprop->pr_valsize); + } + + if (rangep != NULL) + bcopy(rangep, val, sizeof (range)); + } } done: @@ -673,6 +794,7 @@ done: if (dlp != NULL) dls_link_rele(dlp); + if (mph != NULL) { int32_t cpuid; void *mdip = NULL; @@ -684,9 +806,10 @@ done: mac_perim_exit(mph); - if (mdip != NULL) + if (mdip != NULL && cpuid != -1) mac_client_set_intr_cpu(mdip, dlp->dl_mch, cpuid); } + if (dlh != NULL) dls_devnet_rele_tmp(dlh); @@ -828,7 +951,8 @@ drv_ioc_getap(datalink_id_t linkid, struct dlautopush *dlap) (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&dap) != 0) { rw_exit(&dld_ap_hash_lock); - return (ENOENT); + dlap->dap_npush = 0; + return (0); } /* @@ -1221,7 +1345,7 @@ static dld_ioc_info_t drv_ioc_list[] = { {DLDIOC_GETMACPROP, DLDCOPYIN, sizeof (dld_ioc_macprop_t), drv_ioc_getprop, NULL}, {DLDIOC_GETHWGRP, DLDCOPYINOUT, sizeof (dld_ioc_hwgrpget_t), - drv_ioc_hwgrpget, secpolicy_dl_config}, + drv_ioc_hwgrpget, NULL}, }; typedef struct dld_ioc_modentry { diff --git a/usr/src/uts/common/io/dld/dld_flow.c b/usr/src/uts/common/io/dld/dld_flow.c index 281217d02d..7171953a2d 100644 --- a/usr/src/uts/common/io/dld/dld_flow.c +++ b/usr/src/uts/common/io/dld/dld_flow.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -69,20 +69,23 @@ static int dld_walk_flow_cb(mac_flowinfo_t *finfo, void *arg) { flowinfo_state_t *statep = arg; - dld_flowinfo_t fi; + dld_flowinfo_t *fi; if (statep->fi_bufsize < sizeof (dld_flowinfo_t)) return (ENOSPC); - (void) strlcpy(fi.fi_flowname, finfo->fi_flow_name, - sizeof (fi.fi_flowname)); - fi.fi_linkid = finfo->fi_link_id; - fi.fi_flow_desc = finfo->fi_flow_desc; - fi.fi_resource_props = finfo->fi_resource_props; + fi = kmem_zalloc(sizeof (*fi), KM_SLEEP); + (void) strlcpy(fi->fi_flowname, finfo->fi_flow_name, + sizeof (fi->fi_flowname)); + fi->fi_linkid = finfo->fi_link_id; + fi->fi_flow_desc = finfo->fi_flow_desc; + fi->fi_resource_props = finfo->fi_resource_props; - if (copyout(&fi, statep->fi_fl, sizeof (fi)) != 0) { + if (copyout(fi, statep->fi_fl, sizeof (*fi)) != 0) { + kmem_free(fi, sizeof (*fi)); return (EFAULT); } + kmem_free(fi, sizeof (*fi)); statep->fi_nflows++; statep->fi_bufsize -= sizeof (dld_flowinfo_t); statep->fi_fl += sizeof (dld_flowinfo_t); @@ -98,13 +101,14 @@ int dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) { flowinfo_state_t state; - mac_flowinfo_t finfo; + mac_flowinfo_t *finfo; int err = 0; /* For now, one can only view flows from the global zone. 
*/ if (crgetzoneid(credp) != GLOBAL_ZONEID) return (EPERM); + finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); state.fi_bufsize = wf->wf_len; state.fi_fl = (uchar_t *)uaddr + sizeof (*wf); state.fi_nflows = 0; @@ -113,12 +117,14 @@ dld_walk_flow(dld_ioc_walkflow_t *wf, intptr_t uaddr, cred_t *credp) err = mac_link_flow_walk(wf->wf_linkid, dld_walk_flow_cb, &state); } else { - err = mac_link_flow_info(wf->wf_name, &finfo); - if (err != 0) + err = mac_link_flow_info(wf->wf_name, finfo); + if (err != 0) { + kmem_free(finfo, sizeof (*finfo)); return (err); - - err = dld_walk_flow_cb(&finfo, &state); + } + err = dld_walk_flow_cb(finfo, &state); } + kmem_free(finfo, sizeof (*finfo)); wf->wf_nflows = state.fi_nflows; return (err); } diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index ca1fc10306..67774c329f 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -476,7 +476,8 @@ proto_bind_req(dld_str_t *dsp, mblk_t *mp) * etc. since part of mac_client_retarget_intr is to walk the * device tree in order to find and retarget the interrupts. */ - mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu); + if (intr_cpu != -1) + mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu); /* * Copy in MAC address. diff --git a/usr/src/uts/common/io/dmfe/dmfe_main.c b/usr/src/uts/common/io/dmfe/dmfe_main.c index 7c32a176aa..1bd204683d 100644 --- a/usr/src/uts/common/io/dmfe/dmfe_main.c +++ b/usr/src/uts/common/io/dmfe/dmfe_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -197,12 +197,14 @@ static void dmfe_m_ioctl(void *, queue_t *, mblk_t *); static mblk_t *dmfe_m_tx(void *, mblk_t *); static int dmfe_m_stat(void *, uint_t, uint64_t *); static int dmfe_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); static int dmfe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void dmfe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t dmfe_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP), + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, dmfe_m_stat, dmfe_m_start, dmfe_m_stop, @@ -210,12 +212,14 @@ static mac_callbacks_t dmfe_m_callbacks = { dmfe_m_multicst, dmfe_m_unicst, dmfe_m_tx, + NULL, dmfe_m_ioctl, NULL, /* getcapab */ NULL, /* open */ NULL, /* close */ dmfe_m_setprop, - dmfe_m_getprop + dmfe_m_getprop, + dmfe_m_propinfo }; @@ -2178,12 +2182,12 @@ dmfe_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) } int -dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { dmfe_t *dmfep = arg; - return (mii_m_getprop(dmfep->mii, name, num, flags, sz, val, perm)); + return (mii_m_getprop(dmfep->mii, name, num, sz, val)); } int @@ -2195,6 +2199,14 @@ dmfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (mii_m_setprop(dmfep->mii, name, num, sz, val)); } +static void +dmfe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + dmfe_t *dmfep = arg; + + mii_m_propinfo(dmfep->mii, name, num, mph); +} /* * ========== Per-instance setup/teardown code ========== diff --git a/usr/src/uts/common/io/e1000g/e1000g_main.c b/usr/src/uts/common/io/e1000g/e1000g_main.c index 57d2401894..569b3f6f87 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_main.c +++ b/usr/src/uts/common/io/e1000g/e1000g_main.c @@ -45,8 +45,6 
@@ #include "e1000g_debug.h" static char ident[] = "Intel PRO/1000 Ethernet"; -static char e1000g_string[] = "Intel(R) PRO/1000 Network Connection"; -static char e1000g_version[] = "Driver Ver. 5.3.22"; /* * Proto types for DDI entry points @@ -76,11 +74,12 @@ static void e1000g_m_ioctl(void *, queue_t *, mblk_t *); static int e1000g_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int e1000g_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void e1000g_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int e1000g_set_priv_prop(struct e1000g *, const char *, uint_t, const void *); -static int e1000g_get_priv_prop(struct e1000g *, const char *, uint_t, - uint_t, void *, uint_t *); +static int e1000g_get_priv_prop(struct e1000g *, const char *, uint_t, void *); static void e1000g_init_locks(struct e1000g *); static void e1000g_destroy_locks(struct e1000g *); static int e1000g_identify_hardware(struct e1000g *); @@ -154,29 +153,26 @@ static int e1000g_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data); static void e1000g_fm_init(struct e1000g *Adapter); static void e1000g_fm_fini(struct e1000g *Adapter); -static int e1000g_get_def_val(struct e1000g *, mac_prop_id_t, uint_t, void *); static void e1000g_param_sync(struct e1000g *); static void e1000g_get_driver_control(struct e1000_hw *); static void e1000g_release_driver_control(struct e1000_hw *); static void e1000g_restore_promisc(struct e1000g *Adapter); -mac_priv_prop_t e1000g_priv_props[] = { - {"_tx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_tx_interrupt_enable", MAC_PROP_PERM_RW}, - {"_tx_intr_delay", MAC_PROP_PERM_RW}, - {"_tx_intr_abs_delay", MAC_PROP_PERM_RW}, - {"_rx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_max_num_rcv_packets", MAC_PROP_PERM_RW}, - {"_rx_intr_delay", MAC_PROP_PERM_RW}, - {"_rx_intr_abs_delay", MAC_PROP_PERM_RW}, - {"_intr_throttling_rate", 
MAC_PROP_PERM_RW}, - {"_intr_adaptive", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ}, +char *e1000g_priv_props[] = { + "_tx_bcopy_threshold", + "_tx_interrupt_enable", + "_tx_intr_delay", + "_tx_intr_abs_delay", + "_rx_bcopy_threshold", + "_max_num_rcv_packets", + "_rx_intr_delay", + "_rx_intr_abs_delay", + "_intr_throttling_rate", + "_intr_adaptive", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; -#define E1000G_MAX_PRIV_PROPS \ - (sizeof (e1000g_priv_props)/sizeof (mac_priv_prop_t)) - static struct cb_ops cb_ws_ops = { nulldev, /* cb_open */ @@ -233,7 +229,7 @@ static ddi_device_acc_attr_t e1000g_regs_acc_attr = { }; #define E1000G_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t e1000g_m_callbacks = { E1000G_M_CALLBACK_FLAGS, @@ -244,12 +240,14 @@ static mac_callbacks_t e1000g_m_callbacks = { e1000g_m_multicst, NULL, e1000g_m_tx, + NULL, e1000g_m_ioctl, e1000g_m_getcapab, NULL, NULL, e1000g_m_setprop, - e1000g_m_getprop + e1000g_m_getprop, + e1000g_m_propinfo }; /* @@ -581,7 +579,6 @@ e1000g_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) mutex_exit(&e1000g_rx_detach_lock); } - cmn_err(CE_CONT, "!%s, %s\n", e1000g_string, e1000g_version); Adapter->e1000g_state = E1000G_INITIALIZED; return (DDI_SUCCESS); @@ -610,7 +607,6 @@ e1000g_register_mac(struct e1000g *Adapter) mac->m_max_sdu = Adapter->default_mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = e1000g_priv_props; - mac->m_priv_prop_count = E1000G_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; err = mac_register(mac, &Adapter->mh); @@ -1964,6 +1960,10 @@ e1000g_stop(struct e1000g *Adapter, boolean_t global) ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_LOST); } + mutex_enter(&Adapter->link_lock); + Adapter->link_complete = B_FALSE; + mutex_exit(&Adapter->link_lock); + /* Release resources still held by the TX descriptors 
*/ e1000g_tx_clean(Adapter); @@ -2961,12 +2961,15 @@ e1000g_fill_ring(void *arg, mac_ring_type_t rtype, const int grp_index, infop->mri_start = e1000g_ring_start; infop->mri_stop = NULL; infop->mri_poll = e1000g_poll_ring; + infop->mri_stat = e1000g_rx_ring_stat; /* Ring level interrupts */ mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = e1000g_rx_ring_intr_enable; mintr->mi_disable = e1000g_rx_ring_intr_disable; + if (Adapter->msi_enable) + mintr->mi_ddi_handle = Adapter->htable[0]; } /* ARGSUSED */ @@ -3282,159 +3285,246 @@ reset: static int e1000g_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { struct e1000g *Adapter = arg; struct e1000_fc_info *fc = &Adapter->shared.fc; - struct e1000_hw *hw = &Adapter->shared; int err = 0; link_flowctrl_t flowctrl; uint64_t tmp = 0; - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) { - return (e1000g_get_def_val(Adapter, pr_num, - pr_valsize, pr_val)); - } - switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&Adapter->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&Adapter->link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - tmp = Adapter->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp = Adapter->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 
Adapter->param_adv_autoneg; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (link_flowctrl_t)) { - switch (fc->current_mode) { - case e1000_fc_none: - flowctrl = LINK_FLOWCTRL_NONE; - break; - case e1000_fc_rx_pause: - flowctrl = LINK_FLOWCTRL_RX; - break; - case e1000_fc_tx_pause: - flowctrl = LINK_FLOWCTRL_TX; - break; - case e1000_fc_full: - flowctrl = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flowctrl, pr_val, sizeof (flowctrl)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); + switch (fc->current_mode) { + case e1000_fc_none: + flowctrl = LINK_FLOWCTRL_NONE; + break; + case e1000_fc_rx_pause: + flowctrl = LINK_FLOWCTRL_RX; + break; + case e1000_fc_tx_pause: + flowctrl = LINK_FLOWCTRL_TX; + break; + case e1000_fc_full: + flowctrl = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flowctrl, pr_val, sizeof (flowctrl)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = 
MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = Adapter->param_adv_100t4; break; case MAC_PROP_PRIVATE: err = e1000g_get_priv_prop(Adapter, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: { - struct e1000_mac_info *mac = &Adapter->shared.mac; - struct e1000_phy_info *phy = &Adapter->shared.phy; - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = DEFAULT_MTU; - range.range_uint32[0].mpur_max = Adapter->max_mtu; - /* following MAC type do not support jumbo frames */ - if ((mac->type == e1000_ich8lan) || - ((mac->type == e1000_ich9lan) && (phy->type == - e1000_phy_ife))) { - range.range_uint32[0].mpur_max = DEFAULT_MTU; - } - bcopy(&range, pr_val, sizeof (range)); + pr_valsize, pr_val); break; - } default: err = ENOTSUP; break; } + return (err); } +static void +e1000g_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + struct e1000g *Adapter = arg; + struct e1000_hw *hw = &Adapter->shared; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case 
MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_ext_status & + IEEE_ESR_1000T_FD_CAPS) || + (Adapter->phy_ext_status & + IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0); + } + break; + + case MAC_PROP_EN_100FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_status & MII_SR_100X_FD_CAPS) || + (Adapter->phy_status & MII_SR_100T2_FD_CAPS)) + ? 1 : 0); + } + break; + + case MAC_PROP_EN_100HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + ((Adapter->phy_status & MII_SR_100X_HD_CAPS) || + (Adapter->phy_status & MII_SR_100T2_HD_CAPS)) + ? 1 : 0); + } + break; + + case MAC_PROP_EN_10FDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_EN_10HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_EN_1000HDX_CAP: + if (hw->phy.media_type != e1000_media_type_copper) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + mac_prop_info_set_default_uint8(prh, + (Adapter->phy_status & MII_SR_AUTONEG_CAPS) + ? 
1 : 0); + } + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: { + struct e1000_mac_info *mac = &Adapter->shared.mac; + struct e1000_phy_info *phy = &Adapter->shared.phy; + uint32_t max; + + /* some MAC types do not support jumbo frames */ + if ((mac->type == e1000_ich8lan) || + ((mac->type == e1000_ich9lan) && (phy->type == + e1000_phy_ife))) { + max = DEFAULT_MTU; + } else { + max = Adapter->max_mtu; + } + + mac_prop_info_set_range_uint32(prh, DEFAULT_MTU, max); + break; + } + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } else if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { + value = DEFAULT_TX_BCOPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_interrupt_enable") == 0) { + value = DEFAULT_TX_INTR_ENABLE; + } else if (strcmp(pr_name, "_tx_intr_delay") == 0) { + value = DEFAULT_TX_INTR_DELAY; + } else if (strcmp(pr_name, "_tx_intr_abs_delay") == 0) { + value = DEFAULT_TX_INTR_ABS_DELAY; + } else if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { + value = DEFAULT_RX_BCOPY_THRESHOLD; + } else if (strcmp(pr_name, "_max_num_rcv_packets") == 0) { + value = DEFAULT_RX_LIMIT_ON_INTR; + } else if (strcmp(pr_name, "_rx_intr_delay") == 0) { + value = DEFAULT_RX_INTR_DELAY; + } else if (strcmp(pr_name, "_rx_intr_abs_delay") == 0) { + value = DEFAULT_RX_INTR_ABS_DELAY; + } else if (strcmp(pr_name, "_intr_throttling_rate") == 0) { + value = DEFAULT_INTR_THROTTLING; + } else if (strcmp(pr_name, "_intr_adaptive") == 0) { + value = 1; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} + /* ARGSUSED2 */ static int e1000g_set_priv_prop(struct e1000g *Adapter, const char *pr_name, @@ -3643,84 +3733,68 @@ 
e1000g_set_priv_prop(struct e1000g *Adapter, const char *pr_name, static int e1000g_get_priv_prop(struct e1000g *Adapter, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - if (is_default) - goto done; value = Adapter->param_adv_pause; err = 0; goto done; } if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - if (is_default) - goto done; value = Adapter->param_adv_asym_pause; err = 0; goto done; } if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { - value = (is_default ? DEFAULT_TX_BCOPY_THRESHOLD : - Adapter->tx_bcopy_thresh); + value = Adapter->tx_bcopy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_interrupt_enable") == 0) { - value = (is_default ? DEFAULT_TX_INTR_ENABLE : - Adapter->tx_intr_enable); + value = Adapter->tx_intr_enable; err = 0; goto done; } if (strcmp(pr_name, "_tx_intr_delay") == 0) { - value = (is_default ? DEFAULT_TX_INTR_DELAY : - Adapter->tx_intr_delay); + value = Adapter->tx_intr_delay; err = 0; goto done; } if (strcmp(pr_name, "_tx_intr_abs_delay") == 0) { - value = (is_default ? DEFAULT_TX_INTR_ABS_DELAY : - Adapter->tx_intr_abs_delay); + value = Adapter->tx_intr_abs_delay; err = 0; goto done; } if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { - value = (is_default ? DEFAULT_RX_BCOPY_THRESHOLD : - Adapter->rx_bcopy_thresh); + value = Adapter->rx_bcopy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_max_num_rcv_packets") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_ON_INTR : - Adapter->rx_limit_onintr); + value = Adapter->rx_limit_onintr; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_delay") == 0) { - value = (is_default ? 
DEFAULT_RX_INTR_DELAY : - Adapter->rx_intr_delay); + value = Adapter->rx_intr_delay; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_abs_delay") == 0) { - value = (is_default ? DEFAULT_RX_INTR_ABS_DELAY : - Adapter->rx_intr_abs_delay); + value = Adapter->rx_intr_abs_delay; err = 0; goto done; } if (strcmp(pr_name, "_intr_throttling_rate") == 0) { - value = (is_default ? DEFAULT_INTR_THROTTLING : - Adapter->intr_throttling_rate); + value = Adapter->intr_throttling_rate; err = 0; goto done; } if (strcmp(pr_name, "_intr_adaptive") == 0) { - value = (is_default ? 1 : Adapter->intr_adaptive); + value = Adapter->intr_adaptive; err = 0; goto done; } @@ -6284,88 +6358,6 @@ e1000g_quiesce(dev_info_t *devinfo) return (DDI_SUCCESS); } -static int -e1000g_get_def_val(struct e1000g *Adapter, mac_prop_id_t pr_num, - uint_t pr_valsize, void *pr_val) -{ - link_flowctrl_t fl; - struct e1000_hw *hw = &Adapter->shared; - int err = 0; - - ASSERT(pr_valsize > 0); - switch (pr_num) { - case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_AUTONEG_CAPS) - ? 1 : 0); - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 1; - else - *(uint8_t *)pr_val = - ((Adapter->phy_ext_status & - IEEE_ESR_1000T_FD_CAPS) || - (Adapter->phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) - ? 
1 : 0; - break; - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - *(uint8_t *)pr_val = 0; - break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_100X_FD_CAPS) || - (Adapter->phy_status & MII_SR_100T2_FD_CAPS)) - ? 1 : 0; - break; - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - ((Adapter->phy_status & MII_SR_100X_HD_CAPS) || - (Adapter->phy_status & MII_SR_100T2_HD_CAPS)) - ? 1 : 0; - break; - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - (Adapter->phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0; - break; - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *(uint8_t *)pr_val = 0; - else - *(uint8_t *)pr_val = - (Adapter->phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0; - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - /* * synchronize the adv* and en* parameters. * diff --git a/usr/src/uts/common/io/e1000g/e1000g_rx.c b/usr/src/uts/common/io/e1000g/e1000g_rx.c index a4ff68894a..fb4d621bfb 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_rx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_rx.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -709,8 +709,7 @@ rx_copy: E1000_RXD_STAT_TCPCS) && !(current_desc->errors & E1000_RXD_ERR_TCPE)) - cksumflags |= HCK_FULLCKSUM | - HCK_FULLCKSUM_OK; + cksumflags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ @@ -718,7 +717,7 @@ rx_copy: E1000_RXD_STAT_IPCS) && !(current_desc->errors & E1000_RXD_ERR_IPE)) - cksumflags |= HCK_IPV4_HDRCKSUM; + cksumflags |= HCK_IPV4_HDRCKSUM_OK; } } @@ -771,8 +770,8 @@ rx_end_of_packet: * Process the last fragment. */ if (cksumflags != 0) { - (void) hcksum_assoc(rx_data->rx_mblk, - NULL, NULL, 0, 0, 0, 0, cksumflags, 0); + mac_hcksum_set(rx_data->rx_mblk, + 0, 0, 0, 0, cksumflags); cksumflags = 0; } diff --git a/usr/src/uts/common/io/e1000g/e1000g_stat.c b/usr/src/uts/common/io/e1000g/e1000g_stat.c index e2a7544004..7ec964f628 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_stat.c +++ b/usr/src/uts/common/io/e1000g/e1000g_stat.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -971,3 +971,60 @@ e1000g_read_phy_stat(struct e1000_hw *hw, int reg) return (val); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +e1000g_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + e1000g_rx_ring_t *rx_ring = (e1000g_rx_ring_t *)rh; + struct e1000g *Adapter = rx_ring->adapter; + struct e1000_hw *hw = &Adapter->shared; + p_e1000g_stat_t e1000g_ksp = + (p_e1000g_stat_t)Adapter->e1000g_ksp->ks_data; + uint32_t low_val, high_val; + + rw_enter(&Adapter->chip_lock, RW_READER); + + if (Adapter->e1000g_state & E1000G_SUSPENDED) { + rw_exit(&Adapter->chip_lock); + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_RBYTES: + /* + * The 64-bit register will reset whenever the upper + * 32 bits are read. So we need to read the lower + * 32 bits first, then read the upper 32 bits. 
+ */ + low_val = E1000_READ_REG(hw, E1000_TORL); + high_val = E1000_READ_REG(hw, E1000_TORH); + *val = (uint64_t)e1000g_ksp->Torh.value.ul << 32 | + (uint64_t)e1000g_ksp->Torl.value.ul; + *val += (uint64_t)high_val << 32 | (uint64_t)low_val; + + e1000g_ksp->Torl.value.ul = (uint32_t)*val; + e1000g_ksp->Torh.value.ul = (uint32_t)(*val >> 32); + break; + + case MAC_STAT_IPACKETS: + e1000g_ksp->Tpr.value.ul += + E1000_READ_REG(hw, E1000_TPR); + *val = e1000g_ksp->Tpr.value.ul; + break; + + default: + *val = 0; + rw_exit(&Adapter->chip_lock); + return (ENOTSUP); + } + + rw_exit(&Adapter->chip_lock); + + if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) + ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_UNAFFECTED); + + return (0); +} diff --git a/usr/src/uts/common/io/e1000g/e1000g_sw.h b/usr/src/uts/common/io/e1000g/e1000g_sw.h index ee9ff56fbf..40611707bb 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_sw.h +++ b/usr/src/uts/common/io/e1000g/e1000g_sw.h @@ -1052,6 +1052,7 @@ void e1000g_rxfree_func(p_rx_sw_packet_t packet); int e1000g_m_stat(void *arg, uint_t stat, uint64_t *val); int e1000g_init_stats(struct e1000g *Adapter); +int e1000g_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); void e1000_tbi_adjust_stats(struct e1000g *Adapter, uint32_t frame_len, uint8_t *mac_addr); diff --git a/usr/src/uts/common/io/e1000g/e1000g_tx.c b/usr/src/uts/common/io/e1000g/e1000g_tx.c index 9d58d9b127..512f1bd21e 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_tx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_tx.c @@ -19,7 +19,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -470,10 +470,10 @@ e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context, bzero(cur_context, sizeof (context_data_t)); /* first check lso information */ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); /* retrieve checksum info */ - hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start, + mac_hcksum_get(mp, &cur_context->cksum_start, &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags); /* retrieve ethernet header size */ if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid == diff --git a/usr/src/uts/common/io/elxl/elxl.c b/usr/src/uts/common/io/elxl/elxl.c index b23702cebc..2ffe96aff3 100644 --- a/usr/src/uts/common/io/elxl/elxl.c +++ b/usr/src/uts/common/io/elxl/elxl.c @@ -84,9 +84,11 @@ static int elxl_m_promisc(void *, boolean_t); static int elxl_m_multicst(void *, boolean_t, const uint8_t *); static int elxl_m_unicst(void *, const uint8_t *); static int elxl_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int elxl_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void elxl_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static boolean_t elxl_m_getcapab(void *, mac_capab_t cap, void *); static uint_t elxl_intr(caddr_t, caddr_t); static void elxl_error(elxl_t *, char *, ...); @@ -198,9 +200,10 @@ static const struct ex_product { { 0, NULL, 0 }, }; -mac_priv_prop_t ex_priv_prop[] = { - { "_media", MAC_PROP_PERM_RW }, - { "_available_media", MAC_PROP_PERM_READ }, +static char *ex_priv_prop[] = { + "_media", + "_available_media", + NULL }; static mii_ops_t ex_mii_ops = { @@ -211,7 +214,7 @@ static mii_ops_t ex_mii_ops = { }; static mac_callbacks_t elxl_m_callbacks = { - MC_GETCAPAB | MC_SETPROP | MC_GETPROP, + MC_GETCAPAB | MC_PROPERTIES, elxl_m_stat, elxl_m_start, elxl_m_stop, @@ -220,11 +223,13 @@ static mac_callbacks_t elxl_m_callbacks = { elxl_m_unicst, elxl_m_tx, NULL, + NULL, 
elxl_m_getcapab, NULL, NULL, elxl_m_setprop, - elxl_m_getprop + elxl_m_getprop, + elxl_m_propinfo }; /* @@ -575,7 +580,6 @@ elxl_attach(dev_info_t *dip) macp->m_max_sdu = ETHERMTU; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = ex_priv_prop; - macp->m_priv_prop_count = 2; (void) ddi_intr_enable(sc->ex_intrh); @@ -1387,38 +1391,32 @@ elxl_m_getcapab(void *arg, mac_capab_t cap, void *data) } static int -elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { elxl_t *sc = arg; int rv; - boolean_t isdef = (flags & MAC_PROP_DEFAULT); if (sc->ex_mii_active) { - rv = mii_m_getprop(sc->ex_miih, name, num, flags, sz, - val, perm); + rv = mii_m_getprop(sc->ex_miih, name, num, sz, val); if (rv != ENOTSUP) return (rv); } switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = isdef ? LINK_DUPLEX_HALF : sc->ex_duplex; + *(uint8_t *)val = sc->ex_duplex; break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)val = sc->ex_speed; break; case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; bcopy(&sc->ex_link, val, sizeof (link_state_t)); break; case MAC_PROP_PRIVATE: if (strcmp(name, "_media") == 0) { char *str; - *perm = MAC_PROP_PERM_RW; switch (sc->ex_xcvr) { case XCVR_SEL_AUTO: @@ -1456,7 +1454,6 @@ elxl_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, * MAC_PROP_POSSIBLE with private properties.) 
*/ if (strcmp(name, "_available_media") == 0) { - *perm = MAC_PROP_PERM_READ; (void) snprintf(val, sz, "%s", sc->ex_medias); return (0); } @@ -1577,6 +1574,29 @@ reset: return (0); } +static void +elxl_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + elxl_t *sc = arg; + + if (sc->ex_mii_active) + mii_m_propinfo(sc->ex_miih, name, num, prh); + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_PRIVATE: + if (strcmp(name, "_available_media") == 0) + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + } +} + static int elxl_m_stat(void *arg, uint_t stat, uint64_t *val) { diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c index 9814fdb5e7..c13b5237b5 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_gld.c @@ -33,14 +33,12 @@ #include <oce_ioctl.h> /* array of properties supported by this driver */ -mac_priv_prop_t oce_priv_props[] = { - {"_tx_ring_size", MAC_PROP_PERM_READ}, - {"_tx_bcopy_limit", MAC_PROP_PERM_RW}, - {"_rx_bcopy_limit", MAC_PROP_PERM_RW}, - {"_rx_ring_size", MAC_PROP_PERM_READ}, +char *oce_priv_props[] = { + "_tx_ring_size", + "_tx_bcopy_limit", + "_rx_ring_size", + NULL }; -uint32_t oce_num_props = sizeof (oce_priv_props) / sizeof (mac_priv_prop_t); - /* ---[ static function declarations ]----------------------------------- */ static int oce_power10(int power); @@ -48,7 +46,7 @@ static int oce_set_priv_prop(struct oce_dev *dev, const char *name, uint_t size, const void *val); static int oce_get_priv_prop(struct oce_dev *dev, const char *name, - uint_t flags, uint_t size, void *val); + uint_t size, void *val); /* ---[ GLD entry points ]----------------------------------------------- */ int @@ -446,119 +444,62 @@ oce_m_setprop(void *arg, const char 
*name, mac_prop_id_t id, int oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, - uint_t flags, uint_t size, void *val, uint_t *perm) + uint_t size, void *val) { struct oce_dev *dev = arg; uint32_t ret = 0; - *perm = MAC_PROP_PERM_READ; - switch (id) { - case MAC_PROP_AUTONEG: - case MAC_PROP_EN_AUTONEG: - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: { - *(uint8_t *)val = 0x0; - break; - } - - case MAC_PROP_ADV_10GFDX_CAP: { - *(uint8_t *)val = 0x01; - break; - } - - case MAC_PROP_EN_10GFDX_CAP: { + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: *(uint8_t *)val = 0x01; break; - } case MAC_PROP_DUPLEX: { - if (size >= sizeof (link_duplex_t)) { - uint32_t *mode = (uint32_t *)val; - - *perm = MAC_PROP_PERM_READ; - if (dev->state & STATE_MAC_STARTED) - *mode = LINK_DUPLEX_FULL; - else - *mode = LINK_DUPLEX_UNKNOWN; + uint32_t *mode = (uint32_t *)val; - } else - ret = EINVAL; + ASSERT(size >= sizeof (link_duplex_t)); + if (dev->state & STATE_MAC_STARTED) + *mode = LINK_DUPLEX_FULL; + else + *mode = LINK_DUPLEX_UNKNOWN; break; } case MAC_PROP_SPEED: { - if (size >= sizeof (uint64_t)) { - uint64_t *speed = (uint64_t *)val; - - *perm = MAC_PROP_PERM_READ; - *speed = 0; - if ((dev->state & STATE_MAC_STARTED) && - (dev->link.mac_speed != 0)) { - *speed = 1000000ull * - oce_power10(dev->link.mac_speed); - } - } else - ret = EINVAL; - break; - } - - case MAC_PROP_MTU: { - mac_propval_range_t range; + uint64_t *speed = (uint64_t *)val; - *perm = MAC_PROP_PERM_RW; - if (!(flags & MAC_PROP_POSSIBLE)) { - ret = ENOTSUP; - break; + ASSERT(size >= sizeof (uint64_t)); + 
*speed = 0; + if ((dev->state & STATE_MAC_STARTED) && + (dev->link.mac_speed != 0)) { + *speed = 1000000ull * oce_power10(dev->link.mac_speed); } - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = OCE_MIN_MTU; - range.range_uint32[0].mpur_max = OCE_MAX_MTU; - bcopy(&range, val, sizeof (mac_propval_range_t)); break; } case MAC_PROP_FLOWCTRL: { link_flowctrl_t *fc = (link_flowctrl_t *)val; - if (size < sizeof (link_flowctrl_t)) { + ASSERT(size >= sizeof (link_flowctrl_t)); + if (dev->flow_control & OCE_FC_TX && + dev->flow_control & OCE_FC_RX) + *fc = LINK_FLOWCTRL_BI; + else if (dev->flow_control == OCE_FC_TX) + *fc = LINK_FLOWCTRL_TX; + else if (dev->flow_control == OCE_FC_RX) + *fc = LINK_FLOWCTRL_RX; + else if (dev->flow_control == 0) + *fc = LINK_FLOWCTRL_NONE; + else ret = EINVAL; - break; - } - - if (size >= sizeof (link_flowctrl_t)) { - if (dev->flow_control & OCE_FC_TX && - dev->flow_control & OCE_FC_RX) - *fc = LINK_FLOWCTRL_BI; - else if (dev->flow_control == OCE_FC_TX) - *fc = LINK_FLOWCTRL_TX; - else if (dev->flow_control == OCE_FC_RX) - *fc = LINK_FLOWCTRL_RX; - else if (dev->flow_control == 0) - *fc = LINK_FLOWCTRL_NONE; - else - ret = EINVAL; - } break; } - case MAC_PROP_PRIVATE: { - ret = oce_get_priv_prop(dev, name, flags, size, val); + case MAC_PROP_PRIVATE: + ret = oce_get_priv_prop(dev, name, size, val); break; - } + default: ret = ENOTSUP; break; @@ -566,6 +507,59 @@ oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, return (ret); } /* oce_m_getprop */ +void +oce_m_propinfo(void *arg, const char *name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + + switch (pr_num) { + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case 
MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_SPEED: + case MAC_PROP_DUPLEX: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, OCE_MIN_MTU, OCE_MAX_MTU); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + if (strcmp(name, "_tx_ring_size") == 0) { + value = OCE_DEFAULT_TX_RING_SIZE; + } else if (strcmp(name, "_rx_ring_size") == 0) { + value = OCE_DEFAULT_RX_RING_SIZE; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} /* oce_m_propinfo */ + /* * function to handle dlpi streams message from GLDv3 mac layer */ @@ -701,7 +695,6 @@ oce_set_priv_prop(struct oce_dev *dev, const char *name, * * dev - software handle to the device * name - string containing the property name - * flags - flags sent by the OS to get_prop * size - length of the string contained name * val - [OUT] pointer to the location where the result is returned * @@ -709,46 +702,22 @@ oce_set_priv_prop(struct oce_dev *dev, const char *name, */ static int oce_get_priv_prop(struct oce_dev *dev, const char *name, - uint_t flags, uint_t size, void *val) + uint_t size, void *val) { - int ret = ENOTSUP; int value; - boolean_t is_default = (flags & MAC_PROP_DEFAULT); - - if (NULL == val) { - ret = EINVAL; - return (ret); - } if (strcmp(name, "_tx_ring_size") == 0) { - value = is_default ? 
OCE_DEFAULT_TX_RING_SIZE : - dev->tx_ring_size; - ret = 0; - goto done; - } - - if (strcmp(name, "_tx_bcopy_limit") == 0) { + value = dev->tx_ring_size; + } else if (strcmp(name, "_tx_bcopy_limit") == 0) { value = dev->tx_bcopy_limit; - ret = 0; - goto done; - } - - if (strcmp(name, "_rx_bcopy_limit") == 0) { + } else if (strcmp(name, "_rx_ring_size") == 0) { + value = dev->rx_ring_size; + } else if (strcmp(name, "_rx_bcopy_limit") == 0) { value = dev->rx_bcopy_limit; - ret = 0; - goto done; - } - - if (strcmp(name, "_rx_ring_size") == 0) { - value = is_default ? OCE_DEFAULT_RX_RING_SIZE : - dev->rx_ring_size; - ret = 0; - goto done; + } else { + return (ENOTSUP); } -done: - if (ret == 0) { - (void) snprintf(val, size, "%d", value); - } - return (ret); + (void) snprintf(val, size, "%d", value); + return (0); } /* oce_get_priv_prop */ diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c index f3346bb444..a4c0fdc6a5 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_main.c @@ -116,7 +116,8 @@ static struct modlinkage oce_mod_linkage = { MODREV_1, &oce_drv, NULL }; -#define OCE_M_CB_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +#define OCE_M_CB_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | \ + MC_PROPINFO) static mac_callbacks_t oce_mac_cb = { OCE_M_CB_FLAGS, /* mc_callbacks */ oce_m_stat, /* mc_getstat */ @@ -126,16 +127,17 @@ static mac_callbacks_t oce_mac_cb = { oce_m_multicast, /* mc_multicast */ oce_m_unicast, /* mc_unicast */ oce_m_send, /* mc_tx */ + NULL, oce_m_ioctl, /* mc_ioctl */ oce_m_getcap, /* mc_getcapab */ NULL, /* open */ NULL, /* close */ oce_m_setprop, /* set properties */ - oce_m_getprop /* get properties */ + oce_m_getprop, /* get properties */ + oce_m_propinfo /* properties info */ }; -extern mac_priv_prop_t oce_priv_props[]; -extern uint32_t oce_num_props; +extern char 
*oce_priv_props[]; /* Module Init */ int @@ -293,7 +295,6 @@ oce_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) mac->m_max_sdu = dev->mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = oce_priv_props; - mac->m_priv_prop_count = oce_num_props; oce_log(dev, CE_NOTE, MOD_CONFIG, "Driver Private structure = 0x%p", (void *)dev); diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c index 99f210925f..cc1ddb33f9 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c @@ -25,7 +25,7 @@ */ /* - * Source file containing the Recieve Path handling + * Source file containing the Receive Path handling * functions */ #include <oce_impl.h> @@ -420,7 +420,7 @@ oce_set_rx_oflags(mblk_t *mp, struct oce_nic_rx_cqe *cqe) /* set flags */ if (cqe->u0.s.ip_cksum_pass) { - csum_flags |= HCK_IPV4_HDRCKSUM; + csum_flags |= HCK_IPV4_HDRCKSUM_OK; } if (cqe->u0.s.l4_cksum_pass) { @@ -428,8 +428,7 @@ oce_set_rx_oflags(mblk_t *mp, struct oce_nic_rx_cqe *cqe) } if (csum_flags) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - csum_flags, 0); + (void) mac_hcksum_set(mp, 0, 0, 0, 0, csum_flags); } } diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c index 5198cfc710..c1925b8074 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c +++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_tx.c @@ -712,11 +712,10 @@ oce_send_packet(struct oce_wq *wq, mblk_t *mp) } /* Retrieve LSO info */ - lso_info_get(mp, &mss, &flags); + mac_lso_get(mp, &mss, &flags); /* get the offload flags */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, - NULL, &csum_flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &csum_flags); /* Limit should be always less than Tx Buffer Size */ if (pkt_len < dev->tx_bcopy_limit) { diff --git a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c 
b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c index 7d2873e9fe..9290ecdde7 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c +++ b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge.c @@ -2203,13 +2203,13 @@ ql_set_rx_cksum(mblk_t *mp, struct ib_mac_iocb_rsp *net_rsp) /* TCP or UDP packet and checksum valid */ if (((net_rsp->flags2 & IB_MAC_IOCB_RSP_T) != 0) && ((net_rsp->flags1 & IB_MAC_IOCB_RSP_NU) == 0)) { - flags = HCK_FULLCKSUM | HCK_FULLCKSUM_OK; - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + flags = HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } if (((net_rsp->flags2 & IB_MAC_IOCB_RSP_U) != 0) && ((net_rsp->flags1 & IB_MAC_IOCB_RSP_NU) == 0)) { - flags = HCK_FULLCKSUM | HCK_FULLCKSUM_OK; - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + flags = HCK_FULLCKSUM_OK; + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } } @@ -4750,13 +4750,12 @@ ql_send_common(struct tx_ring *tx_ring, mblk_t *mp) tx_mode = USE_COPY; if (qlge->chksum_cap) { - hcksum_retrieve(mp, NULL, NULL, NULL, - NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); QL_PRINT(DBG_TX, ("checksum flag is :0x%x, card capability " "is 0x%x \n", pflags, qlge->chksum_cap)); if (qlge->lso_enable) { uint32_t lso_flags = 0; - lso_info_get(mp, &mss, &lso_flags); + mac_lso_get(mp, &mss, &lso_flags); use_lso = (lso_flags == HW_LSO); } QL_PRINT(DBG_TX, ("mss :%d, use_lso %x \n", diff --git a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c index 6ad591435c..83ef993a0c 100644 --- a/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c +++ b/usr/src/uts/common/io/fibre-channel/fca/qlge/qlge_gld.c @@ -23,6 +23,7 @@ * Copyright 2009 QLogic Corporation. All rights reserved. 
*/ +#include <sys/note.h> #include <qlge.h> #include <sys/strsubr.h> #include <netinet/in.h> @@ -46,9 +47,12 @@ static int ql_unicst_set(qlge_t *qlge, const uint8_t *macaddr, int slot); static int ql_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int ql_m_getprop(void *, const char *, mac_prop_id_t, uint_t, uint_t, - void *, uint_t *); -#define QL_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) +static int ql_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void ql_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); + +#define QL_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t ql_m_callbacks = { QL_M_CALLBACK_FLAGS, ql_m_getstat, @@ -58,19 +62,20 @@ static mac_callbacks_t ql_m_callbacks = { ql_m_multicst, NULL, NULL, + NULL, ql_m_ioctl, ql_m_getcapab, NULL, NULL, ql_m_setprop, - ql_m_getprop -}; -mac_priv_prop_t qlge_priv_prop[] = { - {"_adv_pause_mode", MAC_PROP_PERM_RW} + ql_m_getprop, + ql_m_propinfo }; -#define QLGE_MAX_PRIV_PROPS \ - (sizeof (qlge_priv_prop) / sizeof (mac_priv_prop_t)) +char *qlge_priv_prop[] = { + "_adv_pause_mode", + NULL +}; /* * This function starts the driver @@ -689,27 +694,6 @@ qlge_set_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_valsize, return (ENOTSUP); } -static int -qlge_get_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) -{ - int err = ENOTSUP; - boolean_t is_default = (boolean_t)(pr_flags & MAC_PROP_DEFAULT); - uint32_t value; - - if (strcmp(pr_name, "_adv_pause_mode") == 0) { - value = (is_default? 
2 : qlge->pause); - err = 0; - goto done; - } - -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); - } - return (err); -} - /* * callback functions for set/get of properties */ @@ -778,10 +762,30 @@ ql_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +static int +qlge_get_priv_prop(qlge_t *qlge, const char *pr_name, uint_t pr_valsize, + void *pr_val) +{ + int err = ENOTSUP; + uint32_t value; + + if (strcmp(pr_name, "_adv_pause_mode") == 0) { + value = qlge->pause; + err = 0; + goto done; + } + +done: + if (err == 0) { + (void) snprintf(pr_val, pr_valsize, "%d", value); + } + return (err); +} + /* ARGSUSED */ static int ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { qlge_t *qlge = barg; uint64_t speed; @@ -795,20 +799,9 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, goto out; } - if (pr_valsize == 0) { - err = EINVAL; - goto out; - } - bzero(pr_val, pr_valsize); - /* mostly read only */ - *perm = MAC_PROP_PERM_READ; - switch (pr_num) { case MAC_PROP_DUPLEX: - if (pr_valsize < sizeof (link_duplex_t)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (link_duplex_t)); if (qlge->duplex) link_duplex = LINK_DUPLEX_FULL; else @@ -818,18 +811,12 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - if (pr_valsize < sizeof (speed)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (speed)); speed = qlge->speed * 1000000ull; bcopy(&speed, pr_val, sizeof (speed)); break; case MAC_PROP_STATUS: - if (pr_valsize < sizeof (link_state_t)) { - err = EINVAL; - goto out; - } + ASSERT(pr_valsize >= sizeof (link_state_t)); if (qlge->port_link_state == LS_DOWN) link_state = LINK_STATE_DOWN; else @@ -839,8 +826,7 @@ ql_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 
break; case MAC_PROP_PRIVATE: - err = qlge_get_priv_prop(qlge, pr_name, pr_flags, - pr_valsize, pr_val); + err = qlge_get_priv_prop(qlge, pr_name, pr_valsize, pr_val); break; default: @@ -851,6 +837,35 @@ out: return (err); } +static void +ql_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(barg)); + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_PRIVATE: { + char val_str[64]; + int default_val; + + if (strcmp(pr_name, "_adv_pause_mode") == 0) + default_val = 2; + else + return; + + (void) snprintf(val_str, sizeof (val_str), "%d", default_val); + mac_prop_info_set_default_str(prh, val_str); + break; + } + } +} + /* ARGSUSED */ static boolean_t ql_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) @@ -911,7 +926,6 @@ ql_gld3_init(qlge_t *qlge, mac_register_t *macp) macp->m_max_sdu = qlge->mtu; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = qlge_priv_prop; - macp->m_priv_prop_count = QLGE_MAX_PRIV_PROPS; macp->m_v12n = 0; ql_m_callbacks.mc_unicst = ql_m_unicst; ql_m_callbacks.mc_tx = ql_m_tx; diff --git a/usr/src/uts/common/io/hme/hme.c b/usr/src/uts/common/io/hme/hme.c index 2d1d3995df..71017b5464 100644 --- a/usr/src/uts/common/io/hme/hme.c +++ b/usr/src/uts/common/io/hme/hme.c @@ -50,6 +50,7 @@ #include <sys/policy.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <sys/byteorder.h> #include "hme_phy.h" #include "hme_mac.h" #include "hme.h" @@ -113,11 +114,12 @@ static int hme_64bit_enable = 1; /* Use 64-bit sbus transfers */ static int hme_reject_own = 1; /* Reject packets with own SA */ static int hme_ngu_enable = 0; /* Never Give Up mode */ -mac_priv_prop_t hme_priv_prop[] = { - { "_ipg0", MAC_PROP_PERM_RW }, - { "_ipg1", MAC_PROP_PERM_RW }, - { "_ipg2", MAC_PROP_PERM_RW }, - { "_lance_mode", MAC_PROP_PERM_RW }, +char *hme_priv_prop[] = { + "_ipg0", + "_ipg1", 
+ "_ipg2", + "_lance_mode", + NULL }; static int hme_lance_mode = 1; /* to enable lance mode */ @@ -232,8 +234,9 @@ static int hme_m_multicst(void *, boolean_t, const uint8_t *); static int hme_m_unicst(void *, const uint8_t *); static mblk_t *hme_m_tx(void *, mblk_t *); static boolean_t hme_m_getcapab(void *, mac_capab_t, void *); -static int hme_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static int hme_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void hme_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int hme_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); @@ -246,7 +249,7 @@ static mii_ops_t hme_mii_ops = { }; static mac_callbacks_t hme_m_callbacks = { - MC_GETCAPAB | MC_SETPROP | MC_GETPROP, + MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO, hme_m_stat, hme_m_start, hme_m_stop, @@ -255,11 +258,13 @@ static mac_callbacks_t hme_m_callbacks = { hme_m_unicst, hme_m_tx, NULL, + NULL, hme_m_getcapab, NULL, NULL, hme_m_setprop, hme_m_getprop, + hme_m_propinfo }; DDI_DEFINE_STREAM_OPS(hme_dev_ops, nulldev, nulldev, hmeattach, hmedetach, @@ -1506,8 +1511,6 @@ hmeattach(dev_info_t *dip, ddi_attach_cmd_t cmd) macp->m_max_sdu = ETHERMTU; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = hme_priv_prop; - macp->m_priv_prop_count = - sizeof (hme_priv_prop) / sizeof (hme_priv_prop[0]); if (mac_register(macp, &hmep->hme_mh) != 0) { mac_free(macp); goto error_intr; @@ -1901,15 +1904,14 @@ hmestatinit(struct hme *hmep) } int -hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { struct hme *hmep = arg; int value; - boolean_t is_default; int rv; - rv = mii_m_getprop(hmep->hme_mii, name, num, flags, sz, val, perm); + rv = mii_m_getprop(hmep->hme_mii, name, num, sz, val); if (rv != ENOTSUP) return (rv); @@ 
-1920,18 +1922,14 @@ hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (ENOTSUP); } - *perm = MAC_PROP_PERM_RW; - - is_default = (flags & MAC_PROP_DEFAULT) ? B_TRUE : B_FALSE; if (strcmp(name, "_ipg0") == 0) { - value = is_default ? hme_ipg0 : hmep->hme_ipg0; - + value = hmep->hme_ipg0; } else if (strcmp(name, "_ipg1") == 0) { - value = is_default ? hme_ipg1 : hmep->hme_ipg1; + value = hmep->hme_ipg1; } else if (strcmp(name, "_ipg2") == 0) { - value = is_default ? hme_ipg2 : hmep->hme_ipg2; + value = hmep->hme_ipg2; } else if (strcmp(name, "_lance_mode") == 0) { - value = is_default ? hme_lance_mode : hmep->hme_lance_mode; + value = hmep->hme_lance_mode; } else { return (ENOTSUP); } @@ -1939,6 +1937,38 @@ hme_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, return (0); } +static void +hme_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + struct hme *hmep = arg; + + mii_m_propinfo(hmep->hme_mii, name, num, mph); + + switch (num) { + case MAC_PROP_PRIVATE: { + char valstr[64]; + int default_val; + + if (strcmp(name, "_ipg0") == 0) { + default_val = hme_ipg0; + } else if (strcmp(name, "_ipg1") == 0) { + default_val = hme_ipg1; + } else if (strcmp(name, "_ipg2") == 0) { + default_val = hme_ipg2; + } if (strcmp(name, "_lance_mode") == 0) { + default_val = hme_lance_mode; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", default_val); + mac_prop_info_set_default_str(mph, valstr); + break; + } + } +} + int hme_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, const void *val) @@ -2267,8 +2297,7 @@ hmestart(struct hme *hmep, mblk_t *mp) uint32_t start_offset; uint32_t stuff_offset; - hcksum_retrieve(mp, NULL, NULL, &start_offset, &stuff_offset, - NULL, NULL, &flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, NULL, NULL, &flags); if (flags & HCK_PARTIALCKSUM) { if (get_ether_type(mp->b_rptr) == ETHERTYPE_VLAN) { @@ -3434,8 
+3463,7 @@ hmeread(struct hme *hmep, hmebuf_t *rbuf, uint32_t rflags) if (type == ETHERTYPE_IP || type == ETHERTYPE_IPV6) { uint16_t cksum = ~rflags & HMERMD_CKSUM; uint_t end = len - sizeof (struct ether_header); - (void) hcksum_assoc(bp, NULL, NULL, 0, - 0, end, htons(cksum), HCK_PARTIALCKSUM, 0); + mac_hcksum_set(bp, 0, 0, end, htons(cksum), HCK_PARTIALCKSUM); } return (bp); diff --git a/usr/src/uts/common/io/hxge/hxge_impl.h b/usr/src/uts/common/io/hxge/hxge_impl.h index 36b94382bf..0e1567e148 100644 --- a/usr/src/uts/common/io/hxge/hxge_impl.h +++ b/usr/src/uts/common/io/hxge/hxge_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ extern "C" { #include <sys/mac_provider.h> #include <sys/mac_ether.h> +#include <sys/note.h> /* * Handy macros (taken from bge driver) @@ -258,6 +259,7 @@ struct _hxge_ldg_t { p_hxge_ldv_t ldvp; hxge_sys_intr_t sys_intr_handler; p_hxge_t hxgep; + uint32_t htable_idx; }; struct _hxge_ldv_t { @@ -378,6 +380,8 @@ void hxge_destroy_kstats(p_hxge_t); int hxge_port_kstat_update(kstat_t *, int); int hxge_m_stat(void *arg, uint_t stat, uint64_t *val); +int hxge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int hxge_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* hxge_hw.c */ void diff --git a/usr/src/uts/common/io/hxge/hxge_kstats.c b/usr/src/uts/common/io/hxge/hxge_kstats.c index bd42641d5d..d9bfffeece 100644 --- a/usr/src/uts/common/io/hxge/hxge_kstats.c +++ b/usr/src/uts/common/io/hxge/hxge_kstats.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -881,6 +881,70 @@ hxge_port_kstat_update(kstat_t *ksp, int rw) return (0); } +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +hxge_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_hxge_ring_handle_t rhp = (p_hxge_ring_handle_t)rdriver; + p_hxge_t hxgep = rhp->hxgep; + + ASSERT(rhp != NULL); + ASSERT(hxgep != NULL); + ASSERT(hxgep->statsp != NULL); + ASSERT(0 <= rhp->index < HXGE_MAX_RDCS); + + switch (stat) { + case MAC_STAT_IERRORS: + *val = hxgep->statsp->rdc_stats[rhp->index].ierrors; + break; + case MAC_STAT_RBYTES: + *val = hxgep->statsp->rdc_stats[rhp->index].ibytes; + break; + case MAC_STAT_IPACKETS: + *val = hxgep->statsp->rdc_stats[rhp->index].ipackets; + break; + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +hxge_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_hxge_ring_handle_t rhp = (p_hxge_ring_handle_t)rdriver; + p_hxge_t hxgep = rhp->hxgep; + + ASSERT(rhp != NULL); + ASSERT(hxgep != NULL); + ASSERT(hxgep->statsp != NULL); + ASSERT(0 <= rhp->index < HXGE_MAX_TDCS); + + switch (stat) { + case MAC_STAT_OERRORS: + *val = hxgep->statsp->tdc_stats[rhp->index].oerrors; + break; + case MAC_STAT_OBYTES: + *val = hxgep->statsp->tdc_stats[rhp->index].obytes; + break; + case MAC_STAT_OPACKETS: + *val = hxgep->statsp->tdc_stats[rhp->index].opackets; + break; + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + int hxge_m_stat(void *arg, uint_t stat, uint64_t *value) { diff --git a/usr/src/uts/common/io/hxge/hxge_main.c b/usr/src/uts/common/io/hxge/hxge_main.c index 24d4bec784..ee2dfc365a 100644 --- a/usr/src/uts/common/io/hxge/hxge_main.c +++ b/usr/src/uts/common/io/hxge/hxge_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -146,28 +146,29 @@ static boolean_t hxge_param_locked(mac_prop_id_t pr_num); static int hxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val); static int hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *); -static int hxge_get_def_val(hxge_t *hxgep, mac_prop_id_t pr_num, uint_t pr_valsize, void *pr_val); +static void hxge_m_propinfo(void *barg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t mph); static int hxge_set_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, const void *pr_val); static int hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val); + uint_t pr_valsize, void *pr_val); static void hxge_link_poll(void *arg); static void hxge_link_update(p_hxge_t hxge, link_state_t state); static void hxge_msix_init(p_hxge_t hxgep); -mac_priv_prop_t hxge_priv_props[] = { - {"_rxdma_intr_time", MAC_PROP_PERM_RW}, - {"_rxdma_intr_pkts", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_sctp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_sctp", MAC_PROP_PERM_RW} +char *hxge_priv_props[] = { + "_rxdma_intr_time", + "_rxdma_intr_pkts", + "_class_opt_ipv4_tcp", + "_class_opt_ipv4_udp", + "_class_opt_ipv4_ah", + "_class_opt_ipv4_sctp", + "_class_opt_ipv6_tcp", + "_class_opt_ipv6_udp", + "_class_opt_ipv6_ah", + "_class_opt_ipv6_sctp", + NULL }; #define HXGE_MAX_PRIV_PROPS \ @@ -177,7 +178,7 @@ mac_priv_prop_t hxge_priv_props[] = { #define MAX_DUMP_SZ 256 #define HXGE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | 
MC_SETPROP | MC_GETPROP | MC_PROPINFO) extern hxge_status_t hxge_pfc_set_default_mac_addr(p_hxge_t hxgep); @@ -190,12 +191,14 @@ static mac_callbacks_t hxge_m_callbacks = { hxge_m_multicst, NULL, NULL, + NULL, hxge_m_ioctl, hxge_m_getcapab, NULL, NULL, hxge_m_setprop, - hxge_m_getprop + hxge_m_getprop, + hxge_m_propinfo }; /* PSARC/2007/453 MSI-X interrupt limit override. */ @@ -2935,6 +2938,41 @@ hxge_group_get(void *arg, mac_ring_type_t type, int groupid, } } +static int +hxge_ring_get_htable_idx(p_hxge_t hxgep, mac_ring_type_t type, uint32_t channel) +{ + int i; + + ASSERT(hxgep->ldgvp != NULL); + + switch (type) { + case MAC_RING_TYPE_RX: + for (i = 0; i < hxgep->ldgvp->maxldvs; i++) { + if ((hxgep->ldgvp->ldvp[i].is_rxdma) && + (hxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + hxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + case MAC_RING_TYPE_TX: + for (i = 0; i < hxgep->ldgvp->maxldvs; i++) { + if ((hxgep->ldgvp->ldvp[i].is_txdma) && + (hxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + hxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + default: + break; + } + + return (-1); +} + /* * Callback function for the GLDv3 layer to register all rings. 
*/ @@ -2945,9 +2983,15 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, { p_hxge_t hxgep = arg; + ASSERT(hxgep != NULL); + ASSERT(infop != NULL); + switch (type) { case MAC_RING_TYPE_TX: { p_hxge_ring_handle_t rhp; + mac_intr_t *mintr = &infop->mri_intr; + p_hxge_intr_t intrp; + int htable_idx; ASSERT((index >= 0) && (index < HXGE_MAX_TDCS)); rhp = &hxgep->tx_ring_handles[index]; @@ -2958,11 +3002,22 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, infop->mri_start = hxge_tx_ring_start; infop->mri_stop = hxge_tx_ring_stop; infop->mri_tx = hxge_tx_ring_send; + infop->mri_stat = hxge_tx_ring_stat; + + intrp = (p_hxge_intr_t)&hxgep->hxge_intr_type; + htable_idx = hxge_ring_get_htable_idx(hxgep, type, index); + if (htable_idx >= 0) + mintr->mi_ddi_handle = intrp->htable[htable_idx]; + else + mintr->mi_ddi_handle = NULL; break; } + case MAC_RING_TYPE_RX: { p_hxge_ring_handle_t rhp; mac_intr_t hxge_mac_intr; + p_hxge_intr_t intrp; + int htable_idx; ASSERT((index >= 0) && (index < HXGE_MAX_RDCS)); rhp = &hxgep->rx_ring_handles[index]; @@ -2975,17 +3030,25 @@ hxge_fill_ring(void *arg, mac_ring_type_t type, const int rg_index, * disable interrupt (enable poll). 
*/ hxge_mac_intr.mi_handle = (mac_intr_handle_t)rhp; - hxge_mac_intr.mi_enable = - (mac_intr_enable_t)hxge_disable_poll; - hxge_mac_intr.mi_disable = - (mac_intr_disable_t)hxge_enable_poll; + hxge_mac_intr.mi_enable = (mac_intr_enable_t)hxge_disable_poll; + hxge_mac_intr.mi_disable = (mac_intr_disable_t)hxge_enable_poll; + + intrp = (p_hxge_intr_t)&hxgep->hxge_intr_type; + htable_idx = hxge_ring_get_htable_idx(hxgep, type, index); + if (htable_idx >= 0) + hxge_mac_intr.mi_ddi_handle = intrp->htable[htable_idx]; + else + hxge_mac_intr.mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhp; infop->mri_start = hxge_rx_ring_start; infop->mri_stop = hxge_rx_ring_stop; infop->mri_intr = hxge_mac_intr; infop->mri_poll = hxge_rx_poll; + infop->mri_stat = hxge_rx_ring_stat; break; } + default: break; } @@ -3186,37 +3249,9 @@ hxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } -/* ARGSUSED */ -static int -hxge_get_def_val(hxge_t *hxgep, mac_prop_id_t pr_num, uint_t pr_valsize, - void *pr_val) -{ - int err = 0; - link_flowctrl_t fl; - - switch (pr_num) { - case MAC_PROP_DUPLEX: - *(uint8_t *)pr_val = 2; - break; - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = 0; - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_TX; - bcopy(&fl, pr_val, sizeof (fl)); - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - static int hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { hxge_t *hxgep = barg; p_hxge_stats_t statsp = hxgep->statsp; @@ -3228,20 +3263,8 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_m_getprop: pr_num %d", pr_num)); - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != 
MAC_PROP_PRIVATE)) { - err = hxge_get_def_val(hxgep, pr_num, pr_valsize, pr_val); - return (err); - } - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_m_getprop: duplex mode %d", @@ -3249,17 +3272,13 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (uint64_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (uint64_t)); tmp = statsp->mac_stats.link_speed * 1000000ull; bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_STATUS: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (link_state_t)); if (!statsp->mac_stats.link_up) ls = LINK_STATE_DOWN; else @@ -3272,15 +3291,12 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, * Flow control is supported by the shared domain and * it is currently transmit only */ - *perm = MAC_PROP_PERM_READ; - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); + ASSERT(pr_valsize < sizeof (link_flowctrl_t)); fl = LINK_FLOWCTRL_TX; bcopy(&fl, pr_val, sizeof (fl)); break; case MAC_PROP_AUTONEG: /* 10G link only and it is not negotiable */ - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_ADV_1000FDX_CAP: @@ -3299,25 +3315,10 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, break; case MAC_PROP_PRIVATE: - err = hxge_get_priv_prop(hxgep, pr_name, pr_flags, - pr_valsize, pr_val); - break; - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = MIN_FRAME_SIZE - - MTU_TO_FRAME_SIZE; - range.range_uint32[0].mpur_max = 
MAX_FRAME_SIZE - - MTU_TO_FRAME_SIZE; - bcopy(&range, pr_val, sizeof (range)); + err = hxge_get_priv_prop(hxgep, pr_name, pr_valsize, + pr_val); break; - } + default: err = EINVAL; break; @@ -3328,6 +3329,60 @@ hxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +static void +hxge_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_AUTONEG: + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + MIN_FRAME_SIZE - MTU_TO_FRAME_SIZE, + MAX_FRAME_SIZE - MTU_TO_FRAME_SIZE); + break; + + case MAC_PROP_PRIVATE: { + char valstr[MAXNAMELEN]; + + bzero(valstr, sizeof (valstr)); + + /* Receive Interrupt Blanking Parameters */ + if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + RXDMA_RCR_TO_DEFAULT); + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + RXDMA_RCR_PTHRES_DEFAULT); + + /* Classification and Load Distribution Configuration */ + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", + HXGE_CLASS_TCAM_LOOKUP); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); + break; + } + } +} + + /* ARGSUSED */ static int hxge_set_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, @@ -3387,8 +3442,8 @@ hxge_set_priv_prop(p_hxge_t hxgep, const char 
*pr_name, uint_t pr_valsize, } static int -hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { p_hxge_param_t param_arr = hxgep->param_arr; char valstr[MAXNAMELEN]; @@ -3399,77 +3454,55 @@ hxge_get_priv_prop(p_hxge_t hxgep, const char *pr_name, uint_t pr_flags, HXGE_DEBUG_MSG((hxgep, DLADM_CTL, "==> hxge_get_priv_prop: property %s", pr_name)); - if (pr_flags & MAC_PROP_DEFAULT) { - /* Receive Interrupt Blanking Parameters */ - if (strcmp(pr_name, "_rxdma_intr_time") == 0) { - value = RXDMA_RCR_TO_DEFAULT; - } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { - value = RXDMA_RCR_PTHRES_DEFAULT; + /* Receive Interrupt Blanking Parameters */ + if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + value = hxgep->intr_timeout; + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + value = hxgep->intr_threshold; - /* Classification and Load Distribution Configuration */ - } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || - strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || - strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || - strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || - strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || - strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - value = HXGE_CLASS_TCAM_LOOKUP; - } else { - err = EINVAL; - } - } else { - /* Receive Interrupt Blanking Parameters */ - if (strcmp(pr_name, "_rxdma_intr_time") == 0) { - value = hxgep->intr_timeout; - } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { - value = hxgep->intr_threshold; + /* Classification and Load Distribution Configuration */ + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); - /* Classification and Load Distribution Configuration */ 
- } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_tcp]); - - value = (int)param_arr[param_class_opt_ipv4_tcp].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_udp]); - - value = (int)param_arr[param_class_opt_ipv4_udp].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_ah") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_ah]); - - value = (int)param_arr[param_class_opt_ipv4_ah].value; - } else if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); - - value = (int)param_arr[param_class_opt_ipv4_sctp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); - - value = (int)param_arr[param_class_opt_ipv6_tcp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_udp]); - - value = (int)param_arr[param_class_opt_ipv6_udp].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_ah]); - - value = (int)param_arr[param_class_opt_ipv6_ah].value; - } else if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - err = hxge_param_get_ip_opt(hxgep, NULL, NULL, - (caddr_t)&param_arr[param_class_opt_ipv6_sctp]); - - value = (int)param_arr[param_class_opt_ipv6_sctp].value; - } else { - err = EINVAL; - } + value = (int)param_arr[param_class_opt_ipv4_tcp].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_udp]); + + value = 
(int)param_arr[param_class_opt_ipv4_udp].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_ah") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_ah]); + + value = (int)param_arr[param_class_opt_ipv4_ah].value; + } else if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv4_sctp]); + + value = (int)param_arr[param_class_opt_ipv4_sctp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_tcp]); + + value = (int)param_arr[param_class_opt_ipv6_tcp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_udp]); + + value = (int)param_arr[param_class_opt_ipv6_udp].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_ah]); + + value = (int)param_arr[param_class_opt_ipv6_ah].value; + } else if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { + err = hxge_param_get_ip_opt(hxgep, NULL, NULL, + (caddr_t)&param_arr[param_class_opt_ipv6_sctp]); + + value = (int)param_arr[param_class_opt_ipv6_sctp].value; + } else { + err = EINVAL; } if (err == 0) { @@ -3916,6 +3949,7 @@ hxge_add_intrs_adv_type(p_hxge_t hxgep, uint32_t int_type) return (HXGE_ERROR | HXGE_DDI_FAILED); } + ldgp->htable_idx = x; intrp->intr_added++; } intrp->msi_intx_cnt = nactual; @@ -4219,7 +4253,6 @@ hxge_mac_register(p_hxge_t hxgep) macp->m_max_sdu = hxgep->vmac.maxframesize - MTU_TO_FRAME_SIZE; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = hxge_priv_props; - macp->m_priv_prop_count = HXGE_MAX_PRIV_PROPS; macp->m_v12n = MAC_VIRT_LEVEL1; HXGE_DEBUG_MSG((hxgep, DDI_CTL, diff --git a/usr/src/uts/common/io/hxge/hxge_rxdma.c b/usr/src/uts/common/io/hxge/hxge_rxdma.c index 
6700313f63..3ac170277d 100644 --- a/usr/src/uts/common/io/hxge/hxge_rxdma.c +++ b/usr/src/uts/common/io/hxge/hxge_rxdma.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2060,8 +2060,7 @@ hxge_receive_packet(p_hxge_t hxgep, p_rx_rcr_ring_t rcr_p, pkt_type == RCR_PKT_IS_UDP) ? B_TRUE : B_FALSE); if (!no_port_bit && l4_cs_eq_bit && is_tcp_udp && !error_type) { - (void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0); + mac_hcksum_set(nmp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); HXGE_DEBUG_MSG((hxgep, RX_CTL, "==> hxge_receive_packet: Full tcp/udp cksum " diff --git a/usr/src/uts/common/io/hxge/hxge_send.c b/usr/src/uts/common/io/hxge/hxge_send.c index e453322486..647717b82c 100644 --- a/usr/src/uts/common/io/hxge/hxge_send.c +++ b/usr/src/uts/common/io/hxge/hxge_send.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -163,8 +163,8 @@ hxge_start(p_hxge_t hxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) } } - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, &value, + &cksum_flags); if (!HXGE_IS_VLAN_PACKET(mp->b_rptr)) { start_offset += sizeof (ether_header_t); stuff_offset += sizeof (ether_header_t); @@ -593,8 +593,8 @@ hxge_start_control_header_only: i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); if (ngathers > hxge_tx_max_gathers) { good_packet = B_FALSE; - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, + &end_offset, &value, &cksum_flags); HXGE_DEBUG_MSG((NULL, TX_CTL, "==> hxge_start(14): pull msg - " diff --git a/usr/src/uts/common/io/ib/clients/ibd/ibd.c b/usr/src/uts/common/io/ib/clients/ibd/ibd.c index b3a39a2efc..1ca10a43e4 100644 --- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c @@ -458,6 +458,7 @@ static mac_callbacks_t ibd_m_callbacks = { ibd_m_unicst, ibd_m_tx, NULL, + NULL, ibd_m_getcapab }; @@ -6256,7 +6257,7 @@ ibd_rc_large_copy: * ud destination, the opcode and the LSO header information to the * work request. 
*/ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); if ((lsoflags & HW_LSO) != HW_LSO) { node->w_swr.wr_opcode = IBT_WRC_SEND; lsohdr_sz = 0; @@ -6277,7 +6278,7 @@ ibd_rc_large_copy: lsohdr_sz = (node->w_swr.wr.ud_lso).lso_hdr_sz; } - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags); if ((hckflags & HCK_FULLCKSUM) == HCK_FULLCKSUM) node->w_swr.wr_flags |= IBT_WR_SEND_CKSUM; else @@ -6940,8 +6941,7 @@ ibd_process_rx(ibd_state_t *state, ibd_rwqe_t *rwqe, ibt_wc_t *wc) if (((wc->wc_flags & IBT_WC_CKSUM_OK) == IBT_WC_CKSUM_OK) && (wc->wc_cksum == 0xFFFF) && (iphap->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION)) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); } return (mp); diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c index 45fbfd7932..334c7dcd04 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,6 +29,7 @@ #include <inet/ip.h> #include <inet/ip_ire.h> #include <inet/ip_if.h> +#include <sys/ethernet.h> #include <sys/ib/mgt/ibcm/ibcm_arp.h> extern char cmlog[]; diff --git a/usr/src/uts/common/io/igb/igb_gld.c b/usr/src/uts/common/io/igb/igb_gld.c index becf960af5..3630bb5019 100644 --- a/usr/src/uts/common/io/igb/igb_gld.c +++ b/usr/src/uts/common/io/igb/igb_gld.c @@ -850,11 +850,15 @@ igb_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = igb_ring_start; infop->mri_stop = NULL; infop->mri_poll = (mac_ring_poll_t)igb_rx_ring_poll; + infop->mri_stat = igb_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = igb_rx_ring_intr_enable; mintr->mi_disable = igb_rx_ring_intr_disable; - + if (igb->intr_type & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + igb->htable[rx_ring->intr_vector]; + } break; } case MAC_RING_TYPE_TX: { @@ -867,7 +871,11 @@ igb_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = igb_tx_ring_send; - + infop->mri_stat = igb_tx_ring_stat; + if (igb->intr_type & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + igb->htable[tx_ring->intr_vector]; + } break; } default: @@ -1152,141 +1160,90 @@ setup_link: int igb_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { igb_t *igb = (igb_t *)arg; struct e1000_hw *hw = &igb->hw; int err = 0; uint32_t flow_control; uint64_t tmp = 0; - mac_propval_range_t range; - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); - if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) - return (igb_get_def_val(igb, pr_num, pr_valsize, pr_val)); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - 
bcopy(&igb->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&igb->link_duplex, pr_val, sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - tmp = igb->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp = igb->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; + ASSERT(pr_valsize >= sizeof (uint8_t)); *(uint8_t *)pr_val = igb->param_adv_autoneg_cap; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (uint32_t)) { - switch (hw->fc.requested_mode) { - case e1000_fc_none: - flow_control = LINK_FLOWCTRL_NONE; - break; - case e1000_fc_rx_pause: - flow_control = LINK_FLOWCTRL_RX; - break; - case e1000_fc_tx_pause: - flow_control = LINK_FLOWCTRL_TX; - break; - case e1000_fc_full: - flow_control = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flow_control, pr_val, sizeof (flow_control)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint32_t)); + switch (hw->fc.requested_mode) { + case e1000_fc_none: + flow_control = LINK_FLOWCTRL_NONE; + break; + case e1000_fc_rx_pause: + flow_control = LINK_FLOWCTRL_RX; + break; + case e1000_fc_tx_pause: + flow_control = LINK_FLOWCTRL_TX; + break; + case e1000_fc_full: + flow_control = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flow_control, pr_val, sizeof (flow_control)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_1000fdx_cap; break; case MAC_PROP_EN_1000FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_1000fdx_cap; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_1000hdx_cap; break; 
case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_1000hdx_cap; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100t4_cap; break; case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100t4_cap; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100fdx_cap; break; case MAC_PROP_EN_100FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100fdx_cap; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_100hdx_cap; break; case MAC_PROP_EN_100HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_100hdx_cap; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_10fdx_cap; break; case MAC_PROP_EN_10FDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_10fdx_cap; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_adv_10hdx_cap; break; case MAC_PROP_EN_10HDX_CAP: - if (hw->phy.media_type != e1000_media_type_copper) - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = igb->param_en_10hdx_cap; break; case MAC_PROP_PRIVATE: - err = igb_get_priv_prop(igb, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = MIN_MTU; - range.range_uint32[0].mpur_max = MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); + err = igb_get_priv_prop(igb, pr_name, pr_valsize, pr_val); break; default: err = EINVAL; @@ 
-1295,98 +1252,106 @@ igb_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, return (err); } -int -igb_get_def_val(igb_t *igb, mac_prop_id_t pr_num, - uint_t pr_valsize, void *pr_val) +void +igb_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) { - uint32_t flow_control; + igb_t *igb = (igb_t *)arg; struct e1000_hw *hw = &igb->hw; - uint16_t phy_status; - uint16_t phy_ext_status; - int err = 0; + uint16_t phy_status, phy_ext_status; - ASSERT(pr_valsize > 0); switch (pr_num) { - case MAC_PROP_AUTONEG: - if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; - } else { - (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0; - } - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (uint32_t)) - return (EINVAL); - flow_control = LINK_FLOWCTRL_BI; - bcopy(&flow_control, pr_val, sizeof (flow_control)); - break; + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + case MAC_PROP_EN_1000FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 1; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { - (void) e1000_read_phy_reg(hw, - PHY_EXT_STATUS, &phy_ext_status); - *(uint8_t *)pr_val = + (void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, + &phy_ext_status); + mac_prop_info_set_default_uint8(prh, ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) || - (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0; + (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 
1 : 0); } break; - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)pr_val = 0; - break; + case MAC_PROP_ADV_100FDX_CAP: case MAC_PROP_EN_100FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = + mac_prop_info_set_default_uint8(prh, ((phy_status & MII_SR_100X_FD_CAPS) || - (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0; + (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0); } break; + case MAC_PROP_ADV_100HDX_CAP: case MAC_PROP_EN_100HDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = + mac_prop_info_set_default_uint8(prh, ((phy_status & MII_SR_100X_HD_CAPS) || - (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0; + (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0); } break; + case MAC_PROP_ADV_10FDX_CAP: case MAC_PROP_EN_10FDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0; + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0); } break; + case MAC_PROP_ADV_10HDX_CAP: case MAC_PROP_EN_10HDX_CAP: if (hw->phy.media_type != e1000_media_type_copper) { - *(uint8_t *)pr_val = 0; + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } else { (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); - *(uint8_t *)pr_val = - (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0; + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_10T_HD_CAPS) ? 
1 : 0); } break; - default: - err = ENOTSUP; + + case MAC_PROP_AUTONEG: + if (hw->phy.media_type != e1000_media_type_copper) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else { + (void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); + mac_prop_info_set_default_uint8(prh, + (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0); + } + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, MIN_MTU, MAX_MTU); + break; + + case MAC_PROP_PRIVATE: + igb_priv_prop_info(igb, pr_name, prh); break; } - return (err); + } boolean_t @@ -1533,72 +1498,65 @@ igb_set_priv_prop(igb_t *igb, const char *pr_name, } int -igb_get_priv_prop(igb_t *igb, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +igb_get_priv_prop(igb_t *igb, const char *pr_name, uint_t pr_valsize, + void *pr_val) { - int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - *perm = MAC_PROP_PERM_RW; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : igb->param_adv_pause_cap); - err = 0; - goto done; - } - if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : igb->param_adv_asym_pause_cap); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_TX_COPY_THRESHOLD : - igb->tx_copy_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RECYCLE_THRESHOLD : - igb->tx_recycle_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_overload_thresh") == 0) { - value = (is_default ? DEFAULT_TX_OVERLOAD_THRESHOLD : - igb->tx_overload_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_tx_resched_thresh") == 0) { - value = (is_default ? 
DEFAULT_TX_RESCHED_THRESHOLD : - igb->tx_resched_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_rx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_RX_COPY_THRESHOLD : - igb->rx_copy_thresh); - err = 0; - goto done; - } - if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_PER_INTR : - igb->rx_limit_per_intr); - err = 0; - goto done; - } - if (strcmp(pr_name, "_intr_throttling") == 0) { - value = (is_default ? igb->capab->def_intr_throttle : - igb->intr_throttling[0]); - err = 0; - goto done; + value = igb->param_adv_pause_cap; + } else if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + value = igb->param_adv_asym_pause_cap; + } else if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = igb->tx_copy_thresh; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = igb->tx_recycle_thresh; + } else if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = igb->tx_overload_thresh; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = igb->tx_resched_thresh; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = igb->rx_copy_thresh; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = igb->rx_limit_per_intr; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = igb->intr_throttling[0]; + } else { + return (ENOTSUP); } -done: - if (err == 0) { - (void) snprintf(pr_val, pr_valsize, "%d", value); + + (void) snprintf(pr_val, pr_valsize, "%d", value); + return (0); +} + +void +igb_priv_prop_info(igb_t *igb, const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[64]; + int value; + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } else if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = DEFAULT_TX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = DEFAULT_TX_RECYCLE_THRESHOLD; + } else 
if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = DEFAULT_TX_OVERLOAD_THRESHOLD; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = DEFAULT_TX_RESCHED_THRESHOLD; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = DEFAULT_RX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = DEFAULT_RX_LIMIT_PER_INTR; + } else if (strcmp(pr_name, "_intr_throttling") == 0) { + value = igb->capab->def_intr_throttle; + } else { + return; } - return (err); + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + mac_prop_info_set_default_str(prh, valstr); } diff --git a/usr/src/uts/common/io/igb/igb_main.c b/usr/src/uts/common/io/igb/igb_main.c index b4070b8389..3ac2c03e1d 100644 --- a/usr/src/uts/common/io/igb/igb_main.c +++ b/usr/src/uts/common/io/igb/igb_main.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -121,21 +120,19 @@ static void igb_fm_init(igb_t *); static void igb_fm_fini(igb_t *); static void igb_release_multicast(igb_t *); -mac_priv_prop_t igb_priv_props[] = { - {"_tx_copy_thresh", MAC_PROP_PERM_RW}, - {"_tx_recycle_thresh", MAC_PROP_PERM_RW}, - {"_tx_overload_thresh", MAC_PROP_PERM_RW}, - {"_tx_resched_thresh", MAC_PROP_PERM_RW}, - {"_rx_copy_thresh", MAC_PROP_PERM_RW}, - {"_rx_limit_per_intr", MAC_PROP_PERM_RW}, - {"_intr_throttling", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ} +char *igb_priv_props[] = { + "_tx_copy_thresh", + "_tx_recycle_thresh", + "_tx_overload_thresh", + "_tx_resched_thresh", + "_rx_copy_thresh", + "_rx_limit_per_intr", + "_intr_throttling", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; -#define IGB_MAX_PRIV_PROPS \ - (sizeof (igb_priv_props) / sizeof (mac_priv_prop_t)) - static struct cb_ops igb_cb_ops = { nulldev, /* cb_open */ nulldev, /* cb_close */ @@ -191,7 +188,7 @@ ddi_device_acc_attr_t igb_regs_acc_attr = { }; #define IGB_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t igb_m_callbacks = { IGB_M_CALLBACK_FLAGS, @@ -202,12 +199,14 @@ static mac_callbacks_t igb_m_callbacks = { igb_m_multicst, NULL, NULL, + NULL, igb_m_ioctl, igb_m_getcapab, NULL, NULL, igb_m_setprop, - igb_m_getprop + igb_m_getprop, + igb_m_propinfo }; /* @@ -783,7 +782,6 @@ igb_register_mac(igb_t *igb) sizeof (struct ether_vlan_header) - ETHERFCSL; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = igb_priv_props; - mac->m_priv_prop_count = IGB_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; status = mac_register(mac, &igb->mac_hdl); diff --git 
a/usr/src/uts/common/io/igb/igb_rx.c b/usr/src/uts/common/io/igb/igb_rx.c index 1eeaf9d325..3f7ac957a5 100644 --- a/usr/src/uts/common/io/igb/igb_rx.c +++ b/usr/src/uts/common/io/igb/igb_rx.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -23,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms of the CDDL. + * Use is subject to license terms. 
*/ #include "igb_sw.h" @@ -272,18 +271,17 @@ igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) if (((status_error & E1000_RXD_STAT_TCPCS) || (status_error & E1000_RXD_STAT_UDPCS)) && !(status_error & E1000_RXDEXT_STATERR_TCPE)) - hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + hcksum_flags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ if ((status_error & E1000_RXD_STAT_IPCS) && !(status_error & E1000_RXDEXT_STATERR_IPE)) - hcksum_flags |= HCK_IPV4_HDRCKSUM; + hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; if (hcksum_flags != 0) { - (void) hcksum_assoc(mp, - NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); } } @@ -413,6 +411,10 @@ igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes) mblk_tail = &mp->b_next; } + /* Update per-ring rx statistics */ + rx_ring->rx_pkts++; + rx_ring->rx_bytes += pkt_len; + rx_discard: /* * Reset rx descriptor read bits diff --git a/usr/src/uts/common/io/igb/igb_stat.c b/usr/src/uts/common/io/igb/igb_stat.c index 8edc4dbeed..3f5f4d69a2 100644 --- a/usr/src/uts/common/io/igb/igb_stat.c +++ b/usr/src/uts/common/io/igb/igb_stat.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,8 +21,8 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms of the CDDL. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #include "igb_sw.h" @@ -271,3 +270,53 @@ igb_init_stats(igb_t *igb) return (IGB_SUCCESS); } + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +igb_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +igb_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->tx_bytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->tx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/igb/igb_sw.h b/usr/src/uts/common/io/igb/igb_sw.h index e7e886f35c..080cd1bed6 100644 --- a/usr/src/uts/common/io/igb/igb_sw.h +++ b/usr/src/uts/common/io/igb/igb_sw.h @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -451,6 +450,12 @@ typedef struct igb_tx_ring { uint32_t recycle_fail; uint32_t stall_watchdog; + /* + * Per-ring statistics + */ + uint64_t tx_pkts; /* Packets Transmitted Count */ + uint64_t tx_bytes; /* Bytes Transmitted Count */ + #ifdef IGB_DEBUG /* * Debug statistics @@ -516,6 +521,12 @@ typedef struct igb_rx_ring { kmutex_t rx_lock; /* Rx access lock */ + /* + * Per-ring statistics + */ + uint64_t rx_pkts; /* Packets Received Count */ + uint64_t rx_bytes; /* Bytes Received Count */ + #ifdef IGB_DEBUG /* * Debug statistics @@ -810,11 +821,12 @@ boolean_t igb_m_getcapab(void *, mac_capab_t, void *); void igb_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); int igb_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int igb_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +int igb_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void igb_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); int igb_set_priv_prop(igb_t *, const char *, uint_t, const void *); -int igb_get_priv_prop(igb_t *, const char *, - uint_t, uint_t, void *, uint_t *); +int igb_get_priv_prop(igb_t *, const char *, uint_t, void *); +void igb_priv_prop_info(igb_t *, const char *, mac_prop_info_handle_t); boolean_t igb_param_locked(mac_prop_id_t); void igb_fill_group(void *arg, 
mac_ring_type_t, const int, mac_group_info_t *, mac_group_handle_t); @@ -850,6 +862,8 @@ int igb_init_stats(igb_t *); mblk_t *igb_rx_ring_poll(void *, int); mblk_t *igb_tx_ring_send(void *, mblk_t *); +int igb_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int igb_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/igb/igb_tx.c b/usr/src/uts/common/io/igb/igb_tx.c index b77afe1a5d..31e46609a5 100644 --- a/usr/src/uts/common/io/igb/igb_tx.c +++ b/usr/src/uts/common/io/igb/igb_tx.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -418,6 +417,10 @@ adjust_threshold: ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); + /* Update per-ring tx statistics */ + tx_ring->tx_pkts++; + tx_ring->tx_bytes += mbsize; + mutex_exit(&tx_ring->tx_lock); return (B_TRUE); @@ -599,7 +602,7 @@ igb_get_tx_context(mblk_t *mp, tx_context_t *ctx) ASSERT(mp != NULL); - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags); bzero(ctx, sizeof (tx_context_t)); ctx->hcksum_flags = flags; @@ -607,7 +610,7 @@ igb_get_tx_context(mblk_t *mp, tx_context_t *ctx) if (flags == 0) return (TX_CXT_SUCCESS); - lso_info_get(mp, &mss, &lso_flag); + mac_lso_get(mp, &mss, &lso_flag); ctx->mss = mss; ctx->lso_flag = (lso_flag == HW_LSO); diff --git a/usr/src/uts/common/io/ipw/ipw2100.c b/usr/src/uts/common/io/ipw/ipw2100.c index 8c6bdbbe6f..2559c64762 100644 --- a/usr/src/uts/common/io/ipw/ipw2100.c +++ b/usr/src/uts/common/io/ipw/ipw2100.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -140,9 +140,9 @@ static void ipw2100_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int ipw2100_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int ipw2100_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); - + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void ipw2100_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* * Interrupt and Data transferring operations @@ -181,7 +181,7 @@ static int ipw2100_cpr_resume(struct ipw2100_softc *sc); * Mac Call Back entries */ mac_callbacks_t ipw2100_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ipw2100_m_stat, ipw2100_m_start, ipw2100_m_stop, @@ -189,12 +189,14 @@ mac_callbacks_t ipw2100_m_callbacks = { ipw2100_m_multicst, ipw2100_m_unicst, ipw2100_m_tx, + NULL, ipw2100_m_ioctl, NULL, NULL, NULL, ipw2100_m_setprop, - ipw2100_m_getprop + ipw2100_m_getprop, + ipw2100_m_propinfo }; @@ -2470,7 +2472,7 @@ ipw2100_getset(struct ipw2100_softc *sc, mblk_t *m, uint32_t cmd, */ static int ipw2100_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ipw2100_softc *sc = (struct ipw2100_softc *)arg; struct ieee80211com *ic = &sc->sc_ic; @@ -2487,14 +2489,25 @@ ipw2100_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; default: /* go through net80211 */ - err = ieee80211_getprop(ic, pr_name, wldp_pr_num, pr_flags, - wldp_length, wldp_buf, perm); + err = ieee80211_getprop(ic, pr_name, wldp_pr_num, + wldp_length, wldp_buf); break; } return (err); } +static void +ipw2100_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct ipw2100_softc *sc = (struct 
ipw2100_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wldp_pr_num, prh); + +} + static int ipw2100_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwh/iwh.c b/usr/src/uts/common/io/iwh/iwh.c index 407b814066..39b6f27e7f 100644 --- a/usr/src/uts/common/io/iwh/iwh.c +++ b/usr/src/uts/common/io/iwh/iwh.c @@ -379,8 +379,10 @@ static void iwh_m_ioctl(void *, queue_t *, mblk_t *); static int iwh_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int iwh_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void iwh_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); /* * Supported rates for 802.11b/g modes (in 500Kbps unit). 
@@ -466,7 +468,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwh_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwh_m_stat, iwh_m_start, iwh_m_stop, @@ -474,12 +476,14 @@ mac_callbacks_t iwh_m_callbacks = { iwh_m_multicst, iwh_m_unicst, iwh_m_tx, + NULL, iwh_m_ioctl, NULL, NULL, NULL, iwh_m_setprop, - iwh_m_getprop + iwh_m_getprop, + iwh_m_propinfo }; #ifdef DEBUG @@ -3520,7 +3524,7 @@ iwh_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int iwh_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { iwh_sc_t *sc; int err = EINVAL; @@ -3531,11 +3535,20 @@ iwh_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, sc = (iwh_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +iwh_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + iwh_sc_t *sc = (iwh_sc_t *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int iwh_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwi/ipw2200.c b/usr/src/uts/common/io/iwi/ipw2200.c index d52e069496..0c9a729b43 100644 --- a/usr/src/uts/common/io/iwi/ipw2200.c +++ b/usr/src/uts/common/io/iwi/ipw2200.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -161,9 +161,9 @@ static mblk_t *ipw2200_m_tx(void *arg, mblk_t *mp); static int ipw2200_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int ipw2200_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); - + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void ipw2200_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); /* * Interrupt and Data transferring operations @@ -205,7 +205,7 @@ extern void ieee80211_notify_node_leave(ieee80211com_t *ic, * Mac Call Back entries */ mac_callbacks_t ipw2200_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ipw2200_m_stat, ipw2200_m_start, ipw2200_m_stop, @@ -213,12 +213,14 @@ mac_callbacks_t ipw2200_m_callbacks = { ipw2200_m_multicst, ipw2200_m_unicst, ipw2200_m_tx, + NULL, ipw2200_m_ioctl, NULL, NULL, NULL, ipw2200_m_setprop, - ipw2200_m_getprop + ipw2200_m_getprop, + ipw2200_m_propinfo }; /* @@ -2558,7 +2560,7 @@ ipw2200_getset(struct ipw2200_softc *sc, mblk_t *m, uint32_t cmd, */ static int ipw2200_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ipw2200_softc *sc = (struct ipw2200_softc *)arg; struct ieee80211com *ic = &sc->sc_ic; @@ -2575,14 +2577,24 @@ ipw2200_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; default: /* go through net80211 */ - err = ieee80211_getprop(ic, pr_name, wldp_pr_num, pr_flags, - wldp_length, wldp_buf, perm); + err = ieee80211_getprop(ic, pr_name, wldp_pr_num, + wldp_length, wldp_buf); break; } return (err); } +static void +ipw2200_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wlpd_pr_num, mac_prop_info_handle_t mph) +{ + struct 
ipw2200_softc *sc = (struct ipw2200_softc *)arg; + struct ieee80211com *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wlpd_pr_num, mph); +} + static int ipw2200_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/iwk/iwk2.c b/usr/src/uts/common/io/iwk/iwk2.c index fbf600039c..6b5c64363c 100644 --- a/usr/src/uts/common/io/iwk/iwk2.c +++ b/usr/src/uts/common/io/iwk/iwk2.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -340,8 +340,9 @@ static void iwk_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, uint_t wldp_length, const void *wldp_buf); static int iwk_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_name, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_name, uint_t wldp_length, void *wldp_buf); +static void iwk_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static void iwk_destroy_locks(iwk_sc_t *sc); static int iwk_send(ieee80211com_t *ic, mblk_t *mp, uint8_t type); static void iwk_thread(iwk_sc_t *sc); @@ -432,7 +433,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwk_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwk_m_stat, iwk_m_start, iwk_m_stop, @@ -440,12 +441,14 @@ mac_callbacks_t iwk_m_callbacks = { iwk_m_multicst, iwk_m_unicst, iwk_m_tx, + NULL, iwk_m_ioctl, NULL, NULL, NULL, iwk_m_setprop, - iwk_m_getprop + iwk_m_getprop, + iwk_m_propinfo }; #ifdef DEBUG @@ -3090,19 +3093,20 @@ iwk_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) /* * callback functions for set/get properties */ -/* ARGSUSED */ + static int iwk_m_getprop(void *arg, const char 
*pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; iwk_sc_t *sc = (iwk_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } + static int iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) @@ -3129,6 +3133,16 @@ iwk_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static void +iwk_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + iwk_sc_t *sc = (iwk_sc_t *)arg; + ieee80211com_t *ic = &sc->sc_ic; + + ieee80211_propinfo(ic, pr_name, wldp_pr_num, mph); +} + /*ARGSUSED*/ static int iwk_m_stat(void *arg, uint_t stat, uint64_t *val) diff --git a/usr/src/uts/common/io/iwp/iwp.c b/usr/src/uts/common/io/iwp/iwp.c index 317c02bee0..eec2b84664 100644 --- a/usr/src/uts/common/io/iwp/iwp.c +++ b/usr/src/uts/common/io/iwp/iwp.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -347,8 +347,9 @@ static void iwp_m_ioctl(void *, queue_t *, mblk_t *); static int iwp_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int iwp_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_length, - void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void iwp_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* * Supported rates for 802.11b/g modes (in 500Kbps unit). 
@@ -429,7 +430,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t iwp_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, iwp_m_stat, iwp_m_start, iwp_m_stop, @@ -437,12 +438,14 @@ mac_callbacks_t iwp_m_callbacks = { iwp_m_multicst, iwp_m_unicst, iwp_m_tx, + NULL, iwp_m_ioctl, NULL, NULL, NULL, iwp_m_setprop, - iwp_m_getprop + iwp_m_getprop, + iwp_m_propinfo }; #ifdef DEBUG @@ -3355,7 +3358,7 @@ iwp_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int iwp_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { iwp_sc_t *sc; int err = EINVAL; @@ -3366,11 +3369,21 @@ iwp_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, sc = (iwp_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +iwp_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + iwp_sc_t *sc; + + sc = (iwp_sc_t *)arg; + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int iwp_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/ixgbe/ixgbe.conf b/usr/src/uts/common/io/ixgbe/ixgbe.conf index 8163fa8d9a..d5cee7501a 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe.conf +++ b/usr/src/uts/common/io/ixgbe/ixgbe.conf @@ -21,7 +21,7 @@ # # Copyright(c) 2007-2008 Intel Corporation. All rights reserved. # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # @@ -66,7 +66,8 @@ # # rx_group_number # The number of the receive groups -# Allowed values: 1 - 16 +# Allowed values: 1 - 16 (for Intel 82598 10Gb ethernet controller) +# Allowed values: 1 - 64 (for Intel 82599 10Gb ethernet controller) # Default value: 1 # # -------- How to set parameters for a particular interface --------- diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_debug.c b/usr/src/uts/common/io/ixgbe/ixgbe_debug.c index f4dc85aad6..1430817445 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_debug.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_debug.c @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -40,8 +39,8 @@ ixgbe_dump_interrupt(void *adapter, char *tag) { ixgbe_t *ixgbe = (ixgbe_t *)adapter; struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_intr_vector_t *vect; - uint32_t ivar, reg; + ixgbe_intr_vector_t *vect; + uint32_t ivar, reg, hw_index; int i, j; /* @@ -74,10 +73,11 @@ ixgbe_dump_interrupt(void *adapter, char *tag) /* for each rx ring bit set */ j = bt_getlowbit(vect->rx_map, 0, (ixgbe->num_rx_rings - 1)); while (j >= 0) { + hw_index = ixgbe->rx_rings[j].hw_index; ixgbe_log(ixgbe, "rx %d ivar %d rxdctl: 0x%x srrctl: 0x%x\n", - j, IXGBE_IVAR_RX_QUEUE(j), - IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)), - IXGBE_READ_REG(hw, IXGBE_SRRCTL(j))); + hw_index, IXGBE_IVAR_RX_QUEUE(hw_index), + IXGBE_READ_REG(hw, IXGBE_RXDCTL(hw_index)), + IXGBE_READ_REG(hw, IXGBE_SRRCTL(hw_index))); j = bt_getlowbit(vect->rx_map, (j + 1), (ixgbe->num_rx_rings - 1)); } @@ -427,7 +427,7 @@ void ixgbe_dump_regs(void *adapter) { ixgbe_t *ixgbe = (ixgbe_t *)adapter; - uint32_t reg_val; + uint32_t reg_val, hw_index; struct ixgbe_hw *hw = &ixgbe->hw; int i; DEBUGFUNC("ixgbe_dump_regs"); @@ -460,10 +460,11 @@ ixgbe_dump_regs(void *adapter) reg_val = IXGBE_READ_REG(hw, IXGBE_RXCTRL); ixgbe_log(ixgbe, "\tRXCTRL=%x\n", reg_val); for (i = 0; i < ixgbe->num_rx_rings; i++) { - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); - ixgbe_log(ixgbe, "\tRXDCTL(%d)=%x\n", i, reg_val); - reg_val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); - ixgbe_log(ixgbe, "\tSRRCTL(%d)=%x\n", i, reg_val); + hw_index = ixgbe->rx_rings[i].hw_index; + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(hw_index)); + ixgbe_log(ixgbe, "\tRXDCTL(%d)=%x\n", hw_index, reg_val); + reg_val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(hw_index)); + ixgbe_log(ixgbe, "\tSRRCTL(%d)=%x\n", hw_index, reg_val); } reg_val = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_log(ixgbe, "\tRXCSUM=%x\n", reg_val); diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_gld.c b/usr/src/uts/common/io/ixgbe/ixgbe_gld.c index abc1f3647f..a7d580de1d 100644 --- 
a/usr/src/uts/common/io/ixgbe/ixgbe_gld.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_gld.c @@ -21,308 +21,13 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include "ixgbe_sw.h" /* - * Retrieve a value for one of the statistics. - */ -int -ixgbe_m_stat(void *arg, uint_t stat, uint64_t *val) -{ - ixgbe_t *ixgbe = (ixgbe_t *)arg; - struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_stat_t *ixgbe_ks; - int i; - - ixgbe_ks = (ixgbe_stat_t *)ixgbe->ixgbe_ks->ks_data; - - mutex_enter(&ixgbe->gen_lock); - - if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { - mutex_exit(&ixgbe->gen_lock); - return (ECANCELED); - } - - switch (stat) { - case MAC_STAT_IFSPEED: - *val = ixgbe->link_speed * 1000000ull; - break; - - case MAC_STAT_MULTIRCV: - ixgbe_ks->mprc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_MPRC); - *val = ixgbe_ks->mprc.value.ui64; - break; - - case MAC_STAT_BRDCSTRCV: - ixgbe_ks->bprc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_BPRC); - *val = ixgbe_ks->bprc.value.ui64; - break; - - case MAC_STAT_MULTIXMT: - ixgbe_ks->mptc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_MPTC); - *val = ixgbe_ks->mptc.value.ui64; - break; - - case MAC_STAT_BRDCSTXMT: - ixgbe_ks->bptc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_BPTC); - *val = ixgbe_ks->bptc.value.ui64; - break; - - case MAC_STAT_NORCVBUF: - for (i = 0; i < 8; i++) { - ixgbe_ks->rnbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RNBC(i)); - } - *val = ixgbe_ks->rnbc.value.ui64; - break; - - case MAC_STAT_IERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - ixgbe_ks->illerrc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - ixgbe_ks->errbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ERRBC); - ixgbe_ks->rlec.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RLEC); - *val = ixgbe_ks->crcerrs.value.ui64 + - ixgbe_ks->illerrc.value.ui64 + - ixgbe_ks->errbc.value.ui64 + - ixgbe_ks->rlec.value.ui64; - break; - - case 
MAC_STAT_RBYTES: - ixgbe_ks->tor.value.ui64 = 0; - for (i = 0; i < 16; i++) { - ixgbe_ks->qbrc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBRC(i)); - ixgbe_ks->tor.value.ui64 += - ixgbe_ks->qbrc[i].value.ui64; - } - *val = ixgbe_ks->tor.value.ui64; - break; - - case MAC_STAT_OBYTES: - ixgbe_ks->tot.value.ui64 = 0; - for (i = 0; i < 16; i++) { - if (hw->mac.type >= ixgbe_mac_82599EB) { - ixgbe_ks->qbtc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); - ixgbe_ks->qbtc[i].value.ui64 += ((uint64_t) - IXGBE_READ_REG(hw, IXGBE_QBTC_H(i))) << 32; - } else { - ixgbe_ks->qbtc[i].value.ui64 += - IXGBE_READ_REG(hw, IXGBE_QBTC(i)); - } - ixgbe_ks->tot.value.ui64 += - ixgbe_ks->qbtc[i].value.ui64; - } - *val = ixgbe_ks->tot.value.ui64; - break; - - case MAC_STAT_IPACKETS: - ixgbe_ks->tpr.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_TPR); - *val = ixgbe_ks->tpr.value.ui64; - break; - - case MAC_STAT_OPACKETS: - ixgbe_ks->tpt.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_TPT); - *val = ixgbe_ks->tpt.value.ui64; - break; - - /* RFC 1643 stats */ - case ETHER_STAT_FCS_ERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - *val = ixgbe_ks->crcerrs.value.ui64; - break; - - case ETHER_STAT_TOOLONG_ERRORS: - ixgbe_ks->roc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ROC); - *val = ixgbe_ks->roc.value.ui64; - break; - - case ETHER_STAT_MACRCV_ERRORS: - ixgbe_ks->crcerrs.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_CRCERRS); - ixgbe_ks->illerrc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ILLERRC); - ixgbe_ks->errbc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_ERRBC); - ixgbe_ks->rlec.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RLEC); - *val = ixgbe_ks->crcerrs.value.ui64 + - ixgbe_ks->illerrc.value.ui64 + - ixgbe_ks->errbc.value.ui64 + - ixgbe_ks->rlec.value.ui64; - break; - - /* MII/GMII stats */ - case ETHER_STAT_XCVR_ADDR: - /* The Internal PHY's MDI address for each MAC is 1 */ - *val = 1; - break; - - case ETHER_STAT_XCVR_ID: - *val = hw->phy.id; - break; - - case 
ETHER_STAT_XCVR_INUSE: - switch (ixgbe->link_speed) { - case IXGBE_LINK_SPEED_1GB_FULL: - *val = - (hw->phy.media_type == ixgbe_media_type_copper) ? - XCVR_1000T : XCVR_1000X; - break; - case IXGBE_LINK_SPEED_100_FULL: - *val = (hw->phy.media_type == ixgbe_media_type_copper) ? - XCVR_100T2 : XCVR_100X; - break; - default: - *val = XCVR_NONE; - break; - } - break; - - case ETHER_STAT_CAP_10GFDX: - *val = 1; - break; - - case ETHER_STAT_CAP_1000FDX: - *val = 1; - break; - - case ETHER_STAT_CAP_100FDX: - *val = 1; - break; - - case ETHER_STAT_CAP_ASMPAUSE: - *val = ixgbe->param_asym_pause_cap; - break; - - case ETHER_STAT_CAP_PAUSE: - *val = ixgbe->param_pause_cap; - break; - - case ETHER_STAT_CAP_AUTONEG: - *val = 1; - break; - - case ETHER_STAT_ADV_CAP_10GFDX: - *val = ixgbe->param_adv_10000fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_1000FDX: - *val = ixgbe->param_adv_1000fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_100FDX: - *val = ixgbe->param_adv_100fdx_cap; - break; - - case ETHER_STAT_ADV_CAP_ASMPAUSE: - *val = ixgbe->param_adv_asym_pause_cap; - break; - - case ETHER_STAT_ADV_CAP_PAUSE: - *val = ixgbe->param_adv_pause_cap; - break; - - case ETHER_STAT_ADV_CAP_AUTONEG: - *val = ixgbe->param_adv_autoneg_cap; - break; - - case ETHER_STAT_LP_CAP_10GFDX: - *val = ixgbe->param_lp_10000fdx_cap; - break; - - case ETHER_STAT_LP_CAP_1000FDX: - *val = ixgbe->param_lp_1000fdx_cap; - break; - - case ETHER_STAT_LP_CAP_100FDX: - *val = ixgbe->param_lp_100fdx_cap; - break; - - case ETHER_STAT_LP_CAP_ASMPAUSE: - *val = ixgbe->param_lp_asym_pause_cap; - break; - - case ETHER_STAT_LP_CAP_PAUSE: - *val = ixgbe->param_lp_pause_cap; - break; - - case ETHER_STAT_LP_CAP_AUTONEG: - *val = ixgbe->param_lp_autoneg_cap; - break; - - case ETHER_STAT_LINK_ASMPAUSE: - *val = ixgbe->param_asym_pause_cap; - break; - - case ETHER_STAT_LINK_PAUSE: - *val = ixgbe->param_pause_cap; - break; - - case ETHER_STAT_LINK_AUTONEG: - *val = ixgbe->param_adv_autoneg_cap; - break; - - case 
ETHER_STAT_LINK_DUPLEX: - *val = ixgbe->link_duplex; - break; - - case ETHER_STAT_TOOSHORT_ERRORS: - ixgbe_ks->ruc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RUC); - *val = ixgbe_ks->ruc.value.ui64; - break; - - case ETHER_STAT_CAP_REMFAULT: - *val = ixgbe->param_rem_fault; - break; - - case ETHER_STAT_ADV_REMFAULT: - *val = ixgbe->param_adv_rem_fault; - break; - - case ETHER_STAT_LP_REMFAULT: - *val = ixgbe->param_lp_rem_fault; - break; - - case ETHER_STAT_JABBER_ERRORS: - ixgbe_ks->rjc.value.ui64 += - IXGBE_READ_REG(hw, IXGBE_RJC); - *val = ixgbe_ks->rjc.value.ui64; - break; - - default: - mutex_exit(&ixgbe->gen_lock); - return (ENOTSUP); - } - - mutex_exit(&ixgbe->gen_lock); - - if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { - ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); - return (EIO); - } - - return (0); -} - -/* * Bring the device out of the reset/quiesced state that it * was in when the interface was registered. */ @@ -732,115 +437,68 @@ setup_link: int ixgbe_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { ixgbe_t *ixgbe = (ixgbe_t *)arg; struct ixgbe_hw *hw = &ixgbe->hw; int err = 0; uint32_t flow_control; uint64_t tmp = 0; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - mac_propval_range_t range; - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_READ; - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&ixgbe->link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&ixgbe->link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - if (pr_valsize >= sizeof (uint64_t)) { - tmp = ixgbe->link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + tmp 
= ixgbe->link_speed * 1000000ull; + bcopy(&tmp, pr_val, sizeof (tmp)); break; case MAC_PROP_AUTONEG: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_adv_autoneg_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_autoneg_cap; break; case MAC_PROP_FLOWCTRL: - *perm = MAC_PROP_PERM_RW; - if (pr_valsize >= sizeof (uint32_t)) { - if (is_default) { + ASSERT(pr_valsize >= sizeof (uint32_t)); + + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: flow_control = LINK_FLOWCTRL_NONE; - bcopy(&flow_control, pr_val, - sizeof (flow_control)); break; - } - switch (hw->fc.requested_mode) { - case ixgbe_fc_none: - flow_control = LINK_FLOWCTRL_NONE; - break; - case ixgbe_fc_rx_pause: - flow_control = LINK_FLOWCTRL_RX; - break; - case ixgbe_fc_tx_pause: - flow_control = LINK_FLOWCTRL_TX; - break; - case ixgbe_fc_full: - flow_control = LINK_FLOWCTRL_BI; - break; - } - bcopy(&flow_control, pr_val, sizeof (flow_control)); - } else - err = EINVAL; + case ixgbe_fc_rx_pause: + flow_control = LINK_FLOWCTRL_RX; + break; + case ixgbe_fc_tx_pause: + flow_control = LINK_FLOWCTRL_TX; + break; + case ixgbe_fc_full: + flow_control = LINK_FLOWCTRL_BI; + break; + } + bcopy(&flow_control, pr_val, sizeof (flow_control)); break; case MAC_PROP_ADV_10GFDX_CAP: - *(uint8_t *)pr_val = (is_default ? 1 : - ixgbe->param_adv_10000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_10000fdx_cap; break; case MAC_PROP_EN_10GFDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_10000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_10000fdx_cap; break; case MAC_PROP_ADV_1000FDX_CAP: - *(uint8_t *)pr_val = (is_default ? 
1 : - ixgbe->param_adv_1000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_1000fdx_cap; break; case MAC_PROP_EN_1000FDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_1000fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_1000fdx_cap; break; case MAC_PROP_ADV_100FDX_CAP: - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_adv_100fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_adv_100fdx_cap; break; case MAC_PROP_EN_100FDX_CAP: - if (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) - *perm = MAC_PROP_PERM_RW; - *(uint8_t *)pr_val = - (is_default ? 1 : ixgbe->param_en_100fdx_cap); + *(uint8_t *)pr_val = ixgbe->param_en_100fdx_cap; break; case MAC_PROP_PRIVATE: err = ixgbe_get_priv_prop(ixgbe, pr_name, - pr_flags, pr_valsize, pr_val, perm); - break; - case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = DEFAULT_MTU; - range.range_uint32[0].mpur_max = ixgbe->capab->max_mtu; - bcopy(&range, pr_val, sizeof (range)); + pr_valsize, pr_val); break; default: err = EINVAL; @@ -849,6 +507,78 @@ ixgbe_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, return (err); } +void +ixgbe_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg; + uint_t perm; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_10GFDX_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + perm = (ixgbe->hw.phy.media_type == ixgbe_media_type_copper) ? 
+ MAC_PROP_PERM_RW : MAC_PROP_PERM_READ; + if (perm == MAC_PROP_PERM_RW) + mac_prop_info_set_default_uint8(prh, 1); + mac_prop_info_set_perm(prh, perm); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_NONE); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + DEFAULT_MTU, ixgbe->capab->max_mtu); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_adv_pause_cap") == 0 || + strcmp(pr_name, "_adv_asym_pause_cap") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; + } + + if (strcmp(pr_name, "_tx_copy_thresh") == 0) { + value = DEFAULT_TX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { + value = DEFAULT_TX_RECYCLE_THRESHOLD; + } else if (strcmp(pr_name, "_tx_overload_thresh") == 0) { + value = DEFAULT_TX_OVERLOAD_THRESHOLD; + } else if (strcmp(pr_name, "_tx_resched_thresh") == 0) { + value = DEFAULT_TX_RESCHED_THRESHOLD; + } else if (strcmp(pr_name, "_rx_copy_thresh") == 0) { + value = DEFAULT_RX_COPY_THRESHOLD; + } else if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { + value = DEFAULT_RX_LIMIT_PER_INTR; + } if (strcmp(pr_name, "_intr_throttling") == 0) { + value = ixgbe->capab->def_intr_throttle; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%x", value); + } + } +} + boolean_t ixgbe_param_locked(mac_prop_id_t pr_num) { @@ -999,65 +729,53 @@ ixgbe_set_priv_prop(ixgbe_t *ixgbe, const char *pr_name, int ixgbe_get_priv_prop(ixgbe_t *ixgbe, const char *pr_name, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; - *perm = MAC_PROP_PERM_RW; - if (strcmp(pr_name, "_adv_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 
1 : ixgbe->param_adv_pause_cap); + value = ixgbe->param_adv_pause_cap; err = 0; goto done; } if (strcmp(pr_name, "_adv_asym_pause_cap") == 0) { - *perm = MAC_PROP_PERM_READ; - value = (is_default ? 1 : ixgbe->param_adv_asym_pause_cap); + value = ixgbe->param_adv_asym_pause_cap; err = 0; goto done; } if (strcmp(pr_name, "_tx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_TX_COPY_THRESHOLD : - ixgbe->tx_copy_thresh); + value = ixgbe->tx_copy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_recycle_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RECYCLE_THRESHOLD : - ixgbe->tx_recycle_thresh); + value = ixgbe->tx_recycle_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_overload_thresh") == 0) { - value = (is_default ? DEFAULT_TX_OVERLOAD_THRESHOLD : - ixgbe->tx_overload_thresh); + value = ixgbe->tx_overload_thresh; err = 0; goto done; } if (strcmp(pr_name, "_tx_resched_thresh") == 0) { - value = (is_default ? DEFAULT_TX_RESCHED_THRESHOLD : - ixgbe->tx_resched_thresh); + value = ixgbe->tx_resched_thresh; err = 0; goto done; } if (strcmp(pr_name, "_rx_copy_thresh") == 0) { - value = (is_default ? DEFAULT_RX_COPY_THRESHOLD : - ixgbe->rx_copy_thresh); + value = ixgbe->rx_copy_thresh; err = 0; goto done; } if (strcmp(pr_name, "_rx_limit_per_intr") == 0) { - value = (is_default ? DEFAULT_RX_LIMIT_PER_INTR : - ixgbe->rx_limit_per_intr); + value = ixgbe->rx_limit_per_intr; err = 0; goto done; } if (strcmp(pr_name, "_intr_throttling") == 0) { - value = (is_default ? 
ixgbe->capab->def_intr_throttle : - ixgbe->intr_throttling[0]); + value = ixgbe->intr_throttling[0]; err = 0; goto done; } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c index 4f9dd4f40f..3d97264a52 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c @@ -28,7 +28,7 @@ #include "ixgbe_sw.h" static char ixgbe_ident[] = "Intel 10Gb Ethernet"; -static char ixgbe_version[] = "driver version 1.1.4"; +static char ixgbe_version[] = "ixgbe 1.1.4"; /* * Local function protoypes @@ -57,11 +57,13 @@ static void ixgbe_setup_tx(ixgbe_t *); static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *); static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *); static void ixgbe_setup_rss(ixgbe_t *); +static void ixgbe_setup_vmdq(ixgbe_t *); +static void ixgbe_setup_vmdq_rss(ixgbe_t *); static void ixgbe_init_unicst(ixgbe_t *); -static int ixgbe_unicst_set(ixgbe_t *, const uint8_t *, int); static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *); static void ixgbe_setup_multicst(ixgbe_t *); static void ixgbe_get_hw_state(ixgbe_t *); +static void ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe); static void ixgbe_get_conf(ixgbe_t *); static void ixgbe_init_params(ixgbe_t *); static int ixgbe_get_prop(ixgbe_t *, char *, int, int, int); @@ -86,6 +88,7 @@ static void ixgbe_map_txring_to_vector(ixgbe_t *, int, int); static void ixgbe_setup_ivar(ixgbe_t *, uint16_t, uint8_t, int8_t); static void ixgbe_enable_ivar(ixgbe_t *, uint16_t, int8_t); static void ixgbe_disable_ivar(ixgbe_t *, uint16_t, int8_t); +static uint32_t ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index); static int ixgbe_map_intrs_to_vectors(ixgbe_t *); static void ixgbe_setup_adapter_vector(ixgbe_t *); static void ixgbe_rem_intr_handlers(ixgbe_t *); @@ -109,22 +112,26 @@ static int ixgbe_resume(dev_info_t *); static int ixgbe_suspend(dev_info_t *); static void ixgbe_unconfigure(dev_info_t *, ixgbe_t *); static uint8_t 
*ixgbe_mc_table_itr(struct ixgbe_hw *, uint8_t **, uint32_t *); +static int ixgbe_cbfunc(dev_info_t *, ddi_cb_action_t, void *, void *, void *); +static int ixgbe_intr_cb_register(ixgbe_t *); +static int ixgbe_intr_adjust(ixgbe_t *, ddi_cb_action_t, int); static int ixgbe_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data); static void ixgbe_fm_init(ixgbe_t *); static void ixgbe_fm_fini(ixgbe_t *); -mac_priv_prop_t ixgbe_priv_props[] = { - {"_tx_copy_thresh", MAC_PROP_PERM_RW}, - {"_tx_recycle_thresh", MAC_PROP_PERM_RW}, - {"_tx_overload_thresh", MAC_PROP_PERM_RW}, - {"_tx_resched_thresh", MAC_PROP_PERM_RW}, - {"_rx_copy_thresh", MAC_PROP_PERM_RW}, - {"_rx_limit_per_intr", MAC_PROP_PERM_RW}, - {"_intr_throttling", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_READ}, - {"_adv_asym_pause_cap", MAC_PROP_PERM_READ} +char *ixgbe_priv_props[] = { + "_tx_copy_thresh", + "_tx_recycle_thresh", + "_tx_overload_thresh", + "_tx_resched_thresh", + "_rx_copy_thresh", + "_rx_limit_per_intr", + "_intr_throttling", + "_adv_pause_cap", + "_adv_asym_pause_cap", + NULL }; #define IXGBE_MAX_PRIV_PROPS \ @@ -202,7 +209,7 @@ static lb_property_t lb_external = { }; #define IXGBE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) static mac_callbacks_t ixgbe_m_callbacks = { IXGBE_M_CALLBACK_FLAGS, @@ -213,12 +220,14 @@ static mac_callbacks_t ixgbe_m_callbacks = { ixgbe_m_multicst, NULL, NULL, + NULL, ixgbe_m_ioctl, ixgbe_m_getcapab, NULL, NULL, ixgbe_m_setprop, - ixgbe_m_getprop + ixgbe_m_getprop, + ixgbe_m_propinfo }; /* @@ -227,7 +236,10 @@ static mac_callbacks_t ixgbe_m_callbacks = { static adapter_info_t ixgbe_82598eb_cap = { 64, /* maximum number of rx queues */ 1, /* minimum number of rx queues */ - 8, /* default number of rx queues */ + 64, /* default number of rx queues */ + 16, /* maximum number of rx groups */ + 1, /* minimum number of rx groups */ + 1, /* default 
number of rx groups */ 32, /* maximum number of tx queues */ 1, /* minimum number of tx queues */ 8, /* default number of tx queues */ @@ -247,7 +259,10 @@ static adapter_info_t ixgbe_82598eb_cap = { static adapter_info_t ixgbe_82599eb_cap = { 128, /* maximum number of rx queues */ 1, /* minimum number of rx queues */ - 8, /* default number of rx queues */ + 128, /* default number of rx queues */ + 64, /* maximum number of rx groups */ + 1, /* minimum number of rx groups */ + 1, /* default number of rx groups */ 128, /* maximum number of tx queues */ 1, /* minimum number of tx queues */ 8, /* default number of tx queues */ @@ -406,6 +421,14 @@ ixgbe_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) ixgbe->attach_progress |= ATTACH_PROGRESS_PROPS; /* + * Register interrupt callback + */ + if (ixgbe_intr_cb_register(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "Failed to register interrupt callback"); + goto attach_fail; + } + + /* * Allocate interrupts */ if (ixgbe_alloc_intrs(ixgbe) != IXGBE_SUCCESS) { @@ -662,6 +685,11 @@ ixgbe_unconfigure(dev_info_t *devinfo, ixgbe_t *ixgbe) } /* + * Unregister interrupt callback handler + */ + (void) ddi_cb_unregister(ixgbe->cb_hdl); + + /* * Remove driver properties */ if (ixgbe->attach_progress & ATTACH_PROGRESS_PROPS) { @@ -745,7 +773,6 @@ ixgbe_register_mac(ixgbe_t *ixgbe) mac->m_max_sdu = ixgbe->default_mtu; mac->m_margin = VLAN_TAGSZ; mac->m_priv_props = ixgbe_priv_props; - mac->m_priv_prop_count = IXGBE_MAX_PRIV_PROPS; mac->m_v12n = MAC_VIRT_LEVEL1; status = mac_register(mac, &ixgbe->mac_hdl); @@ -879,9 +906,11 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) struct ixgbe_hw *hw = &ixgbe->hw; dev_info_t *devinfo = ixgbe->dip; ixgbe_rx_ring_t *rx_ring; + ixgbe_rx_group_t *rx_group; ixgbe_tx_ring_t *tx_ring; uint32_t rx_size; uint32_t tx_size; + uint32_t ring_per_group; int i; /* @@ -915,12 +944,21 @@ ixgbe_init_driver_settings(ixgbe_t *ixgbe) ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 
1 : 0)) << 10; /* - * Initialize rx/tx rings parameters + * Initialize rx/tx rings/groups parameters */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; for (i = 0; i < ixgbe->num_rx_rings; i++) { rx_ring = &ixgbe->rx_rings[i]; rx_ring->index = i; rx_ring->ixgbe = ixgbe; + rx_ring->group_index = i / ring_per_group; + rx_ring->hw_index = ixgbe_get_hw_rx_index(ixgbe, i); + } + + for (i = 0; i < ixgbe->num_rx_groups; i++) { + rx_group = &ixgbe->rx_groups[i]; + rx_group->index = i; + rx_group->ixgbe = ixgbe; } for (i = 0; i < ixgbe->num_tx_rings; i++) { @@ -1605,6 +1643,218 @@ ixgbe_stop(ixgbe_t *ixgbe, boolean_t free_buffer) } /* + * ixgbe_cbfunc - Driver interface for generic DDI callbacks + */ +/* ARGSUSED */ +static int +ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg, + void *arg1, void *arg2) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg1; + + switch (cbaction) { + /* IRM callback */ + int count; + case DDI_CB_INTR_ADD: + case DDI_CB_INTR_REMOVE: + count = (int)(uintptr_t)cbarg; + ASSERT(ixgbe->intr_type == DDI_INTR_TYPE_MSIX); + DTRACE_PROBE2(ixgbe__irm__callback, int, count, + int, ixgbe->intr_cnt); + if (ixgbe_intr_adjust(ixgbe, cbaction, count) != + DDI_SUCCESS) { + ixgbe_error(ixgbe, + "IRM CB: Failed to adjust interrupts"); + goto cb_fail; + } + break; + default: + IXGBE_DEBUGLOG_1(ixgbe, "DDI CB: action 0x%x NOT supported", + cbaction); + return (DDI_ENOTSUP); + } + return (DDI_SUCCESS); +cb_fail: + return (DDI_FAILURE); +} + +/* + * ixgbe_intr_adjust - Adjust interrupt to respond to IRM request. 
+ */ +static int +ixgbe_intr_adjust(ixgbe_t *ixgbe, ddi_cb_action_t cbaction, int count) +{ + int i, rc, actual; + + if (count == 0) + return (DDI_SUCCESS); + + if ((cbaction == DDI_CB_INTR_ADD && + ixgbe->intr_cnt + count > ixgbe->intr_cnt_max) || + (cbaction == DDI_CB_INTR_REMOVE && + ixgbe->intr_cnt - count < ixgbe->intr_cnt_min)) + return (DDI_FAILURE); + + if (!(ixgbe->ixgbe_state & IXGBE_STARTED)) { + return (DDI_FAILURE); + } + + for (i = 0; i < ixgbe->num_rx_rings; i++) + mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, NULL); + for (i = 0; i < ixgbe->num_tx_rings; i++) + mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, NULL); + + mutex_enter(&ixgbe->gen_lock); + ixgbe->ixgbe_state &= ~IXGBE_STARTED; + ixgbe->ixgbe_state |= IXGBE_INTR_ADJUST; + ixgbe->ixgbe_state |= IXGBE_SUSPENDED; + mac_link_update(ixgbe->mac_hdl, LINK_STATE_UNKNOWN); + + ixgbe_stop(ixgbe, B_FALSE); + /* + * Disable interrupts + */ + if (ixgbe->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) { + rc = ixgbe_disable_intrs(ixgbe); + ASSERT(rc == IXGBE_SUCCESS); + } + ixgbe->attach_progress &= ~ATTACH_PROGRESS_ENABLE_INTR; + + /* + * Remove interrupt handlers + */ + if (ixgbe->attach_progress & ATTACH_PROGRESS_ADD_INTR) { + ixgbe_rem_intr_handlers(ixgbe); + } + ixgbe->attach_progress &= ~ATTACH_PROGRESS_ADD_INTR; + + /* + * Clear vect_map + */ + bzero(&ixgbe->vect_map, sizeof (ixgbe->vect_map)); + switch (cbaction) { + case DDI_CB_INTR_ADD: + rc = ddi_intr_alloc(ixgbe->dip, ixgbe->htable, + DDI_INTR_TYPE_MSIX, ixgbe->intr_cnt, count, &actual, + DDI_INTR_ALLOC_NORMAL); + if (rc != DDI_SUCCESS || actual != count) { + ixgbe_log(ixgbe, "Adjust interrupts failed." 
+ "return: %d, irm cb size: %d, actual: %d", + rc, count, actual); + goto intr_adjust_fail; + } + ixgbe->intr_cnt += count; + break; + + case DDI_CB_INTR_REMOVE: + for (i = ixgbe->intr_cnt - count; + i < ixgbe->intr_cnt; i ++) { + rc = ddi_intr_free(ixgbe->htable[i]); + ixgbe->htable[i] = NULL; + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, "Adjust interrupts failed." + "return: %d, irm cb size: %d, actual: %d", + rc, count, actual); + goto intr_adjust_fail; + } + } + ixgbe->intr_cnt -= count; + break; + } + + /* + * Get priority for first vector, assume remaining are all the same + */ + rc = ddi_intr_get_pri(ixgbe->htable[0], &ixgbe->intr_pri); + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, + "Get interrupt priority failed: %d", rc); + goto intr_adjust_fail; + } + rc = ddi_intr_get_cap(ixgbe->htable[0], &ixgbe->intr_cap); + if (rc != DDI_SUCCESS) { + ixgbe_log(ixgbe, "Get interrupt cap failed: %d", rc); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR; + + /* + * Map rings to interrupt vectors + */ + if (ixgbe_map_intrs_to_vectors(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, + "IRM CB: Failed to map interrupts to vectors"); + goto intr_adjust_fail; + } + + /* + * Add interrupt handlers + */ + if (ixgbe_add_intr_handlers(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to add interrupt handlers"); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ADD_INTR; + + /* + * Now that mutex locks are initialized, and the chip is also + * initialized, enable interrupts. 
+ */ + if (ixgbe_enable_intrs(ixgbe) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to enable DDI interrupts"); + goto intr_adjust_fail; + } + ixgbe->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR; + if (ixgbe_start(ixgbe, B_FALSE) != IXGBE_SUCCESS) { + ixgbe_error(ixgbe, "IRM CB: Failed to start"); + goto intr_adjust_fail; + } + ixgbe->ixgbe_state &= ~IXGBE_INTR_ADJUST; + ixgbe->ixgbe_state &= ~IXGBE_SUSPENDED; + ixgbe->ixgbe_state |= IXGBE_STARTED; + mutex_exit(&ixgbe->gen_lock); + + for (i = 0; i < ixgbe->num_rx_rings; i++) { + mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, + ixgbe->htable[ixgbe->rx_rings[i].intr_vector]); + } + for (i = 0; i < ixgbe->num_tx_rings; i++) { + mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, + ixgbe->htable[ixgbe->tx_rings[i].intr_vector]); + } + + /* Wakeup all Tx rings */ + for (i = 0; i < ixgbe->num_tx_rings; i++) { + mac_tx_ring_update(ixgbe->mac_hdl, + ixgbe->tx_rings[i].ring_handle); + } + + IXGBE_DEBUGLOG_3(ixgbe, + "IRM CB: interrupts new value: 0x%x(0x%x:0x%x).", + ixgbe->intr_cnt, ixgbe->intr_cnt_min, ixgbe->intr_cnt_max); + return (DDI_SUCCESS); + +intr_adjust_fail: + ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST); + mutex_exit(&ixgbe->gen_lock); + return (DDI_FAILURE); +} + +/* + * ixgbe_intr_cb_register - Register interrupt callback function. + */ +static int +ixgbe_intr_cb_register(ixgbe_t *ixgbe) +{ + if (ddi_cb_register(ixgbe->dip, DDI_CB_FLAG_INTR, ixgbe_cbfunc, + ixgbe, NULL, &ixgbe->cb_hdl) != DDI_SUCCESS) { + return (IXGBE_FAILURE); + } + IXGBE_DEBUGLOG_0(ixgbe, "Interrupt callback function registered."); + return (IXGBE_SUCCESS); +} + +/* * ixgbe_alloc_rings - Allocate memory space for rx/tx rings. 
*/ static int @@ -1771,21 +2021,22 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) * Initialize the length register */ size = rx_data->ring_size * sizeof (union ixgbe_adv_rx_desc); - IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->index), size); + IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->hw_index), size); /* * Initialize the base address registers */ buf_low = (uint32_t)rx_data->rbd_area.dma_address; buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32); - IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->index), buf_high); - IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->index), buf_low); + IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->hw_index), buf_high); + IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->hw_index), buf_low); /* * Setup head & tail pointers */ - IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->index), rx_data->ring_size - 1); - IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->index), 0); + IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->hw_index), + rx_data->ring_size - 1); + IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->hw_index), 0); rx_data->rbd_next = 0; rx_data->lro_first = 0; @@ -1796,14 +2047,14 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) * HTHRESH=0 descriptors (to minimize latency on fetch) * WTHRESH defaults to 1 (writeback each descriptor) */ - reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->index)); + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index)); reg_val |= IXGBE_RXDCTL_ENABLE; /* enable queue */ /* Not a valid value for 82599 */ if (hw->mac.type < ixgbe_mac_82599EB) { reg_val |= 0x0020; /* pthresh */ } - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->index), reg_val); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index), reg_val); if (hw->mac.type == ixgbe_mac_82599EB) { reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); @@ -1818,7 +2069,7 @@ ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring) reg_val = (ixgbe->rx_buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; reg_val |= IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->index), reg_val); + 
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val); } static void @@ -1826,18 +2077,33 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) { ixgbe_rx_ring_t *rx_ring; struct ixgbe_hw *hw = &ixgbe->hw; - ixgbe_rx_group_t *rx_group; uint32_t reg_val; uint32_t ring_mapping; - int i; + uint32_t i, index; + uint32_t psrtype_rss_bit; /* PSRTYPE must be configured for 82599 */ - reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; -#define IXGBE_PSRTYPE_L2_PKT 0x00001000 - reg_val |= IXGBE_PSRTYPE_L2_PKT; - reg_val |= 0xE0000000; - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val); + if (ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ && + ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ_RSS) { + reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; + reg_val |= IXGBE_PSRTYPE_L2HDR; + reg_val |= 0x80000000; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val); + } else { + if (ixgbe->num_rx_groups > 32) { + psrtype_rss_bit = 0x20000000; + } else { + psrtype_rss_bit = 0x40000000; + } + for (i = 0; i < ixgbe->capab->max_rx_grp_num; i++) { + reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; + reg_val |= IXGBE_PSRTYPE_L2HDR; + reg_val |= psrtype_rss_bit; + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), reg_val); + } + } /* * Set filter control in FCTRL to accept broadcast packets and do @@ -1850,6 +2116,46 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_val); /* + * Hardware checksum settings + */ + if (ixgbe->rx_hcksum_enable) { + reg_val = IXGBE_RXCSUM_IPPCSE; /* IP checksum */ + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val); + } + + /* + * Setup VMDq and RSS for multiple receive queues + */ + switch (ixgbe->classify_mode) { + case IXGBE_CLASSIFY_RSS: + /* + * One group, only RSS is needed when more than + * one ring enabled. 
+ */ + ixgbe_setup_rss(ixgbe); + break; + + case IXGBE_CLASSIFY_VMDQ: + /* + * Multiple groups, each group has one ring, + * only VMDq is needed. + */ + ixgbe_setup_vmdq(ixgbe); + break; + + case IXGBE_CLASSIFY_VMDQ_RSS: + /* + * Multiple groups and multiple rings, both + * VMDq and RSS are needed. + */ + ixgbe_setup_vmdq_rss(ixgbe); + break; + + default: + break; + } + + /* * Enable the receive unit. This must be done after filter * control is set in FCTRL. */ @@ -1866,27 +2172,15 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) } /* - * Setup rx groups. - */ - for (i = 0; i < ixgbe->num_rx_groups; i++) { - rx_group = &ixgbe->rx_groups[i]; - rx_group->index = i; - rx_group->ixgbe = ixgbe; - } - - /* * Setup the per-ring statistics mapping. */ ring_mapping = 0; for (i = 0; i < ixgbe->num_rx_rings; i++) { - ring_mapping |= (i & 0xF) << (8 * (i & 0x3)); - if ((i & 0x3) == 0x3) { - IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i >> 2), ring_mapping); - ring_mapping = 0; - } + index = ixgbe->rx_rings[i].hw_index; + ring_mapping = IXGBE_READ_REG(hw, IXGBE_RQSMR(index >> 2)); + ring_mapping |= (i & 0xF) << (8 * (index & 0x3)); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(index >> 2), ring_mapping); } - if ((i & 0x3) != 0x3) - IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i >> 2), ring_mapping); /* * The Max Frame Size in MHADD/MAXFRS will be internally increased @@ -1906,50 +2200,6 @@ ixgbe_setup_rx(ixgbe_t *ixgbe) reg_val |= IXGBE_HLREG0_JUMBOEN; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_val); } - - /* - * Hardware checksum settings - */ - if (ixgbe->rx_hcksum_enable) { - reg_val = IXGBE_RXCSUM_IPPCSE; /* IP checksum */ - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val); - } - - /* - * Setup RSS for multiple receive queues - */ - if (ixgbe->num_rx_rings > 1) - ixgbe_setup_rss(ixgbe); - - /* - * Setup RSC for multiple receive queues. - */ - if (ixgbe->lro_enable) { - for (i = 0; i < ixgbe->num_rx_rings; i++) { - /* - * Make sure rx_buf_size * MAXDESC not greater - * than 65535. - * Intel recommends 4 for MAXDESC field value. 
- */ - reg_val = IXGBE_READ_REG(hw, IXGBE_RSCCTL(i)); - reg_val |= IXGBE_RSCCTL_RSCEN; - if (ixgbe->rx_buf_size == IXGBE_PKG_BUF_16k) - reg_val |= IXGBE_RSCCTL_MAXDESC_1; - else - reg_val |= IXGBE_RSCCTL_MAXDESC_4; - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(i), reg_val); - } - - reg_val = IXGBE_READ_REG(hw, IXGBE_RSCDBU); - reg_val |= IXGBE_RSCDBU_RSCACKDIS; - IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, reg_val); - - reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - reg_val |= IXGBE_RDRXCTL_RSCACKC; - reg_val &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val); - } } static void @@ -2114,13 +2364,17 @@ ixgbe_setup_rss(ixgbe_t *ixgbe) uint32_t i, mrqc, rxcsum; uint32_t random; uint32_t reta; + uint32_t ring_per_group; /* * Fill out redirection table */ reta = 0; + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + for (i = 0; i < 128; i++) { - reta = (reta << 8) | (i % ixgbe->num_rx_rings); + reta = (reta << 8) | (i % ring_per_group) | + ((i % ring_per_group) << 4); if ((i & 3) == 3) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); } @@ -2161,6 +2415,185 @@ ixgbe_setup_rss(ixgbe_t *ixgbe) } /* + * ixgbe_setup_vmdq - Setup MAC classification feature + */ +static void +ixgbe_setup_vmdq(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t vmdctl, i, vtctl; + + /* + * Setup the VMDq Control register, enable VMDq based on + * packet destination MAC address: + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * VMDq Enable = 1; + * VMDq Filter = 0; MAC filtering + * Default VMDq output index = 0; + */ + vmdctl = IXGBE_VMD_CTL_VMDQ_EN; + IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl); + break; + + case ixgbe_mac_82599EB: + /* + * Enable VMDq-only. + */ + vmdctl = IXGBE_MRQC_VMDQEN; + IXGBE_WRITE_REG(hw, IXGBE_MRQC, vmdctl); + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0); + } + + /* + * Enable Virtualization and Replication. 
+ */ + vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + + /* + * Enable receiving packets to all VFs + */ + IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL); + + break; + + default: + break; + } +} + +/* + * ixgbe_setup_vmdq_rss - Setup both vmdq feature and rss feature. + */ +static void +ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t i, mrqc, rxcsum; + uint32_t random; + uint32_t reta; + uint32_t ring_per_group; + uint32_t vmdctl, vtctl; + + /* + * Fill out redirection table + */ + reta = 0; + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + for (i = 0; i < 128; i++) { + reta = (reta << 8) | (i % ring_per_group) | + ((i % ring_per_group) << 4); + if ((i & 3) == 3) + IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); + } + + /* + * Fill out hash function seeds with a random constant + */ + for (i = 0; i < 10; i++) { + (void) random_get_pseudo_bytes((uint8_t *)&random, + sizeof (uint32_t)); + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random); + } + + /* + * Enable and setup RSS and VMDq + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * Enable RSS & Setup RSS Hash functions + */ + mrqc = IXGBE_MRQC_RSSEN | + IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + /* + * Enable and Setup VMDq + * VMDq Filter = 0; MAC filtering + * Default VMDq output index = 0; + */ + vmdctl = IXGBE_VMD_CTL_VMDQ_EN; + IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl); + break; + + case ixgbe_mac_82599EB: + /* + * Enable RSS & Setup RSS Hash functions + */ + mrqc = IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + 
IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; + + /* + * Enable VMDq+RSS. + */ + if (ixgbe->num_rx_groups > 32) { + mrqc = mrqc | IXGBE_MRQC_VMDQRSS64EN; + } else { + mrqc = mrqc | IXGBE_MRQC_VMDQRSS32EN; + } + + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0); + } + break; + + default: + break; + + } + + /* + * Disable Packet Checksum to enable RSS for multiple receive queues. + * It is an adapter hardware limitation that Packet Checksum is + * mutually exclusive with RSS. + */ + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + rxcsum |= IXGBE_RXCSUM_PCSD; + rxcsum &= ~IXGBE_RXCSUM_IPPCSE; + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); + + if (hw->mac.type == ixgbe_mac_82599EB) { + /* + * Enable Virtualization and Replication. + */ + vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl); + + /* + * Enable receiving packets to all VFs + */ + IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL); + } +} + +/* * ixgbe_init_unicst - Initialize the unicast addresses. 
*/ static void @@ -2183,7 +2616,7 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) /* * Initialize the multiple unicast addresses */ - ixgbe->unicst_total = MAX_NUM_UNICAST_ADDRESSES; + ixgbe->unicst_total = hw->mac.num_rar_entries; ixgbe->unicst_avail = ixgbe->unicst_total; for (slot = 0; slot < ixgbe->unicst_total; slot++) { mac_addr = ixgbe->unicst_addr[slot].mac.addr; @@ -2198,7 +2631,8 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) mac_addr = ixgbe->unicst_addr[slot].mac.addr; if (ixgbe->unicst_addr[slot].mac.set == 1) { (void) ixgbe_set_rar(hw, slot, mac_addr, - NULL, IXGBE_RAH_AV); + ixgbe->unicst_addr[slot].mac.group_index, + IXGBE_RAH_AV); } else { bzero(mac_addr, ETHERADDRL); (void) ixgbe_set_rar(hw, slot, mac_addr, @@ -2209,35 +2643,6 @@ ixgbe_init_unicst(ixgbe_t *ixgbe) } /* - * ixgbe_unicst_set - Set the unicast address to the specified slot. - */ -int -ixgbe_unicst_set(ixgbe_t *ixgbe, const uint8_t *mac_addr, - int slot) -{ - struct ixgbe_hw *hw = &ixgbe->hw; - - ASSERT(mutex_owned(&ixgbe->gen_lock)); - - /* - * Save the unicast address in the software data structure - */ - bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); - - /* - * Set the unicast address to the RAR register - */ - (void) ixgbe_set_rar(hw, slot, (uint8_t *)mac_addr, NULL, IXGBE_RAH_AV); - - if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { - ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); - return (EIO); - } - - return (0); -} - -/* * ixgbe_unicst_find - Find the slot for the specified unicast address */ int @@ -2352,6 +2757,81 @@ ixgbe_setup_multicst(ixgbe_t *ixgbe) } /* + * ixgbe_setup_vmdq_rss_conf - Configure vmdq and rss (number and mode). + * + * Configure the rx classification mode (vmdq & rss) and vmdq & rss numbers. + * Different chipsets may have different allowed configuration of vmdq and rss. 
+ */ +static void +ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe) +{ + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t ring_per_group; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + /* + * 82598 supports the following combination: + * vmdq no. x rss no. + * [5..16] x 1 + * [1..4] x [1..16] + * However 8 rss queue per pool (vmdq) is sufficient for + * most cases. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + if (ixgbe->num_rx_groups > 4) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups; + } else { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(8, ring_per_group); + } + + break; + + case ixgbe_mac_82599EB: + /* + * 82599 supports the following combination: + * vmdq no. x rss no. + * [33..64] x [1..2] + * [2..32] x [1..4] + * 1 x [1..16] + * However 8 rss queue per pool (vmdq) is sufficient for + * most cases. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + if (ixgbe->num_rx_groups == 1) { + ixgbe->num_rx_rings = min(8, ring_per_group); + } else if (ixgbe->num_rx_groups <= 32) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(4, ring_per_group); + } else if (ixgbe->num_rx_groups <= 64) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(2, ring_per_group); + } + + break; + + default: + break; + } + + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + + if (ixgbe->num_rx_groups == 1 && ring_per_group == 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; + } else if (ixgbe->num_rx_groups != 1 && ring_per_group == 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ; + } else if (ixgbe->num_rx_groups != 1 && ring_per_group != 1) { + ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ_RSS; + } else { + ixgbe->classify_mode = IXGBE_CLASSIFY_RSS; + } + + ixgbe_log(ixgbe, "rx group number:%d, rx ring number:%d", + ixgbe->num_rx_groups, ixgbe->num_rx_rings); +} + +/* * ixgbe_get_conf - Get driver configurations set in driver.conf. 
* * This routine gets user-configured values out of the configuration @@ -2434,7 +2914,8 @@ ixgbe_get_conf(ixgbe_t *ixgbe) * Multiple groups configuration */ ixgbe->num_rx_groups = ixgbe_get_prop(ixgbe, PROP_RX_GROUP_NUM, - MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM); + ixgbe->capab->min_rx_grp_num, ixgbe->capab->max_rx_grp_num, + ixgbe->capab->def_rx_grp_num); ixgbe->mr_enable = ixgbe_get_prop(ixgbe, PROP_MR_ENABLE, 0, 1, DEFAULT_MR_ENABLE); @@ -2443,6 +2924,16 @@ ixgbe_get_conf(ixgbe_t *ixgbe) ixgbe->num_tx_rings = 1; ixgbe->num_rx_rings = 1; ixgbe->num_rx_groups = 1; + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; + } else { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + max(ixgbe->num_rx_rings / ixgbe->num_rx_groups, 1); + /* + * The combination of num_rx_rings and num_rx_groups + * may be not supported by h/w. We need to adjust + * them to appropriate values. + */ + ixgbe_setup_vmdq_rss_conf(ixgbe); } /* @@ -3699,6 +4190,7 @@ ixgbe_alloc_intrs(ixgbe_t *ixgbe) ixgbe->num_rx_rings = 1; ixgbe->num_rx_groups = 1; ixgbe->num_tx_rings = 1; + ixgbe->classify_mode = IXGBE_CLASSIFY_NONE; ixgbe_log(ixgbe, "MSI-X not used, force rings and groups number to 1"); @@ -3745,9 +4237,10 @@ static int ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) { dev_info_t *devinfo; - int request, count, avail, actual; + int request, count, actual; int minimum; int rc; + uint32_t ring_per_group; devinfo = ixgbe->dip; @@ -3767,12 +4260,13 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) case DDI_INTR_TYPE_MSIX: /* * Best number of vectors for the adapter is - * # rx rings + # tx rings. + * (# rx rings + # tx rings), however we will + * limit the request number. 
*/ - request = ixgbe->num_rx_rings + ixgbe->num_tx_rings; + request = min(16, ixgbe->num_rx_rings + ixgbe->num_tx_rings); if (request > ixgbe->capab->max_ring_vect) request = ixgbe->capab->max_ring_vect; - minimum = 2; + minimum = 1; IXGBE_DEBUGLOG_0(ixgbe, "interrupt type: MSI-X"); break; @@ -3797,26 +4291,10 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) } IXGBE_DEBUGLOG_1(ixgbe, "interrupts supported: %d", count); - /* - * Get number of available interrupts - */ - rc = ddi_intr_get_navail(devinfo, intr_type, &avail); - if ((rc != DDI_SUCCESS) || (avail < minimum)) { - ixgbe_log(ixgbe, - "Get interrupt available number failed. " - "Return: %d, available: %d", rc, avail); - return (IXGBE_FAILURE); - } - IXGBE_DEBUGLOG_1(ixgbe, "interrupts available: %d", avail); - - if (avail < request) { - ixgbe_log(ixgbe, "Request %d handles, %d available", - request, avail); - request = avail; - } - actual = 0; ixgbe->intr_cnt = 0; + ixgbe->intr_cnt_max = 0; + ixgbe->intr_cnt_min = 0; /* * Allocate an array of interrupt handles @@ -3834,7 +4312,24 @@ ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type) } IXGBE_DEBUGLOG_1(ixgbe, "interrupts actually allocated: %d", actual); + /* + * upper/lower limit of interrupts + */ ixgbe->intr_cnt = actual; + ixgbe->intr_cnt_max = request; + ixgbe->intr_cnt_min = minimum; + + /* + * rss number per group should not exceed the rx interrupt number, + * else need to adjust rx ring number. + */ + ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + ASSERT((ixgbe->num_rx_rings % ixgbe->num_rx_groups) == 0); + if (min(actual, ixgbe->num_rx_rings) < ring_per_group) { + ixgbe->num_rx_rings = ixgbe->num_rx_groups * + min(actual, ixgbe->num_rx_rings); + ixgbe_setup_vmdq_rss_conf(ixgbe); + } /* * Now we know the actual number of vectors. 
Here we map the vector @@ -4147,6 +4642,53 @@ ixgbe_disable_ivar(ixgbe_t *ixgbe, uint16_t intr_alloc_entry, int8_t cause) } /* + * Convert the rx ring index driver maintained to the rx ring index + * in h/w. + */ +static uint32_t +ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index) +{ + + struct ixgbe_hw *hw = &ixgbe->hw; + uint32_t rx_ring_per_group, hw_rx_index; + + if (ixgbe->classify_mode == IXGBE_CLASSIFY_RSS || + ixgbe->classify_mode == IXGBE_CLASSIFY_NONE) { + return (sw_rx_index); + } else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ) { + if (hw->mac.type == ixgbe_mac_82598EB) { + return (sw_rx_index); + } else if (hw->mac.type == ixgbe_mac_82599EB) { + return (sw_rx_index * 2); + } + } else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) { + rx_ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups; + + if (hw->mac.type == ixgbe_mac_82598EB) { + hw_rx_index = (sw_rx_index / rx_ring_per_group) * + 16 + (sw_rx_index % rx_ring_per_group); + return (hw_rx_index); + } else if (hw->mac.type == ixgbe_mac_82599EB) { + if (ixgbe->num_rx_groups > 32) { + hw_rx_index = (sw_rx_index / + rx_ring_per_group) * 2 + + (sw_rx_index % rx_ring_per_group); + } else { + hw_rx_index = (sw_rx_index / + rx_ring_per_group) * 4 + + (sw_rx_index % rx_ring_per_group); + } + return (hw_rx_index); + } + } + + /* + * Should never reach. Just to make compiler happy. + */ + return (sw_rx_index); +} + +/* * ixgbe_map_intrs_to_vectors - Map different interrupts to MSI-X vectors. 
* * For MSI-X, here will map rx interrupt, tx interrupt and other interrupt @@ -4183,7 +4725,6 @@ ixgbe_map_intrs_to_vectors(ixgbe_t *ixgbe) */ BT_SET(ixgbe->vect_map[vector].other_map, 0); ixgbe->vect_map[vector].other_cnt++; - vector++; /* * Map rx ring interrupts to vectors @@ -4217,6 +4758,7 @@ ixgbe_setup_adapter_vector(ixgbe_t *ixgbe) ixgbe_intr_vector_t *vect; /* vector bitmap */ int r_idx; /* ring index */ int v_idx; /* vector index */ + uint32_t hw_index; /* * Clear any previous entries @@ -4265,7 +4807,8 @@ ixgbe_setup_adapter_vector(ixgbe_t *ixgbe) (ixgbe->num_rx_rings - 1)); while (r_idx >= 0) { - ixgbe_setup_ivar(ixgbe, r_idx, v_idx, 0); + hw_index = ixgbe->rx_rings[r_idx].hw_index; + ixgbe_setup_ivar(ixgbe, hw_index, v_idx, 0); r_idx = bt_getlowbit(vect->rx_map, (r_idx + 1), (ixgbe->num_rx_rings - 1)); } @@ -4637,11 +5180,31 @@ ixgbe_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) } /* + * Get the global ring index by a ring index within a group. + */ +static int +ixgbe_get_rx_ring_index(ixgbe_t *ixgbe, int gindex, int rindex) +{ + ixgbe_rx_ring_t *rx_ring; + int i; + + for (i = 0; i < ixgbe->num_rx_rings; i++) { + rx_ring = &ixgbe->rx_rings[i]; + if (rx_ring->group_index == gindex) + rindex--; + if (rindex < 0) + return (i); + } + + return (-1); +} + +/* * Callback funtion for MAC layer to register all rings. */ /* ARGSUSED */ void -ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, +ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int group_index, const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) { ixgbe_t *ixgbe = (ixgbe_t *)arg; @@ -4649,25 +5212,37 @@ ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, switch (rtype) { case MAC_RING_TYPE_RX: { - ASSERT(rg_index == 0); - ASSERT(ring_index < ixgbe->num_rx_rings); + /* + * 'index' is the ring index within the group. + * Need to get the global ring index by searching in groups. 
+ */ + int global_ring_index = ixgbe_get_rx_ring_index( + ixgbe, group_index, ring_index); - ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[ring_index]; + ASSERT(global_ring_index >= 0); + + ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[global_ring_index]; rx_ring->ring_handle = rh; infop->mri_driver = (mac_ring_driver_t)rx_ring; infop->mri_start = ixgbe_ring_start; infop->mri_stop = NULL; infop->mri_poll = ixgbe_ring_rx_poll; + infop->mri_stat = ixgbe_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)rx_ring; mintr->mi_enable = ixgbe_rx_ring_intr_enable; mintr->mi_disable = ixgbe_rx_ring_intr_disable; + if (ixgbe->intr_type & + (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + ixgbe->htable[rx_ring->intr_vector]; + } break; } case MAC_RING_TYPE_TX: { - ASSERT(rg_index == -1); + ASSERT(group_index == -1); ASSERT(ring_index < ixgbe->num_tx_rings); ixgbe_tx_ring_t *tx_ring = &ixgbe->tx_rings[ring_index]; @@ -4677,7 +5252,12 @@ ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = ixgbe_ring_tx; - + infop->mri_stat = ixgbe_tx_ring_stat; + if (ixgbe->intr_type & + (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) { + mintr->mi_ddi_handle = + ixgbe->htable[tx_ring->intr_vector]; + } break; } default: @@ -4726,16 +5306,26 @@ ixgbe_rx_ring_intr_enable(mac_intr_handle_t intrh) ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh; ixgbe_t *ixgbe = rx_ring->ixgbe; int r_idx = rx_ring->index; + int hw_r_idx = rx_ring->hw_index; int v_idx = rx_ring->intr_vector; mutex_enter(&ixgbe->gen_lock); - ASSERT(BT_TEST(ixgbe->vect_map[v_idx].rx_map, r_idx) == 0); + if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) { + mutex_exit(&ixgbe->gen_lock); + /* + * Simply return 0. + * Interrupts are being adjusted. ixgbe_intr_adjust() + * will eventually re-enable the interrupt when it's + * done with the adjustment. 
+ */ + return (0); + } /* * To enable interrupt by setting the VAL bit of given interrupt * vector allocation register (IVAR). */ - ixgbe_enable_ivar(ixgbe, r_idx, 0); + ixgbe_enable_ivar(ixgbe, hw_r_idx, 0); BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx); @@ -4759,16 +5349,34 @@ ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh) ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh; ixgbe_t *ixgbe = rx_ring->ixgbe; int r_idx = rx_ring->index; + int hw_r_idx = rx_ring->hw_index; int v_idx = rx_ring->intr_vector; mutex_enter(&ixgbe->gen_lock); - ASSERT(BT_TEST(ixgbe->vect_map[v_idx].rx_map, r_idx) == 1); + if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) { + mutex_exit(&ixgbe->gen_lock); + /* + * Simply return 0. + * In the rare case where an interrupt is being + * disabled while interrupts are being adjusted, + * we don't fail the operation. No interrupts will + * be generated while they are adjusted, and + * ixgbe_intr_adjust() will cause the interrupts + * to be re-enabled once it completes. Note that + * in this case, packets may be delivered to the + * stack via interrupts before xgbe_rx_ring_intr_enable() + * is called again. This is acceptable since interrupt + * adjustment is infrequent, and the stack will be + * able to handle these packets. + */ + return (0); + } /* * To disable interrupt by clearing the VAL bit of given interrupt * vector allocation register (IVAR). 
*/ - ixgbe_disable_ivar(ixgbe, r_idx, 0); + ixgbe_disable_ivar(ixgbe, hw_r_idx, 0); BT_CLEAR(ixgbe->vect_map[v_idx].rx_map, r_idx); @@ -4785,8 +5393,8 @@ ixgbe_addmac(void *arg, const uint8_t *mac_addr) { ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg; ixgbe_t *ixgbe = rx_group->ixgbe; - int slot; - int err; + struct ixgbe_hw *hw = &ixgbe->hw; + int slot, i; mutex_enter(&ixgbe->gen_lock); @@ -4801,21 +5409,40 @@ ixgbe_addmac(void *arg, const uint8_t *mac_addr) return (ENOSPC); } - for (slot = 0; slot < ixgbe->unicst_total; slot++) { - if (ixgbe->unicst_addr[slot].mac.set == 0) - break; + /* + * The first ixgbe->num_rx_groups slots are reserved for each respective + * group. The rest slots are shared by all groups. While adding a + * MAC address, reserved slots are firstly checked then the shared + * slots are searched. + */ + slot = -1; + if (ixgbe->unicst_addr[rx_group->index].mac.set == 1) { + for (i = ixgbe->num_rx_groups; i < ixgbe->unicst_total; i++) { + if (ixgbe->unicst_addr[i].mac.set == 0) { + slot = i; + break; + } + } + } else { + slot = rx_group->index; } - ASSERT((slot >= 0) && (slot < ixgbe->unicst_total)); - - if ((err = ixgbe_unicst_set(ixgbe, mac_addr, slot)) == 0) { - ixgbe->unicst_addr[slot].mac.set = 1; - ixgbe->unicst_avail--; + if (slot == -1) { + /* no slots available */ + mutex_exit(&ixgbe->gen_lock); + return (ENOSPC); } + bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); + (void) ixgbe_set_rar(hw, slot, ixgbe->unicst_addr[slot].mac.addr, + rx_group->index, IXGBE_RAH_AV); + ixgbe->unicst_addr[slot].mac.set = 1; + ixgbe->unicst_addr[slot].mac.group_index = rx_group->index; + ixgbe->unicst_avail--; + mutex_exit(&ixgbe->gen_lock); - return (err); + return (0); } /* @@ -4826,8 +5453,8 @@ ixgbe_remmac(void *arg, const uint8_t *mac_addr) { ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg; ixgbe_t *ixgbe = rx_group->ixgbe; + struct ixgbe_hw *hw = &ixgbe->hw; int slot; - int err; mutex_enter(&ixgbe->gen_lock); @@ -4848,13 
+5475,11 @@ ixgbe_remmac(void *arg, const uint8_t *mac_addr) } bzero(ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL); - if ((err = ixgbe_unicst_set(ixgbe, - ixgbe->unicst_addr[slot].mac.addr, slot)) == 0) { - ixgbe->unicst_addr[slot].mac.set = 0; - ixgbe->unicst_avail++; - } + (void) ixgbe_clear_rar(hw, slot); + ixgbe->unicst_addr[slot].mac.set = 0; + ixgbe->unicst_avail++; mutex_exit(&ixgbe->gen_lock); - return (err); + return (0); } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h index d9747b4f8f..53690b3a46 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h @@ -6,14 +6,13 @@ * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * - * You can obtain a copy of the license at: - * http://www.opensolaris.org/os/licensing. + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * - * When using or redistributing this file, you may do so under the - * License only. No other modification of this header is permitted. - * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] @@ -22,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -71,8 +70,6 @@ void ixgbe_write_pci_cfg(struct ixgbe_hw *, uint32_t, uint32_t); #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER 0x04 #define PCI_EX_CONF_CAP 0xE0 -#define MAX_NUM_UNICAST_ADDRESSES 0x10 -#define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define SPEED_10GB 10000 #define SPEED_1GB 1000 #define SPEED_100 100 diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_rx.c b/usr/src/uts/common/io/ixgbe/ixgbe_rx.c index 2ed6a09405..c9efa55a87 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_rx.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_rx.c @@ -514,18 +514,17 @@ ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) */ if ((status_error & IXGBE_RXD_STAT_L4CS) && !(status_error & IXGBE_RXDADV_ERR_TCPE)) - hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + hcksum_flags |= HCK_FULLCKSUM_OK; /* * Check IP Checksum */ if ((status_error & IXGBE_RXD_STAT_IPCS) && !(status_error & IXGBE_RXDADV_ERR_IPE)) - hcksum_flags |= HCK_IPV4_HDRCKSUM; + hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; if (hcksum_flags != 0) { - (void) hcksum_assoc(mp, - NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); } } @@ -722,6 +721,9 @@ rx_discard: status_error = current_rbd->wb.upper.status_error; } + rx_ring->stat_rbytes += received_bytes; + rx_ring->stat_ipackets += pkt_num; + DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV); rx_data->rbd_next = rx_next; @@ -735,7 +737,7 @@ rx_discard: } else rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size); - IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->index), rx_tail); + IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->hw_index), rx_tail); if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_stat.c b/usr/src/uts/common/io/ixgbe/ixgbe_stat.c index 54dfdbff09..4d95a00d9f 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_stat.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_stat.c @@ -438,3 
+438,358 @@ ixgbe_init_stats(ixgbe_t *ixgbe) return (IXGBE_SUCCESS); } + +/* + * Retrieve a value for one of the statistics. + */ +int +ixgbe_m_stat(void *arg, uint_t stat, uint64_t *val) +{ + ixgbe_t *ixgbe = (ixgbe_t *)arg; + struct ixgbe_hw *hw = &ixgbe->hw; + ixgbe_stat_t *ixgbe_ks; + int i; + + ixgbe_ks = (ixgbe_stat_t *)ixgbe->ixgbe_ks->ks_data; + + mutex_enter(&ixgbe->gen_lock); + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + mutex_exit(&ixgbe->gen_lock); + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_IFSPEED: + *val = ixgbe->link_speed * 1000000ull; + break; + + case MAC_STAT_MULTIRCV: + ixgbe_ks->mprc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_MPRC); + *val = ixgbe_ks->mprc.value.ui64; + break; + + case MAC_STAT_BRDCSTRCV: + ixgbe_ks->bprc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_BPRC); + *val = ixgbe_ks->bprc.value.ui64; + break; + + case MAC_STAT_MULTIXMT: + ixgbe_ks->mptc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_MPTC); + *val = ixgbe_ks->mptc.value.ui64; + break; + + case MAC_STAT_BRDCSTXMT: + ixgbe_ks->bptc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_BPTC); + *val = ixgbe_ks->bptc.value.ui64; + break; + + case MAC_STAT_NORCVBUF: + for (i = 0; i < 8; i++) { + ixgbe_ks->rnbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + } + *val = ixgbe_ks->rnbc.value.ui64; + break; + + case MAC_STAT_IERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + ixgbe_ks->illerrc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + ixgbe_ks->errbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ERRBC); + ixgbe_ks->rlec.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RLEC); + *val = ixgbe_ks->crcerrs.value.ui64 + + ixgbe_ks->illerrc.value.ui64 + + ixgbe_ks->errbc.value.ui64 + + ixgbe_ks->rlec.value.ui64; + break; + + case MAC_STAT_RBYTES: + ixgbe_ks->tor.value.ui64 = 0; + for (i = 0; i < 16; i++) { + ixgbe_ks->qbrc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + ixgbe_ks->tor.value.ui64 += + ixgbe_ks->qbrc[i].value.ui64; + } + *val = 
ixgbe_ks->tor.value.ui64; + break; + + case MAC_STAT_OBYTES: + ixgbe_ks->tot.value.ui64 = 0; + for (i = 0; i < 16; i++) { + if (hw->mac.type >= ixgbe_mac_82599EB) { + ixgbe_ks->qbtc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + ixgbe_ks->qbtc[i].value.ui64 += ((uint64_t) + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i))) << 32; + } else { + ixgbe_ks->qbtc[i].value.ui64 += + IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + } + ixgbe_ks->tot.value.ui64 += + ixgbe_ks->qbtc[i].value.ui64; + } + *val = ixgbe_ks->tot.value.ui64; + break; + + case MAC_STAT_IPACKETS: + ixgbe_ks->tpr.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_TPR); + *val = ixgbe_ks->tpr.value.ui64; + break; + + case MAC_STAT_OPACKETS: + ixgbe_ks->tpt.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_TPT); + *val = ixgbe_ks->tpt.value.ui64; + break; + + /* RFC 1643 stats */ + case ETHER_STAT_FCS_ERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + *val = ixgbe_ks->crcerrs.value.ui64; + break; + + case ETHER_STAT_TOOLONG_ERRORS: + ixgbe_ks->roc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ROC); + *val = ixgbe_ks->roc.value.ui64; + break; + + case ETHER_STAT_MACRCV_ERRORS: + ixgbe_ks->crcerrs.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + ixgbe_ks->illerrc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + ixgbe_ks->errbc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_ERRBC); + ixgbe_ks->rlec.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RLEC); + *val = ixgbe_ks->crcerrs.value.ui64 + + ixgbe_ks->illerrc.value.ui64 + + ixgbe_ks->errbc.value.ui64 + + ixgbe_ks->rlec.value.ui64; + break; + + /* MII/GMII stats */ + case ETHER_STAT_XCVR_ADDR: + /* The Internal PHY's MDI address for each MAC is 1 */ + *val = 1; + break; + + case ETHER_STAT_XCVR_ID: + *val = hw->phy.id; + break; + + case ETHER_STAT_XCVR_INUSE: + switch (ixgbe->link_speed) { + case IXGBE_LINK_SPEED_1GB_FULL: + *val = + (hw->phy.media_type == ixgbe_media_type_copper) ? 
+ XCVR_1000T : XCVR_1000X; + break; + case IXGBE_LINK_SPEED_100_FULL: + *val = (hw->phy.media_type == ixgbe_media_type_copper) ? + XCVR_100T2 : XCVR_100X; + break; + default: + *val = XCVR_NONE; + break; + } + break; + + case ETHER_STAT_CAP_10GFDX: + *val = 1; + break; + + case ETHER_STAT_CAP_1000FDX: + *val = 1; + break; + + case ETHER_STAT_CAP_100FDX: + *val = 1; + break; + + case ETHER_STAT_CAP_ASMPAUSE: + *val = ixgbe->param_asym_pause_cap; + break; + + case ETHER_STAT_CAP_PAUSE: + *val = ixgbe->param_pause_cap; + break; + + case ETHER_STAT_CAP_AUTONEG: + *val = 1; + break; + + case ETHER_STAT_ADV_CAP_10GFDX: + *val = ixgbe->param_adv_10000fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_1000FDX: + *val = ixgbe->param_adv_1000fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_100FDX: + *val = ixgbe->param_adv_100fdx_cap; + break; + + case ETHER_STAT_ADV_CAP_ASMPAUSE: + *val = ixgbe->param_adv_asym_pause_cap; + break; + + case ETHER_STAT_ADV_CAP_PAUSE: + *val = ixgbe->param_adv_pause_cap; + break; + + case ETHER_STAT_ADV_CAP_AUTONEG: + *val = ixgbe->param_adv_autoneg_cap; + break; + + case ETHER_STAT_LP_CAP_10GFDX: + *val = ixgbe->param_lp_10000fdx_cap; + break; + + case ETHER_STAT_LP_CAP_1000FDX: + *val = ixgbe->param_lp_1000fdx_cap; + break; + + case ETHER_STAT_LP_CAP_100FDX: + *val = ixgbe->param_lp_100fdx_cap; + break; + + case ETHER_STAT_LP_CAP_ASMPAUSE: + *val = ixgbe->param_lp_asym_pause_cap; + break; + + case ETHER_STAT_LP_CAP_PAUSE: + *val = ixgbe->param_lp_pause_cap; + break; + + case ETHER_STAT_LP_CAP_AUTONEG: + *val = ixgbe->param_lp_autoneg_cap; + break; + + case ETHER_STAT_LINK_ASMPAUSE: + *val = ixgbe->param_asym_pause_cap; + break; + + case ETHER_STAT_LINK_PAUSE: + *val = ixgbe->param_pause_cap; + break; + + case ETHER_STAT_LINK_AUTONEG: + *val = ixgbe->param_adv_autoneg_cap; + break; + + case ETHER_STAT_LINK_DUPLEX: + *val = ixgbe->link_duplex; + break; + + case ETHER_STAT_TOOSHORT_ERRORS: + ixgbe_ks->ruc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RUC); + 
*val = ixgbe_ks->ruc.value.ui64; + break; + + case ETHER_STAT_CAP_REMFAULT: + *val = ixgbe->param_rem_fault; + break; + + case ETHER_STAT_ADV_REMFAULT: + *val = ixgbe->param_adv_rem_fault; + break; + + case ETHER_STAT_LP_REMFAULT: + *val = ixgbe->param_lp_rem_fault; + break; + + case ETHER_STAT_JABBER_ERRORS: + ixgbe_ks->rjc.value.ui64 += + IXGBE_READ_REG(hw, IXGBE_RJC); + *val = ixgbe_ks->rjc.value.ui64; + break; + + default: + mutex_exit(&ixgbe->gen_lock); + return (ENOTSUP); + } + + mutex_exit(&ixgbe->gen_lock); + + if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) { + ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED); + return (EIO); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +ixgbe_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)rh; + ixgbe_t *ixgbe = rx_ring->ixgbe; + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->stat_rbytes; + break; + + case MAC_STAT_IPACKETS: + *val = rx_ring->stat_ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +ixgbe_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)rh; + ixgbe_t *ixgbe = tx_ring->ixgbe; + + if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) { + return (ECANCELED); + } + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->stat_obytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->stat_opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h index 30dd825e0f..f5e68fcd87 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h 
@@ -80,9 +80,10 @@ extern "C" { #define IXGBE_STARTED 0x02 #define IXGBE_SUSPENDED 0x04 #define IXGBE_STALL 0x08 +#define IXGBE_INTR_ADJUST 0x40 #define IXGBE_ERROR 0x80 -#define MAX_NUM_UNICAST_ADDRESSES 0x10 +#define MAX_NUM_UNICAST_ADDRESSES 0x80 #define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define IXGBE_INTR_NONE 0 #define IXGBE_INTR_MSIX 1 @@ -109,11 +110,11 @@ extern "C" { #define MAX_TX_QUEUE_NUM 128 #define MAX_RX_QUEUE_NUM 128 #define MAX_INTR_VECTOR 64 +#define MAX_RX_GROUP_NUM 64 /* * Maximum values for user configurable parameters */ -#define MAX_RX_GROUP_NUM 1 #define MAX_TX_RING_SIZE 4096 #define MAX_RX_RING_SIZE 4096 @@ -128,7 +129,6 @@ extern "C" { /* * Minimum values for user configurable parameters */ -#define MIN_RX_GROUP_NUM 1 #define MIN_TX_RING_SIZE 64 #define MIN_RX_RING_SIZE 64 @@ -143,7 +143,6 @@ extern "C" { /* * Default values for user configurable parameters */ -#define DEFAULT_RX_GROUP_NUM 1 #define DEFAULT_TX_RING_SIZE 1024 #define DEFAULT_RX_RING_SIZE 1024 @@ -251,11 +250,22 @@ extern "C" { #define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 8) #define IXGBE_FLAG_RSC_CAPABLE (u32)(1 << 9) +/* + * Classification mode + */ +#define IXGBE_CLASSIFY_NONE 0 +#define IXGBE_CLASSIFY_RSS 1 +#define IXGBE_CLASSIFY_VMDQ 2 +#define IXGBE_CLASSIFY_VMDQ_RSS 3 + /* adapter-specific info for each supported device type */ typedef struct adapter_info { - uint32_t max_rx_que_num; /* maximum number of rx queues */ - uint32_t min_rx_que_num; /* minimum number of rx queues */ - uint32_t def_rx_que_num; /* default number of rx queues */ + uint32_t max_rx_que_num; /* maximum number of rx queues */ + uint32_t min_rx_que_num; /* minimum number of rx queues */ + uint32_t def_rx_que_num; /* default number of rx queues */ + uint32_t max_rx_grp_num; /* maximum number of rx groups */ + uint32_t min_rx_grp_num; /* minimum number of rx groups */ + uint32_t def_rx_grp_num; /* default number of rx groups */ uint32_t max_tx_que_num; /* maximum number of tx queues */ uint32_t 
min_tx_que_num; /* minimum number of tx queues */ uint32_t def_tx_que_num; /* default number of tx queues */ @@ -358,7 +368,7 @@ typedef union ixgbe_ether_addr { } reg; struct { uint8_t set; - uint8_t redundant; + uint8_t group_index; uint8_t addr[ETHERADDRL]; } mac; } ixgbe_ether_addr_t; @@ -494,6 +504,8 @@ typedef struct ixgbe_tx_ring { uint32_t stat_break_tbd_limit; uint32_t stat_lso_header_fail; #endif + uint64_t stat_obytes; + uint64_t stat_opackets; mac_ring_handle_t ring_handle; @@ -546,6 +558,8 @@ typedef struct ixgbe_rx_data { */ typedef struct ixgbe_rx_ring { uint32_t index; /* Ring index */ + uint32_t group_index; /* Group index */ + uint32_t hw_index; /* h/w ring index */ uint32_t intr_vector; /* Interrupt vector index */ uint32_t vect_bit; /* vector's bit in register */ @@ -561,6 +575,8 @@ typedef struct ixgbe_rx_ring { uint32_t stat_cksum_error; uint32_t stat_exceed_pkt; #endif + uint64_t stat_rbytes; + uint64_t stat_ipackets; mac_ring_handle_t ring_handle; uint64_t ring_gen_num; @@ -651,6 +667,7 @@ typedef struct ixgbe { boolean_t tx_hcksum_enable; /* Tx h/w cksum offload */ boolean_t lso_enable; /* Large Segment Offload */ boolean_t mr_enable; /* Multiple Tx and Rx Ring */ + uint32_t classify_mode; /* Classification mode */ uint32_t tx_copy_thresh; /* Tx copy threshold */ uint32_t tx_recycle_thresh; /* Tx recycle threshold */ uint32_t tx_overload_thresh; /* Tx overload threshold */ @@ -664,11 +681,14 @@ typedef struct ixgbe { int intr_type; int intr_cnt; + uint32_t intr_cnt_max; + uint32_t intr_cnt_min; int intr_cap; size_t intr_size; uint_t intr_pri; ddi_intr_handle_t *htable; uint32_t eims_mask; + ddi_cb_handle_t cb_hdl; /* Interrupt callback handle */ kmutex_t gen_lock; /* General lock for device access */ kmutex_t watchdog_lock; @@ -825,16 +845,15 @@ int ixgbe_m_start(void *); void ixgbe_m_stop(void *); int ixgbe_m_promisc(void *, boolean_t); int ixgbe_m_multicst(void *, boolean_t, const uint8_t *); -int ixgbe_m_stat(void *, uint_t, uint64_t *); 
void ixgbe_m_resources(void *); void ixgbe_m_ioctl(void *, queue_t *, mblk_t *); boolean_t ixgbe_m_getcapab(void *, mac_capab_t, void *); int ixgbe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int ixgbe_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +int ixgbe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void ixgbe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); int ixgbe_set_priv_prop(ixgbe_t *, const char *, uint_t, const void *); -int ixgbe_get_priv_prop(ixgbe_t *, const char *, - uint_t, uint_t, void *, uint_t *); +int ixgbe_get_priv_prop(ixgbe_t *, const char *, uint_t, void *); boolean_t ixgbe_param_locked(mac_prop_id_t); /* @@ -864,6 +883,9 @@ void ixgbe_error(void *, const char *, ...); * Function prototypes in ixgbe_stat.c */ int ixgbe_init_stats(ixgbe_t *); +int ixgbe_m_stat(void *, uint_t, uint64_t *); +int ixgbe_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int ixgbe_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c index 310b6226fd..484b9c11e3 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c @@ -21,7 +21,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -501,6 +501,9 @@ adjust_threshold: ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1))); + tx_ring->stat_obytes += mbsize; + tx_ring->stat_opackets ++; + mutex_exit(&tx_ring->tx_lock); /* @@ -696,7 +699,7 @@ ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) ASSERT(mp != NULL); - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags); bzero(ctx, sizeof (ixgbe_tx_context_t)); if (hckflags == 0) { @@ -705,7 +708,7 @@ ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx) ctx->hcksum_flags = hckflags; - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); ctx->mss = mss; ctx->lso_flag = (lsoflags == HW_LSO); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 92d1542efd..e9e1a62e1b 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -280,6 +280,7 @@ #include <sys/mac_provider.h> #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/mac_impl.h> #include <sys/mac.h> #include <sys/dls.h> @@ -306,6 +307,11 @@ #include <sys/exacct_impl.h> #include <inet/nd.h> #include <sys/ethernet.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> +#include <sys/cpupart.h> +#include <inet/wifi_ioctl.h> +#include <net/wpa.h> #define IMPL_HASHSZ 67 /* prime */ @@ -316,6 +322,7 @@ uint_t i_mac_impl_count; static kmem_cache_t *mac_ring_cache; static id_space_t *minor_ids; static uint32_t minor_count; +static pool_event_cb_t mac_pool_event_reg; /* * Logging stuff. 
Perhaps mac_logging_interval could be broken into @@ -370,6 +377,7 @@ void mac_tx_client_block(mac_client_impl_t *); static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); static int mac_start_group_and_rings(mac_group_t *); static void mac_stop_group_and_rings(mac_group_t *); +static void mac_pool_event_cb(pool_event_t, int, void *); /* * Module initialization functions. @@ -440,14 +448,22 @@ mac_init(void) mac_flow_log_enable = B_FALSE; mac_link_log_enable = B_FALSE; mac_logging_timer = 0; + + /* Register to be notified of noteworthy pools events */ + mac_pool_event_reg.pec_func = mac_pool_event_cb; + mac_pool_event_reg.pec_arg = NULL; + pool_event_cb_register(&mac_pool_event_reg); } int mac_fini(void) { + if (i_mac_impl_count > 0 || minor_count > 0) return (EBUSY); + pool_event_cb_unregister(&mac_pool_event_reg); + id_space_destroy(minor_ids); mac_flow_fini(); @@ -459,6 +475,8 @@ mac_fini(void) mod_hash_destroy_hash(i_mactype_hash); mac_soft_ring_finish(); + + return (0); } @@ -501,7 +519,6 @@ i_mac_constructor(void *buf, void *arg, int kmflag) mip->mi_linkstate = LINK_STATE_UNKNOWN; - mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL); mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL); @@ -554,7 +571,6 @@ i_mac_destructor(void *buf, void *arg) ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL); ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0); - mutex_destroy(&mip->mi_lock); rw_destroy(&mip->mi_rw_lock); mutex_destroy(&mip->mi_promisc_lock); @@ -1049,6 +1065,7 @@ mac_start(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; int err = 0; + mac_group_t *defgrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); ASSERT(mip->mi_start != NULL); @@ -1074,33 +1091,31 @@ mac_start(mac_handle_t mh) if (mip->mi_default_tx_ring != NULL) { ring = (mac_ring_t *)mip->mi_default_tx_ring; - err = mac_start_ring(ring); - if (err != 0) { - 
mip->mi_active--; - return (err); + if (ring->mr_state != MR_INUSE) { + err = mac_start_ring(ring); + if (err != 0) { + mip->mi_active--; + return (err); + } } - ring->mr_state = MR_INUSE; } - if (mip->mi_rx_groups != NULL) { + if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* * Start the default ring, since it will be needed * to receive broadcast and multicast traffic for * both primary and non-primary MAC clients. */ - mac_group_t *grp = &mip->mi_rx_groups[0]; - - ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); - err = mac_start_group_and_rings(grp); + ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED); + err = mac_start_group_and_rings(defgrp); if (err != 0) { mip->mi_active--; - if (ring != NULL) { + if ((ring != NULL) && + (ring->mr_state == MR_INUSE)) mac_stop_ring(ring); - ring->mr_state = MR_FREE; - } return (err); } - mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED); + mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED); } } @@ -1114,6 +1129,7 @@ void mac_stop(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; + mac_group_t *grp; ASSERT(mip->mi_stop != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -1123,15 +1139,12 @@ mac_stop(mac_handle_t mh) */ ASSERT(mip->mi_active != 0); if (--mip->mi_active == 0) { - if (mip->mi_rx_groups != NULL) { + if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) { /* * There should be no more active clients since the * MAC is being stopped. Stop the default RX group * and transition it back to registered state. - */ - mac_group_t *grp = &mip->mi_rx_groups[0]; - - /* + * * When clients are torn down, the groups * are release via mac_release_rx_group which * knows the the default group is always in @@ -1141,18 +1154,20 @@ mac_stop(mac_handle_t mh) * as a client) and group is in SHARED state. 
*/ ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED); - ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) && + ASSERT(MAC_GROUP_NO_CLIENT(grp) && mip->mi_nactiveclients == 0); mac_stop_group_and_rings(grp); - mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED); } if (mip->mi_default_tx_ring != NULL) { mac_ring_t *ring; ring = (mac_ring_t *)mip->mi_default_tx_ring; - mac_stop_ring(ring); - ring->mr_state = MR_FREE; + if (ring->mr_state == MR_INUSE) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } } /* @@ -1460,74 +1475,111 @@ mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, mac_ring_handle_t *hwrh, mac_ring_type_t rtype) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *grp; + mac_ring_t *ring; int cnt = 0; - switch (rtype) { - case MAC_RING_TYPE_RX: { - flow_entry_t *flent = mcip->mci_flent; - mac_group_t *grp; - mac_ring_t *ring; - + if (rtype == MAC_RING_TYPE_RX) { grp = flent->fe_rx_ring_group; - /* - * The mac client did not reserve any RX group, return directly. - * This is probably because the underlying MAC does not support - * any groups. - */ - *hwgh = NULL; - if (grp == NULL) - return (0); - /* - * This group must be reserved by this mac client. 
- */ - ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && - (mch == (mac_client_handle_t) - (MAC_RX_GROUP_ONLY_CLIENT(grp)))); - for (ring = grp->mrg_rings; - ring != NULL; ring = ring->mr_next, cnt++) { - ASSERT(cnt < MAX_RINGS_PER_GROUP); - hwrh[cnt] = (mac_ring_handle_t)ring; - } - *hwgh = (mac_group_handle_t)grp; - return (cnt); - } - case MAC_RING_TYPE_TX: { - mac_soft_ring_set_t *tx_srs; - mac_srs_tx_t *tx; - - tx_srs = MCIP_TX_SRS(mcip); - tx = &tx_srs->srs_tx; - for (; cnt < tx->st_ring_count; cnt++) - hwrh[cnt] = tx->st_rings[cnt]; - return (cnt); - } - default: + } else if (rtype == MAC_RING_TYPE_TX) { + grp = flent->fe_tx_ring_group; + } else { ASSERT(B_FALSE); return (-1); } + /* + * The mac client did not reserve any RX group, return directly. + * This is probably because the underlying MAC does not support + * any groups. + */ + if (hwgh != NULL) + *hwgh = NULL; + if (grp == NULL) + return (0); + /* + * This group must be reserved by this mac client. + */ + ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && + (mcip == MAC_GROUP_ONLY_CLIENT(grp))); + + for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) { + ASSERT(cnt < MAX_RINGS_PER_GROUP); + hwrh[cnt] = (mac_ring_handle_t)ring; + } + if (hwgh != NULL) + *hwgh = (mac_group_handle_t)grp; + + return (cnt); } /* - * Setup the RX callback of the mac client which exclusively controls HW ring. + * This function is called to get info about Tx/Rx rings. + * + * Return value: returns uint_t which will have various bits set + * that indicates different properties of the ring. + */ +uint_t +mac_hwring_getinfo(mac_ring_handle_t rh) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + return (info->mri_flags); +} + +/* + * Export ddi interrupt handles from the HW ring to the pseudo ring and + * setup the RX callback of the mac client which exclusively controls + * HW ring. 
*/ void -mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh) +mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh, + mac_ring_handle_t pseudo_rh) { mac_ring_t *hw_ring = (mac_ring_t *)hwrh; + mac_ring_t *pseudo_ring; mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; - mac_srs->srs_mrh = prh; - mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; + if (pseudo_rh != NULL) { + pseudo_ring = (mac_ring_t *)pseudo_rh; + /* Export the ddi handles to pseudo ring */ + pseudo_ring->mr_info.mri_intr.mi_ddi_handle = + hw_ring->mr_info.mri_intr.mi_ddi_handle; + pseudo_ring->mr_info.mri_intr.mi_ddi_shared = + hw_ring->mr_info.mri_intr.mi_ddi_shared; + /* + * Save a pointer to pseudo ring in the hw ring. If + * interrupt handle changes, the hw ring will be + * notified of the change (see mac_ring_intr_set()) + * and the appropriate change has to be made to + * the pseudo ring that has exported the ddi handle. + */ + hw_ring->mr_prh = pseudo_rh; + } + + if (hw_ring->mr_type == MAC_RING_TYPE_RX) { + ASSERT(!(mac_srs->srs_type & SRST_TX)); + mac_srs->srs_mrh = prh; + mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process; + } } void mac_hwring_teardown(mac_ring_handle_t hwrh) { mac_ring_t *hw_ring = (mac_ring_t *)hwrh; - mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs; + mac_soft_ring_set_t *mac_srs; - mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; - mac_srs->srs_mrh = NULL; + if (hw_ring == NULL) + return; + hw_ring->mr_prh = NULL; + if (hw_ring->mr_type == MAC_RING_TYPE_RX) { + mac_srs = hw_ring->mr_srs; + ASSERT(!(mac_srs->srs_type & SRST_TX)); + mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process; + mac_srs->srs_mrh = NULL; + } } int @@ -1575,7 +1627,7 @@ mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup) } /* - * Send packets through the selected tx ring. + * Send packets through a selected tx ring. 
*/ mblk_t * mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) @@ -1588,6 +1640,35 @@ mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) return (info->mri_tx(info->mri_driver, mp)); } +/* + * Query stats for a particular rx/tx ring + */ +int +mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + return (info->mri_stat(info->mri_driver, stat, val)); +} + +/* + * Private function that is only used by aggr to send packets through + * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports + * that does not expose Tx rings, aggr_ring_tx() entry point needs + * access to mac_impl_t to send packets through m_tx() entry point. + * It accomplishes this by calling mac_hwring_send_priv() function. + */ +mblk_t * +mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = mcip->mci_mip; + + MAC_TX(mip, rh, mp, mcip); + return (mp); +} + int mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) { @@ -1609,7 +1690,7 @@ mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr) * started/stopped outside of this function. */ void -mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) +mac_set_group_state(mac_group_t *grp, mac_group_state_t state) { /* * If there is no change in the group state, just return. 
@@ -1629,9 +1710,10 @@ mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) */ ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); - if (GROUP_INTR_DISABLE_FUNC(grp) != NULL) + if (grp->mrg_type == MAC_RING_TYPE_RX && + GROUP_INTR_DISABLE_FUNC(grp) != NULL) { GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); - + } break; case MAC_GROUP_STATE_SHARED: @@ -1641,9 +1723,10 @@ mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state) */ ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); - if (GROUP_INTR_ENABLE_FUNC(grp) != NULL) + if (grp->mrg_type == MAC_RING_TYPE_RX && + GROUP_INTR_ENABLE_FUNC(grp) != NULL) { GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); - + } /* The ring is not available for reservations any more */ break; @@ -1921,7 +2004,8 @@ mac_rx_srs_restart(mac_soft_ring_set_t *srs) if (mr != NULL) { MAC_RING_UNMARK(mr, MR_QUIESCE); /* In case the ring was stopped, safely restart it */ - (void) mac_start_ring(mr); + if (mr->mr_state != MR_INUSE) + (void) mac_start_ring(mr); } else { FLOW_UNMARK(flent, FE_QUIESCE); } @@ -2088,9 +2172,11 @@ mac_tx_flow_restart(flow_entry_t *flent, void *arg) return (0); } -void -mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) +static void +i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) { + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); mac_tx_client_block(mcip); @@ -2102,8 +2188,22 @@ mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag) } void -mac_tx_client_restart(mac_client_impl_t *mcip) +mac_tx_client_quiesce(mac_client_handle_t mch) +{ + i_mac_tx_client_quiesce(mch, SRS_QUIESCE); +} + +void +mac_tx_client_condemn(mac_client_handle_t mch) +{ + i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); +} + +void +mac_tx_client_restart(mac_client_handle_t mch) { + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); mac_tx_client_unblock(mcip); @@ -2119,22 
+2219,22 @@ mac_tx_client_flush(mac_client_impl_t *mcip) { ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); - mac_tx_client_quiesce(mcip, SRS_QUIESCE); - mac_tx_client_restart(mcip); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } void mac_client_quiesce(mac_client_impl_t *mcip) { mac_rx_client_quiesce((mac_client_handle_t)mcip); - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce((mac_client_handle_t)mcip); } void mac_client_restart(mac_client_impl_t *mcip) { mac_rx_client_restart((mac_client_handle_t)mcip); - mac_tx_client_restart(mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } /* @@ -2386,8 +2486,21 @@ i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) rw_enter(&mip->mi_rw_lock, RW_READER); for (cclient = mip->mi_clients_list; cclient != NULL; cclient = cclient->mci_client_next) { - if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) + if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { mac_tx_srs_wakeup(mac_srs, ring); + } else { + /* + * Aggr opens underlying ports in exclusive mode + * and registers flow control callbacks using + * mac_tx_client_notify(). When opened in + * exclusive mode, Tx SRS won't be created + * during mac_unicast_add(). + */ + if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { + mac_tx_invoke_callbacks(cclient, + (mac_tx_cookie_t)ring); + } + } (void) mac_flow_walk(cclient->mci_subflow_tab, mac_tx_flow_srs_wakeup, ring); } @@ -2724,43 +2837,196 @@ done: } /* - * mac_set_prop() sets mac or hardware driver properties: - * MAC resource properties include maxbw, priority, and cpu binding list. - * Driver properties are private properties to the hardware, such as mtu - * and speed. There's one other MAC property -- the PVID. - * If the property is a driver property, mac_set_prop() calls driver's callback - * function to set it. 
- * If the property is a mac resource property, mac_set_prop() invokes - * mac_set_resources() which will cache the property value in mac_impl_t and - * may call mac_client_set_resource() to update property value of the primary - * mac client, if it exists. + * Checks the size of the value size specified for a property as + * part of a property operation. Returns B_TRUE if the size is + * correct, B_FALSE otherwise. + */ +boolean_t +mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) +{ + uint_t minsize = 0; + + if (is_range) + return (valsize >= sizeof (mac_propval_range_t)); + + switch (id) { + case MAC_PROP_ZONE: + minsize = sizeof (dld_ioc_zid_t); + break; + case MAC_PROP_AUTOPUSH: + if (valsize != 0) + minsize = sizeof (struct dlautopush); + break; + case MAC_PROP_TAGMODE: + minsize = sizeof (link_tagmode_t); + break; + case MAC_PROP_RESOURCE: + case MAC_PROP_RESOURCE_EFF: + minsize = sizeof (mac_resource_props_t); + break; + case MAC_PROP_DUPLEX: + minsize = sizeof (link_duplex_t); + break; + case MAC_PROP_SPEED: + minsize = sizeof (uint64_t); + break; + case MAC_PROP_STATUS: + minsize = sizeof (link_state_t); + break; + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + minsize = sizeof (uint8_t); + break; + case MAC_PROP_MTU: + case MAC_PROP_LLIMIT: + case MAC_PROP_LDECAY: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_FLOWCTRL: + minsize = sizeof (link_flowctrl_t); + break; + case MAC_PROP_ADV_10GFDX_CAP: + case MAC_PROP_EN_10GFDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + minsize = sizeof (uint8_t); + break; + case MAC_PROP_PVID: + minsize = sizeof (uint16_t); + break; + case 
MAC_PROP_IPTUN_HOPLIMIT: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_IPTUN_ENCAPLIMIT: + minsize = sizeof (uint32_t); + break; + case MAC_PROP_MAX_TX_RINGS_AVAIL: + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + minsize = sizeof (uint_t); + break; + case MAC_PROP_WL_ESSID: + minsize = sizeof (wl_linkstatus_t); + break; + case MAC_PROP_WL_BSSID: + minsize = sizeof (wl_bssid_t); + break; + case MAC_PROP_WL_BSSTYPE: + minsize = sizeof (wl_bss_type_t); + break; + case MAC_PROP_WL_LINKSTATUS: + minsize = sizeof (wl_linkstatus_t); + break; + case MAC_PROP_WL_DESIRED_RATES: + minsize = sizeof (wl_rates_t); + break; + case MAC_PROP_WL_SUPPORTED_RATES: + minsize = sizeof (wl_rates_t); + break; + case MAC_PROP_WL_AUTH_MODE: + minsize = sizeof (wl_authmode_t); + break; + case MAC_PROP_WL_ENCRYPTION: + minsize = sizeof (wl_encryption_t); + break; + case MAC_PROP_WL_RSSI: + minsize = sizeof (wl_rssi_t); + break; + case MAC_PROP_WL_PHY_CONFIG: + minsize = sizeof (wl_phy_conf_t); + break; + case MAC_PROP_WL_CAPABILITY: + minsize = sizeof (wl_capability_t); + break; + case MAC_PROP_WL_WPA: + minsize = sizeof (wl_wpa_t); + break; + case MAC_PROP_WL_SCANRESULTS: + minsize = sizeof (wl_wpa_ess_t); + break; + case MAC_PROP_WL_POWER_MODE: + minsize = sizeof (wl_ps_mode_t); + break; + case MAC_PROP_WL_RADIO: + minsize = sizeof (wl_radio_t); + break; + case MAC_PROP_WL_ESS_LIST: + minsize = sizeof (wl_ess_list_t); + break; + case MAC_PROP_WL_KEY_TAB: + minsize = sizeof (wl_wep_key_tab_t); + break; + case MAC_PROP_WL_CREATE_IBSS: + minsize = sizeof (wl_create_ibss_t); + break; + case MAC_PROP_WL_SETOPTIE: + minsize = sizeof (wl_wpa_ie_t); + break; + case MAC_PROP_WL_DELKEY: + minsize = sizeof (wl_del_key_t); + break; + case MAC_PROP_WL_KEY: + minsize = sizeof (wl_key_t); + break; + case MAC_PROP_WL_MLME: + minsize = sizeof (wl_mlme_t); + break; + } + + return (valsize >= minsize); +} + +/* + * mac_set_prop() sets MAC 
or hardware driver properties: + * + * - MAC-managed properties such as resource properties include maxbw, + * priority, and cpu binding list, as well as the default port VID + * used by bridging. These properties are consumed by the MAC layer + * itself and not passed down to the driver. For resource control + * properties, this function invokes mac_set_resources() which will + * cache the property value in mac_impl_t and may call + * mac_client_set_resource() to update property value of the primary + * mac client, if it exists. + * + * - Properties which act on the hardware and must be passed to the + * driver, such as MTU, through the driver's mc_setprop() entry point. */ int -mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) +mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, + uint_t valsize) { int err = ENOTSUP; mac_impl_t *mip = (mac_impl_t *)mh; ASSERT(MAC_PERIM_HELD(mh)); - switch (macprop->mp_id) { - case MAC_PROP_MAXBW: - case MAC_PROP_PRIO: - case MAC_PROP_PROTECT: - case MAC_PROP_BIND_CPU: { - mac_resource_props_t mrp; + switch (id) { + case MAC_PROP_RESOURCE: { + mac_resource_props_t *mrp; - /* If it is mac property, call mac_set_resources() */ - if (valsize < sizeof (mac_resource_props_t)) - return (EINVAL); - bcopy(val, &mrp, sizeof (mrp)); - err = mac_set_resources(mh, &mrp); + /* call mac_set_resources() for MAC properties */ + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + bcopy(val, mrp, sizeof (*mrp)); + err = mac_set_resources(mh, mrp); + kmem_free(mrp, sizeof (*mrp)); break; } case MAC_PROP_PVID: - if (valsize < sizeof (uint16_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint16_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); err = mac_set_pvid(mh, *(uint16_t *)val); break; @@ -2768,8 +3034,7 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) case MAC_PROP_MTU: { uint32_t 
mtu; - if (valsize < sizeof (mtu)) - return (EINVAL); + ASSERT(valsize >= sizeof (uint32_t)); bcopy(val, &mtu, sizeof (mtu)); err = mac_set_mtu(mh, mtu, NULL); break; @@ -2783,9 +3048,9 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) (mip->mi_state_flags & MIS_IS_VNIC)) return (EINVAL); bcopy(val, &learnval, sizeof (learnval)); - if (learnval == 0 && macprop->mp_id == MAC_PROP_LDECAY) + if (learnval == 0 && id == MAC_PROP_LDECAY) return (EINVAL); - if (macprop->mp_id == MAC_PROP_LLIMIT) + if (id == MAC_PROP_LLIMIT) mip->mi_llimit = learnval; else mip->mi_ldecay = learnval; @@ -2797,60 +3062,68 @@ mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize) /* For other driver properties, call driver's callback */ if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { err = mip->mi_callbacks->mc_setprop(mip->mi_driver, - macprop->mp_name, macprop->mp_id, valsize, val); + name, id, valsize, val); } } return (err); } /* - * mac_get_prop() gets mac or hardware driver properties. + * mac_get_prop() gets MAC or device driver properties. * * If the property is a driver property, mac_get_prop() calls driver's callback - * function to get it. - * If the property is a mac property, mac_get_prop() invokes mac_get_resources() + * entry point to get it. + * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() * which returns the cached value in mac_impl_t. 
*/ int -mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, - uint_t *perm) +mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, + uint_t valsize) { int err = ENOTSUP; mac_impl_t *mip = (mac_impl_t *)mh; - link_state_t link_state; - boolean_t is_getprop, is_setprop; + uint_t rings; + uint_t vlinks; - is_getprop = (mip->mi_callbacks->mc_callbacks & MC_GETPROP); - is_setprop = (mip->mi_callbacks->mc_callbacks & MC_SETPROP); + bzero(val, valsize); - switch (macprop->mp_id) { - case MAC_PROP_MAXBW: - case MAC_PROP_PRIO: - case MAC_PROP_PROTECT: - case MAC_PROP_BIND_CPU: { - mac_resource_props_t mrp; + switch (id) { + case MAC_PROP_RESOURCE: { + mac_resource_props_t *mrp; /* If mac property, read from cache */ - if (valsize < sizeof (mac_resource_props_t)) - return (EINVAL); - mac_get_resources(mh, &mrp); - bcopy(&mrp, val, sizeof (mac_resource_props_t)); + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + mac_get_resources(mh, mrp); + bcopy(mrp, val, sizeof (*mrp)); + kmem_free(mrp, sizeof (*mrp)); + return (0); + } + case MAC_PROP_RESOURCE_EFF: { + mac_resource_props_t *mrp; + + /* If mac effective property, read from client */ + ASSERT(valsize >= sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + mac_get_effective_resources(mh, mrp); + bcopy(mrp, val, sizeof (*mrp)); + kmem_free(mrp, sizeof (*mrp)); return (0); } case MAC_PROP_PVID: - if (valsize < sizeof (uint16_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint16_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); *(uint16_t *)val = mac_get_pvid(mh); return (0); case MAC_PROP_LLIMIT: case MAC_PROP_LDECAY: - if (valsize < sizeof (uint32_t) || - (mip->mi_state_flags & MIS_IS_VNIC)) + ASSERT(valsize >= sizeof (uint32_t)); + if (mip->mi_state_flags & MIS_IS_VNIC) return (EINVAL); - if (macprop->mp_id == MAC_PROP_LLIMIT) + if (id == MAC_PROP_LLIMIT) 
bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); else bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); @@ -2858,78 +3131,261 @@ mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize, case MAC_PROP_MTU: { uint32_t sdu; - mac_propval_range_t range; - - if ((macprop->mp_flags & MAC_PROP_POSSIBLE) != 0) { - if (valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - if (is_getprop) { - err = mip->mi_callbacks->mc_getprop(mip-> - mi_driver, macprop->mp_name, macprop->mp_id, - macprop->mp_flags, valsize, val, perm); - } - /* - * If the driver doesn't have *_m_getprop defined or - * if the driver doesn't support setting MTU then - * return the CURRENT value as POSSIBLE value. - */ - if (!is_getprop || err == ENOTSUP) { - mac_sdu_get(mh, NULL, &sdu); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = sdu; - bcopy(&range, val, sizeof (range)); - err = 0; - } - return (err); - } - if (valsize < sizeof (sdu)) - return (EINVAL); - if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) { - mac_sdu_get(mh, NULL, &sdu); - bcopy(&sdu, val, sizeof (sdu)); - if (is_setprop && (mip->mi_callbacks->mc_setprop(mip-> - mi_driver, macprop->mp_name, macprop->mp_id, - valsize, val) == 0)) { - *perm = MAC_PROP_PERM_RW; - } else { - *perm = MAC_PROP_PERM_READ; - } - return (0); - } else { - if (mip->mi_info.mi_media == DL_ETHER) { - sdu = ETHERMTU; - bcopy(&sdu, val, sizeof (sdu)); - return (0); - } - /* - * ask driver for its default. 
- */ - break; - } + ASSERT(valsize >= sizeof (uint32_t)); + mac_sdu_get(mh, NULL, &sdu); + bcopy(&sdu, val, sizeof (sdu)); + + return (0); } - case MAC_PROP_STATUS: + case MAC_PROP_STATUS: { + link_state_t link_state; + if (valsize < sizeof (link_state)) return (EINVAL); - *perm = MAC_PROP_PERM_READ; link_state = mac_link_get(mh); bcopy(&link_state, val, sizeof (link_state)); + + return (0); + } + + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_TX_RINGS_AVAIL: + ASSERT(valsize >= sizeof (uint_t)); + rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? + mac_rxavail_get(mh) : mac_txavail_get(mh); + bcopy(&rings, val, sizeof (uint_t)); + return (0); + + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + ASSERT(valsize >= sizeof (uint_t)); + vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? + mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); + bcopy(&vlinks, val, sizeof (uint_t)); return (0); + + case MAC_PROP_RXRINGSRANGE: + case MAC_PROP_TXRINGSRANGE: + /* + * The value for these properties are returned through + * the MAC_PROP_RESOURCE property. + */ + return (0); + default: break; } + /* If driver property, request from driver */ - if (is_getprop) { - err = mip->mi_callbacks->mc_getprop(mip->mi_driver, - macprop->mp_name, macprop->mp_id, macprop->mp_flags, - valsize, val, perm); + if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { + err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, + valsize, val); } + return (err); } +/* + * Helper function to initialize the range structure for use in + * mac_get_prop. If the type can be other than uint32, we can + * pass that as an arg. + */ +static void +_mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) +{ + range->mpr_count = 1; + range->mpr_type = MAC_PROPVAL_UINT32; + range->mpr_range_uint32[0].mpur_min = min; + range->mpr_range_uint32[0].mpur_max = max; +} + +/* + * Returns information about the specified property, such as default + * values or permissions. 
+ */ +int +mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, + void *default_val, uint_t default_size, mac_propval_range_t *range, + uint_t *perm) +{ + mac_prop_info_state_t state; + mac_impl_t *mip = (mac_impl_t *)mh; + uint_t max; + + /* + * A property is read/write by default unless the driver says + * otherwise. + */ + if (perm != NULL) + *perm = MAC_PROP_PERM_RW; + + if (default_val != NULL) + bzero(default_val, default_size); + + /* + * First, handle framework properties for which we don't need to + * involve the driver. + */ + switch (id) { + case MAC_PROP_RESOURCE: + case MAC_PROP_PVID: + case MAC_PROP_LLIMIT: + case MAC_PROP_LDECAY: + return (0); + + case MAC_PROP_MAX_RX_RINGS_AVAIL: + case MAC_PROP_MAX_TX_RINGS_AVAIL: + case MAC_PROP_MAX_RXHWCLNT_AVAIL: + case MAC_PROP_MAX_TXHWCLNT_AVAIL: + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + return (0); + + case MAC_PROP_RXRINGSRANGE: + case MAC_PROP_TXRINGSRANGE: + /* + * Currently, we support range for RX and TX rings properties. + * When we extend this support to maxbw, cpus and priority, + * we should move this to mac_get_resources. + * There is no default value for RX or TX rings. + */ + if ((mip->mi_state_flags & MIS_IS_VNIC) && + mac_is_vnic_primary(mh)) { + /* + * We don't support setting rings for a VLAN + * data link because it shares its ring with the + * primary MAC client. 
+ */ + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + if (range != NULL) + range->mpr_count = 0; + } else if (range != NULL) { + if (mip->mi_state_flags & MIS_IS_VNIC) + mh = mac_get_lower_mac_handle(mh); + mip = (mac_impl_t *)mh; + if ((id == MAC_PROP_RXRINGSRANGE && + mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || + (id == MAC_PROP_TXRINGSRANGE && + mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { + if (id == MAC_PROP_RXRINGSRANGE) { + if ((mac_rxhwlnksavail_get(mh) + + mac_rxhwlnksrsvd_get(mh)) <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * supports specifying groups, + * but not rings + */ + _mac_set_range(range, 0, 0); + } + } else { + if ((mac_txhwlnksavail_get(mh) + + mac_txhwlnksrsvd_get(mh)) <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * supports specifying groups, + * but not rings + */ + _mac_set_range(range, 0, 0); + } + } + } else { + max = id == MAC_PROP_RXRINGSRANGE ? + mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : + mac_txavail_get(mh) + mac_txrsvd_get(mh); + if (max <= 1) { + /* + * doesn't support groups or + * rings + */ + range->mpr_count = 0; + } else { + /* + * -1 because we have to leave out the + * default ring. + */ + _mac_set_range(range, 1, max - 1); + } + } + } + return (0); + + case MAC_PROP_STATUS: + if (perm != NULL) + *perm = MAC_PROP_PERM_READ; + return (0); + } + + /* + * Get the property info from the driver if it implements the + * property info entry point. + */ + bzero(&state, sizeof (state)); + + if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { + state.pr_default = default_val; + state.pr_default_size = default_size; + state.pr_range = range; + + mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, + (mac_prop_info_handle_t)&state); + + /* + * The operation could fail if the buffer supplied by + * the user was too small for the range or default + * value of the property. 
+ */ + if (state.pr_default_status != 0) + return (state.pr_default_status); + + if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) + *perm = state.pr_perm; + } + + /* + * The MAC layer may want to provide default values or allowed + * ranges for properties if the driver does not provide a + * property info entry point, or that entry point exists, but + * it did not provide a default value or allowed ranges for + * that property. + */ + switch (id) { + case MAC_PROP_MTU: { + uint32_t sdu; + + mac_sdu_get(mh, NULL, &sdu); + + if (range != NULL && !(state.pr_flags & + MAC_PROP_INFO_RANGE)) { + /* MTU range */ + _mac_set_range(range, sdu, sdu); + } + + if (default_val != NULL && !(state.pr_flags & + MAC_PROP_INFO_DEFAULT)) { + if (mip->mi_info.mi_media == DL_ETHER) + sdu = ETHERMTU; + /* default MTU value */ + bcopy(&sdu, default_val, sizeof (sdu)); + } + } + } + + return (0); +} + int mac_fastpath_disable(mac_handle_t mh) { @@ -2953,29 +3409,47 @@ mac_fastpath_enable(mac_handle_t mh) } void -mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop) +mac_register_priv_prop(mac_impl_t *mip, char **priv_props) { - mac_priv_prop_t *mpriv; + uint_t nprops, i; + + if (priv_props == NULL) + return; - if (mpp == NULL) + nprops = 0; + while (priv_props[nprops] != NULL) + nprops++; + if (nprops == 0) return; - mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP); - (void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv)); - mip->mi_priv_prop = mpriv; - mip->mi_priv_prop_count = nprop; + + mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); + + for (i = 0; i < nprops; i++) { + mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); + (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], + MAXLINKPROPNAME); + } + + mip->mi_priv_prop_count = nprops; } void mac_unregister_priv_prop(mac_impl_t *mip) { - mac_priv_prop_t *mpriv; + uint_t i; - mpriv = mip->mi_priv_prop; - if (mpriv != NULL) { - kmem_free(mpriv, mip->mi_priv_prop_count * sizeof 
(*mpriv)); - mip->mi_priv_prop = NULL; + if (mip->mi_priv_prop_count == 0) { + ASSERT(mip->mi_priv_prop == NULL); + return; } + + for (i = 0; i < mip->mi_priv_prop_count; i++) + kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); + kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * + sizeof (char *)); + + mip->mi_priv_prop = NULL; mip->mi_priv_prop_count = 0; } @@ -2990,22 +3464,19 @@ mac_unregister_priv_prop(mac_impl_t *mip) * count mechanism) will drop such packets. */ static mac_ring_t * -mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) +mac_ring_alloc(mac_impl_t *mip) { mac_ring_t *ring; - if (cap_rings->mr_type == MAC_RING_TYPE_RX) { - mutex_enter(&mip->mi_ring_lock); - if (mip->mi_ring_freelist != NULL) { - ring = mip->mi_ring_freelist; - mip->mi_ring_freelist = ring->mr_next; - bzero(ring, sizeof (mac_ring_t)); - } else { - ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); - } + mutex_enter(&mip->mi_ring_lock); + if (mip->mi_ring_freelist != NULL) { + ring = mip->mi_ring_freelist; + mip->mi_ring_freelist = ring->mr_next; + bzero(ring, sizeof (mac_ring_t)); mutex_exit(&mip->mi_ring_lock); } else { - ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP); + mutex_exit(&mip->mi_ring_lock); + ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); } ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); return (ring); @@ -3014,16 +3485,16 @@ mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings) static void mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) { - if (ring->mr_type == MAC_RING_TYPE_RX) { - mutex_enter(&mip->mi_ring_lock); - ring->mr_state = MR_FREE; - ring->mr_flag = 0; - ring->mr_next = mip->mi_ring_freelist; - mip->mi_ring_freelist = ring; - mutex_exit(&mip->mi_ring_lock); - } else { - kmem_free(ring, sizeof (mac_ring_t)); - } + ASSERT(ring->mr_state == MR_FREE); + + mutex_enter(&mip->mi_ring_lock); + ring->mr_state = MR_FREE; + ring->mr_flag = 0; + ring->mr_next = mip->mi_ring_freelist; + ring->mr_mip = NULL; + mip->mi_ring_freelist = 
ring; + mac_ring_stat_delete(ring); + mutex_exit(&mip->mi_ring_lock); } static void @@ -3046,18 +3517,28 @@ mac_start_ring(mac_ring_t *ring) { int rv = 0; - if (ring->mr_start != NULL) + ASSERT(ring->mr_state == MR_FREE); + + if (ring->mr_start != NULL) { rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); + if (rv != 0) + return (rv); + } + ring->mr_state = MR_INUSE; return (rv); } void mac_stop_ring(mac_ring_t *ring) { + ASSERT(ring->mr_state == MR_INUSE); + if (ring->mr_stop != NULL) ring->mr_stop(ring->mr_driver); + ring->mr_state = MR_FREE; + /* * Increment the ring generation number for this ring. */ @@ -3104,7 +3585,6 @@ mac_start_group_and_rings(mac_group_t *group) ASSERT(ring->mr_state == MR_FREE); if ((rv = mac_start_ring(ring)) != 0) goto error; - ring->mr_state = MR_INUSE; ring->mr_classify_type = MAC_SW_CLASSIFIER; } return (0); @@ -3123,7 +3603,6 @@ mac_stop_group_and_rings(mac_group_t *group) for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { if (ring->mr_state != MR_FREE) { mac_stop_ring(ring); - ring->mr_state = MR_FREE; ring->mr_flag = 0; ring->mr_classify_type = MAC_NO_CLASSIFIER; } @@ -3136,13 +3615,24 @@ static mac_ring_t * mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, mac_capab_rings_t *cap_rings) { - mac_ring_t *ring; + mac_ring_t *ring, *rnext; mac_ring_info_t ring_info; + ddi_intr_handle_t ddi_handle; - ring = mac_ring_alloc(mip, cap_rings); + ring = mac_ring_alloc(mip); /* Prepare basic information of ring */ - ring->mr_index = index; + + /* + * Ring index is numbered to be unique across a particular device. + * Ring index computation makes following assumptions: + * - For drivers with static grouping (e.g. ixgbe, bge), + * ring index exchanged with the driver (e.g. during mr_rget) + * is unique only across the group the ring belongs to. + * - Drivers with dynamic grouping (e.g. nxge), start + * with single group (mrg_index = 0). 
+ */ + ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; ring->mr_type = group->mrg_type; ring->mr_gh = (mac_group_handle_t)group; @@ -3159,12 +3649,63 @@ mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, ring->mr_info = ring_info; + /* + * The interrupt handle could be shared among multiple rings. + * Thus if there is a bunch of rings that are sharing an + * interrupt, then only one ring among the bunch will be made + * available for interrupt re-targeting; the rest will have + * ddi_shared flag set to TRUE and would not be available for + * be interrupt re-targeting. + */ + if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { + rnext = ring->mr_next; + while (rnext != NULL) { + if (rnext->mr_info.mri_intr.mi_ddi_handle == + ddi_handle) { + /* + * If default ring (mr_index == 0) is part + * of a group of rings sharing an + * interrupt, then set ddi_shared flag for + * the default ring and give another ring + * the chance to be re-targeted. + */ + if (rnext->mr_index == 0 && + !rnext->mr_info.mri_intr.mi_ddi_shared) { + rnext->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } else { + ring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } + break; + } + rnext = rnext->mr_next; + } + /* + * If rnext is NULL, then no matching ddi_handle was found. + * Rx rings get registered first. So if this is a Tx ring, + * then go through all the Rx rings and see if there is a + * matching ddi handle. 
+ */ + if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { + mac_compare_ddi_handle(mip->mi_rx_groups, + mip->mi_rx_group_count, ring); + } + } + /* Update ring's status */ ring->mr_state = MR_FREE; ring->mr_flag = 0; /* Update the ring count of the group */ group->mrg_cur_count++; + + /* Create per ring kstats */ + if (ring->mr_stat != NULL) { + ring->mr_mip = mip; + mac_ring_stat_create(ring); + } + return (ring); } @@ -3188,13 +3729,17 @@ mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, int mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) { - mac_capab_rings_t *cap_rings; - mac_group_t *group, *groups; - mac_group_info_t group_info; - uint_t group_free = 0; - uint_t ring_left; - mac_ring_t *ring; - int g, err = 0; + mac_capab_rings_t *cap_rings; + mac_group_t *group; + mac_group_t *groups; + mac_group_info_t group_info; + uint_t group_free = 0; + uint_t ring_left; + mac_ring_t *ring; + int g; + int err = 0; + uint_t grpcnt; + boolean_t pseudo_txgrp = B_FALSE; switch (rtype) { case MAC_RING_TYPE_RX: @@ -3213,15 +3758,32 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) ASSERT(B_FALSE); } - if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, - cap_rings)) + if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) return (0); + grpcnt = cap_rings->mr_gnum; + + /* + * If we have multiple TX rings, but only one TX group, we can + * create pseudo TX groups (one per TX ring) in the MAC layer, + * except for an aggr. For an aggr currently we maintain only + * one group with all the rings (for all its ports), going + * forwards we might change this. + */ + if (rtype == MAC_RING_TYPE_TX && + cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && + (mip->mi_state_flags & MIS_IS_AGGR) == 0) { + /* + * The -1 here is because we create a default TX group + * with all the rings in it. + */ + grpcnt = cap_rings->mr_rnum - 1; + pseudo_txgrp = B_TRUE; + } /* * Allocate a contiguous buffer for all groups. 
*/ - groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1), - KM_SLEEP); + groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt+ 1), KM_SLEEP); ring_left = cap_rings->mr_rnum; @@ -3229,7 +3791,7 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) * Get all ring groups if any, and get their ring members * if any. */ - for (g = 0; g < cap_rings->mr_gnum; g++) { + for (g = 0; g < grpcnt; g++) { group = groups + g; /* Prepare basic information of the group */ @@ -3242,6 +3804,16 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) /* Zero to reuse the info data structure */ bzero(&group_info, sizeof (group_info)); + if (pseudo_txgrp) { + /* + * This is a pseudo group that we created, apart + * from setting the state there is nothing to be + * done. + */ + group->mrg_state = MAC_GROUP_STATE_REGISTERED; + group_free++; + continue; + } /* Query group information from driver */ cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, (mac_group_handle_t)group); @@ -3321,15 +3893,16 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) */ if (rtype == MAC_RING_TYPE_RX) { if ((group_info.mgi_addmac == NULL) || - (group_info.mgi_addmac == NULL)) + (group_info.mgi_addmac == NULL)) { goto bail; + } } /* Cache driver-supplied information */ group->mrg_info = group_info; /* Update the group's status and group count. 
*/ - mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); group_free++; group->mrg_rings = NULL; @@ -3342,7 +3915,7 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) } /* Build up a dummy group for free resources as a pool */ - group = groups + cap_rings->mr_gnum; + group = groups + grpcnt; /* Prepare basic information of the group */ group->mrg_index = -1; @@ -3366,36 +3939,88 @@ mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) ring_left = 0; /* Update this group's status */ - mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED); + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); } else group->mrg_rings = NULL; ASSERT(ring_left == 0); bail: + /* Cache other important information to finalize the initialization */ switch (rtype) { case MAC_RING_TYPE_RX: mip->mi_rx_group_type = cap_rings->mr_group_type; mip->mi_rx_group_count = cap_rings->mr_gnum; mip->mi_rx_groups = groups; + mip->mi_rx_donor_grp = groups; + if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + /* + * The default ring is reserved since it is + * used for sending the broadcast etc. packets. + */ + mip->mi_rxrings_avail = + mip->mi_rx_groups->mrg_cur_count - 1; + mip->mi_rxrings_rsvd = 1; + } + /* + * The default group cannot be reserved. It is used by + * all the clients that do not have an exclusive group. + */ + mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1; + mip->mi_rxhwclnt_used = 1; break; case MAC_RING_TYPE_TX: - mip->mi_tx_group_type = cap_rings->mr_group_type; - mip->mi_tx_group_count = cap_rings->mr_gnum; + mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC : + cap_rings->mr_group_type; + mip->mi_tx_group_count = grpcnt; mip->mi_tx_group_free = group_free; mip->mi_tx_groups = groups; + group = groups + grpcnt; + ring = group->mrg_rings; /* - * Ring 0 is used as the default one and it could be assigned - * to a client as well. + * The ring can be NULL in the case of aggr. 
Aggr will + * have an empty Tx group which will get populated + * later when pseudo Tx rings are added after + * mac_register() is done. */ - group = groups + cap_rings->mr_gnum; - ring = group->mrg_rings; - while ((ring->mr_index != 0) && (ring->mr_next != NULL)) - ring = ring->mr_next; - ASSERT(ring->mr_index == 0); - mip->mi_default_tx_ring = (mac_ring_handle_t)ring; + if (ring == NULL) { + ASSERT(mip->mi_state_flags & MIS_IS_AGGR); + /* + * pass the group to aggr so it can add Tx + * rings to the group later. + */ + cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL, + (mac_group_handle_t)group); + /* + * Even though there are no rings at this time + * (rings will come later), set the group + * state to registered. + */ + group->mrg_state = MAC_GROUP_STATE_REGISTERED; + } else { + /* + * Ring 0 is used as the default one and it could be + * assigned to a client as well. + */ + while ((ring->mr_index != 0) && (ring->mr_next != NULL)) + ring = ring->mr_next; + ASSERT(ring->mr_index == 0); + mip->mi_default_tx_ring = (mac_ring_handle_t)ring; + } + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) + mip->mi_txrings_avail = group->mrg_cur_count - 1; + /* + * The default ring cannot be reserved. + */ + mip->mi_txrings_rsvd = 1; + /* + * The default group cannot be reserved. It will be shared + * by clients that do not have an exclusive group. + */ + mip->mi_txhwclnt_avail = mip->mi_tx_group_count; + mip->mi_txhwclnt_used = 1; break; default: ASSERT(B_FALSE); @@ -3408,8 +4033,45 @@ bail: } /* - * Called to free all ring groups with particular type. It's supposed all groups - * have been released by clinet. + * The ddi interrupt handle could be shared amoung rings. If so, compare + * the new ring's ddi handle with the existing ones and set ddi_shared + * flag. 
+ */ +void +mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring) +{ + mac_group_t *group; + mac_ring_t *ring; + ddi_intr_handle_t ddi_handle; + int g; + + ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle; + for (g = 0; g < grpcnt; g++) { + group = groups + g; + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + if (ring == cring) + continue; + if (ring->mr_info.mri_intr.mi_ddi_handle == + ddi_handle) { + if (cring->mr_type == MAC_RING_TYPE_RX && + ring->mr_index == 0 && + !ring->mr_info.mri_intr.mi_ddi_shared) { + ring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } else { + cring->mr_info.mri_intr.mi_ddi_shared = + B_TRUE; + } + return; + } + } + } +} + +/* + * Called to free all groups of particular type (RX or TX). It's assumed that + * no clients are using these groups. */ void mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) @@ -3426,6 +4088,7 @@ mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) group_count = mip->mi_rx_group_count; mip->mi_rx_groups = NULL; + mip->mi_rx_donor_grp = NULL; mip->mi_rx_group_count = 0; break; case MAC_RING_TYPE_TX: @@ -3501,32 +4164,6 @@ mac_group_remmac(mac_group_t *group, const uint8_t *addr) } /* - * Release a ring in use by marking it MR_FREE. - * Any other client may reserve it for its use. - */ -void -mac_release_tx_ring(mac_ring_handle_t rh) -{ - mac_ring_t *ring = (mac_ring_t *)rh; - mac_group_t *group = (mac_group_t *)ring->mr_gh; - mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; - - ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(ring->mr_state != MR_FREE); - - /* - * Default tx ring will be released by mac_stop(). - */ - if (rh == mip->mi_default_tx_ring) - return; - - mac_stop_ring(ring); - - ring->mr_state = MR_FREE; - ring->mr_flag = 0; -} - -/* * This is the entry point for packets transmitted through the bridging code. * If no bridge is in place, MAC_RING_TX transmits using tx ring. The 'rh' * pointer may be NULL to select the default ring. 
@@ -3558,16 +4195,17 @@ mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp) /* * Find a ring from its index. */ -mac_ring_t * -mac_find_ring(mac_group_t *group, int index) +mac_ring_handle_t +mac_find_ring(mac_group_handle_t gh, int index) { + mac_group_t *group = (mac_group_t *)gh; mac_ring_t *ring = group->mrg_rings; for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) if (ring->mr_index == index) break; - return (ring); + return ((mac_ring_handle_t)ring); } /* * Add a ring to an existing group. @@ -3586,6 +4224,7 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) boolean_t driver_call = (ring == NULL); mac_group_type_t group_type; int ret = 0; + flow_entry_t *flent; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); @@ -3606,8 +4245,8 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * There should be no ring with the same ring index in the target * group. */ - ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) == - NULL); + ASSERT(mac_find_ring((mac_group_handle_t)group, + driver_call ? index : ring->mr_index) == NULL); if (driver_call) { /* @@ -3627,7 +4266,8 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * and the mac_ring_t already exists. */ ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(cap_rings->mr_gaddring != NULL); + ASSERT(group->mrg_driver == NULL || + cap_rings->mr_gaddring != NULL); ASSERT(ring->mr_gh == NULL); } @@ -3667,6 +4307,27 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) return (0); /* + * Start the ring if needed. Failure causes to undo the grouping action. 
+ */ + if (ring->mr_state != MR_INUSE) { + if ((ret = mac_start_ring(ring)) != 0) { + if (!driver_call) { + cap_rings->mr_gremring(group->mrg_driver, + ring->mr_driver, ring->mr_type); + } + group->mrg_cur_count--; + group->mrg_rings = ring->mr_next; + + ring->mr_gh = NULL; + + if (driver_call) + mac_ring_free(mip, ring); + + return (ret); + } + } + + /* * Set up SRS/SR according to the ring type. */ switch (ring->mr_type) { @@ -3676,58 +4337,98 @@ i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) * reserved for someones exclusive use. */ if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { - flow_entry_t *flent; mac_client_impl_t *mcip; - mcip = MAC_RX_GROUP_ONLY_CLIENT(group); - ASSERT(mcip != NULL); - flent = mcip->mci_flent; - ASSERT(flent->fe_rx_srs_cnt > 0); - mac_srs_group_setup(mcip, flent, group, SRST_LINK); + mcip = MAC_GROUP_ONLY_CLIENT(group); + /* + * Even though this group is reserved we migth still + * have multiple clients, i.e a VLAN shares the + * group with the primary mac client. + */ + if (mcip != NULL) { + flent = mcip->mci_flent; + ASSERT(flent->fe_rx_srs_cnt > 0); + mac_rx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, + MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, + mcip, NULL, NULL); + } else { + ring->mr_classify_type = MAC_SW_CLASSIFIER; + } } break; case MAC_RING_TYPE_TX: + { + mac_grp_client_t *mgcp = group->mrg_clients; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_srs_tx_t *tx; + + if (MAC_GROUP_NO_CLIENT(group)) { + if (ring->mr_state == MR_INUSE) + mac_stop_ring(ring); + ring->mr_flag = 0; + break; + } /* - * For TX this function is only invoked during the - * initial creation of a group when a share is - * associated with a MAC client. So the datapath is not - * yet setup, and will be setup later after the - * group has been reserved and populated. + * If the rings are being moved to a group that has + * clients using it, then add the new rings to the + * clients SRS. 
*/ + while (mgcp != NULL) { + boolean_t is_aggr; + + mcip = mgcp->mgc_client; + flent = mcip->mci_flent; + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR); + mac_srs = MCIP_TX_SRS(mcip); + tx = &mac_srs->srs_tx; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + /* + * If we are growing from 1 to multiple rings. + */ + if (tx->st_mode == SRS_TX_BW || + tx->st_mode == SRS_TX_SERIALIZE || + tx->st_mode == SRS_TX_DEFAULT) { + mac_ring_t *tx_ring = tx->st_arg2; + + tx->st_arg2 = NULL; + mac_tx_srs_stat_recreate(mac_srs, B_TRUE); + mac_tx_srs_add_ring(mac_srs, tx_ring); + if (mac_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = is_aggr ? SRS_TX_BW_AGGR : + SRS_TX_BW_FANOUT; + } else { + tx->st_mode = is_aggr ? SRS_TX_AGGR : + SRS_TX_FANOUT; + } + tx->st_func = mac_tx_get_func(tx->st_mode); + } + mac_tx_srs_add_ring(mac_srs, ring); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); + mac_tx_client_restart((mac_client_handle_t)mcip); + mgcp = mgcp->mgc_next; + } break; + } default: ASSERT(B_FALSE); } - /* - * Start the ring if needed. Failure causes to undo the grouping action. + * For aggr, the default ring will be NULL to begin with. If it + * is NULL, then pick the first ring that gets added as the + * default ring. Any ring in an aggregation can be removed at + * any time (by the user action of removing a link) and if the + * current default ring gets removed, then a new one gets + * picked (see i_mac_group_rem_ring()). 
*/ - if ((ret = mac_start_ring(ring)) != 0) { - if (ring->mr_type == MAC_RING_TYPE_RX) { - if (ring->mr_srs != NULL) { - mac_rx_srs_remove(ring->mr_srs); - ring->mr_srs = NULL; - } - } - if (!driver_call) { - cap_rings->mr_gremring(group->mrg_driver, - ring->mr_driver, ring->mr_type); - } - group->mrg_cur_count--; - group->mrg_rings = ring->mr_next; - - ring->mr_gh = NULL; - - if (driver_call) - mac_ring_free(mip, ring); - - return (ret); + if (mip->mi_state_flags & MIS_IS_AGGR && + mip->mi_default_tx_ring == NULL && + ring->mr_type == MAC_RING_TYPE_TX) { + mip->mi_default_tx_ring = (mac_ring_handle_t)ring; } - /* - * Update the ring's state. - */ - ring->mr_state = MR_INUSE; MAC_RING_UNMARK(ring, MR_INCIPIENT); return (0); } @@ -3748,18 +4449,18 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - ASSERT(mac_find_ring(group, ring->mr_index) == ring); + ASSERT(mac_find_ring((mac_group_handle_t)group, + ring->mr_index) == (mac_ring_handle_t)ring); ASSERT((mac_group_t *)ring->mr_gh == group); ASSERT(ring->mr_type == group->mrg_type); + if (ring->mr_state == MR_INUSE) + mac_stop_ring(ring); switch (ring->mr_type) { case MAC_RING_TYPE_RX: group_type = mip->mi_rx_group_type; cap_rings = &mip->mi_rx_rings_cap; - if (group->mrg_state >= MAC_GROUP_STATE_RESERVED) - mac_stop_ring(ring); - /* * Only hardware classified packets hold a reference to the * ring all the way up the Rx path. 
mac_rx_srs_remove() @@ -3771,13 +4472,20 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, mac_rx_srs_remove(ring->mr_srs); ring->mr_srs = NULL; } - ring->mr_state = MR_FREE; - ring->mr_flag = 0; break; case MAC_RING_TYPE_TX: + { + mac_grp_client_t *mgcp; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_srs_tx_t *tx; + mac_ring_t *rem_ring; + mac_group_t *defgrp; + uint_t ring_info = 0; + /* - * For TX this function is only invoked in two + * For TX this function is invoked in three * cases: * * 1) In the case of a failure during the @@ -3789,13 +4497,120 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, * 2) From mac_release_tx_group() when freeing * a TX SRS. * - * In both cases the SRS and its soft rings are - * already quiesced. + * 3) In the case of aggr, when a port gets removed, + * the pseudo Tx rings that it exposed gets removed. + * + * In the first two cases the SRS and its soft + * rings are already quiesced. */ - ASSERT(!driver_call); + if (driver_call) { + mac_client_impl_t *mcip; + mac_soft_ring_set_t *mac_srs; + mac_soft_ring_t *sringp; + mac_srs_tx_t *srs_tx; + + if (mip->mi_state_flags & MIS_IS_AGGR && + mip->mi_default_tx_ring == + (mac_ring_handle_t)ring) { + /* pick a new default Tx ring */ + mip->mi_default_tx_ring = + (group->mrg_rings != ring) ? + (mac_ring_handle_t)group->mrg_rings : + (mac_ring_handle_t)(ring->mr_next); + } + /* Presently only aggr case comes here */ + if (group->mrg_state != MAC_GROUP_STATE_RESERVED) + break; + + mcip = MAC_GROUP_ONLY_CLIENT(group); + ASSERT(mcip != NULL); + ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); + mac_srs = MCIP_TX_SRS(mcip); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); + srs_tx = &mac_srs->srs_tx; + /* + * Wakeup any callers blocked on this + * Tx ring due to flow control. 
+ */ + sringp = srs_tx->st_soft_rings[ring->mr_index]; + ASSERT(sringp != NULL); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_srs_del_ring(mac_srs, ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + break; + } + ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring); group_type = mip->mi_tx_group_type; cap_rings = &mip->mi_tx_rings_cap; + /* + * See if we need to take it out of the MAC clients using + * this group + */ + if (MAC_GROUP_NO_CLIENT(group)) + break; + mgcp = group->mrg_clients; + defgrp = MAC_DEFAULT_TX_GROUP(mip); + while (mgcp != NULL) { + mcip = mgcp->mgc_client; + mac_srs = MCIP_TX_SRS(mcip); + tx = &mac_srs->srs_tx; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + /* + * If we are here when removing rings from the + * defgroup, mac_reserve_tx_ring would have + * already deleted the ring from the MAC + * clients in the group. + */ + if (group != defgrp) { + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(mac_srs, ring)); + mac_tx_srs_del_ring(mac_srs, ring); + } + /* + * Additionally, if we are left with only + * one ring in the group after this, we need + * to modify the mode etc. to. (We haven't + * yet taken the ring out, so we check with 2). + */ + if (group->mrg_cur_count == 2) { + if (ring->mr_next == NULL) + rem_ring = group->mrg_rings; + else + rem_ring = ring->mr_next; + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(mac_srs, + rem_ring)); + mac_tx_srs_del_ring(mac_srs, rem_ring); + if (rem_ring->mr_state != MR_INUSE) { + (void) mac_start_ring(rem_ring); + } + tx->st_arg2 = (void *)rem_ring; + mac_tx_srs_stat_recreate(mac_srs, B_FALSE); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)rem_ring); + /* + * We are shrinking from multiple + * to 1 ring. 
+ */ + if (mac_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = SRS_TX_BW; + } else if (mac_tx_serialize || + (ring_info & MAC_RING_TX_SERIALIZE)) { + tx->st_mode = SRS_TX_SERIALIZE; + } else { + tx->st_mode = SRS_TX_DEFAULT; + } + tx->st_func = mac_tx_get_func(tx->st_mode); + } + mac_tx_client_restart((mac_client_handle_t)mcip); + mgcp = mgcp->mgc_next; + } break; + } default: ASSERT(B_FALSE); } @@ -3817,7 +4632,8 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, if (!driver_call) { ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(cap_rings->mr_gremring != NULL); + ASSERT(group->mrg_driver == NULL || + cap_rings->mr_gremring != NULL); /* * Remove the driver level hardware ring. @@ -3829,12 +4645,10 @@ i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, } ring->mr_gh = NULL; - if (driver_call) { + if (driver_call) mac_ring_free(mip, ring); - } else { - ring->mr_state = MR_FREE; + else ring->mr_flag = 0; - } } /* @@ -3982,7 +4796,9 @@ mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, allocated_map = B_TRUE; } - ASSERT(map->ma_group == group); + ASSERT(map->ma_group == NULL || map->ma_group == group); + if (map->ma_group == NULL) + map->ma_group = group; /* * If the MAC address is already in use, simply account for the @@ -4082,6 +4898,8 @@ mac_remove_macaddr(mac_address_t *map) return (0); err = mac_group_remmac(map->ma_group, map->ma_addr); + if (err == 0) + map->ma_group = NULL; break; case MAC_ADDRESS_TYPE_UNICAST_PROMISC: err = i_mac_promisc_set(mip, B_FALSE); @@ -4122,7 +4940,7 @@ mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) * Update the primary address for drivers that are not * RINGS capable. 
*/ - if (map->ma_group == NULL) { + if (mip->mi_rx_groups == NULL) { err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) mac_addr); if (err != 0) @@ -4223,11 +5041,6 @@ mac_init_macaddr(mac_impl_t *mip) if (mip->mi_rx_groups == NULL) map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; - /* - * The primary MAC address is reserved for default group according - * to current design. - */ - map->ma_group = mip->mi_rx_groups; map->ma_mip = mip; mip->mi_addresses = map; @@ -4258,6 +5071,11 @@ mac_fini_macaddr(mac_impl_t *mip) /* * Logging related functions. + * + * Note that Kernel statistics have been extended to maintain fine + * granularity of statistics viz. hardware lane, software lane, fanout + * stats etc. However, extended accounting continues to support only + * aggregate statistics like before. */ /* Write the Flow description to the log file */ @@ -4304,18 +5122,33 @@ mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) int mac_write_flow_stats(flow_entry_t *flent) { - flow_stats_t *fl_stats; - net_stat_t nstat; + net_stat_t nstat; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; - fl_stats = &flent->fe_flowstats; + bzero(&nstat, sizeof (net_stat_t)); nstat.ns_name = flent->fe_flow_name; - nstat.ns_ibytes = fl_stats->fs_rbytes; - nstat.ns_obytes = fl_stats->fs_obytes; - nstat.ns_ipackets = fl_stats->fs_ipackets; - nstat.ns_opackets = fl_stats->fs_opackets; - nstat.ns_ierrors = fl_stats->fs_ierrors; - nstat.ns_oerrors = fl_stats->fs_oerrors; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; + nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + nstat.ns_oerrors += mac_rx_stat->mrs_ierrors; + } + mac_srs = (mac_soft_ring_set_t 
*)(flent->fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + nstat.ns_obytes = mac_tx_stat->mts_obytes; + nstat.ns_opackets = mac_tx_stat->mts_opackets; + nstat.ns_oerrors = mac_tx_stat->mts_oerrors; + } return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC)); } @@ -4347,16 +5180,38 @@ mac_write_link_desc(mac_client_impl_t *mcip) int mac_write_link_stats(mac_client_impl_t *mcip) { - net_stat_t nstat; + net_stat_t nstat; + flow_entry_t *flent; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + bzero(&nstat, sizeof (net_stat_t)); nstat.ns_name = mcip->mci_name; - nstat.ns_ibytes = mcip->mci_stat_ibytes; - nstat.ns_obytes = mcip->mci_stat_obytes; - nstat.ns_ipackets = mcip->mci_stat_ipackets; - nstat.ns_opackets = mcip->mci_stat_opackets; - nstat.ns_ierrors = mcip->mci_stat_ierrors; - nstat.ns_oerrors = mcip->mci_stat_oerrors; + flent = mcip->mci_flent; + if (flent != NULL) { + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + nstat.ns_oerrors += mac_rx_stat->mrs_ierrors; + } + } + mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs); + if (mac_srs != NULL) { + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + nstat.ns_obytes = mac_tx_stat->mts_obytes; + nstat.ns_opackets = mac_tx_stat->mts_opackets; + nstat.ns_oerrors = mac_tx_stat->mts_oerrors; + } return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC)); } @@ -4706,181 +5561,255 @@ mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) mac_ring_t * mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) { - mac_group_t *group; - mac_ring_t *ring; + mac_group_t *group; + 
mac_grp_client_t *mgcp; + mac_client_impl_t *mcip; + mac_soft_ring_set_t *srs; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - if (mip->mi_tx_groups == NULL) - return (NULL); - /* * Find an available ring and start it before changing its status. * The unassigned rings are at the end of the mi_tx_groups * array. */ - group = mip->mi_tx_groups + mip->mi_tx_group_count; + group = MAC_DEFAULT_TX_GROUP(mip); - for (ring = group->mrg_rings; ring != NULL; - ring = ring->mr_next) { - if (desired_ring == NULL) { - if (ring->mr_state == MR_FREE) - /* wanted any free ring and found one */ - break; - } else { - mac_ring_t *sring; - mac_client_impl_t *client; - mac_soft_ring_set_t *srs; + /* Can't take the default ring out of the default group */ + ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring); - if (ring != desired_ring) - /* wants a desired ring but this one ain't it */ - continue; + if (desired_ring->mr_state == MR_FREE) { + ASSERT(MAC_GROUP_NO_CLIENT(group)); + if (mac_start_ring(desired_ring) != 0) + return (NULL); + return (desired_ring); + } + /* + * There are clients using this ring, so let's move the clients + * away from using this ring. + */ + for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { + mcip = mgcp->mgc_client; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + srs = MCIP_TX_SRS(mcip); + ASSERT(mac_tx_srs_ring_present(srs, desired_ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs, + desired_ring)); + mac_tx_srs_del_ring(srs, desired_ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + } + return (desired_ring); +} - if (ring->mr_state == MR_FREE) - break; +/* + * For a reserved group with multiple clients, return the primary client. 
+ */ +static mac_client_impl_t * +mac_get_grp_primary(mac_group_t *grp) +{ + mac_grp_client_t *mgcp = grp->mrg_clients; + mac_client_impl_t *mcip; + + while (mgcp != NULL) { + mcip = mgcp->mgc_client; + if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) + return (mcip); + mgcp = mgcp->mgc_next; + } + return (NULL); +} + +/* + * Hybrid I/O specifies the ring that should be given to a share. + * If the ring is already used by clients, then we need to release + * the ring back to the default group so that we can give it to + * the share. This means the clients using this ring now get a + * replacement ring. If there aren't any replacement rings, this + * function returns a failure. + */ +static int +mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type, + mac_ring_t *ring, mac_ring_t **rings, int nrings) +{ + mac_group_t *group = (mac_group_t *)ring->mr_gh; + mac_resource_props_t *mrp; + mac_client_impl_t *mcip; + mac_group_t *defgrp; + mac_ring_t *tring; + mac_group_t *tgrp; + int i; + int j; + mcip = MAC_GROUP_ONLY_CLIENT(group); + if (mcip == NULL) + mcip = mac_get_grp_primary(group); + ASSERT(mcip != NULL); + ASSERT(mcip->mci_share == NULL); + + mrp = MCIP_RESOURCE_PROPS(mcip); + if (ring_type == MAC_RING_TYPE_RX) { + defgrp = mip->mi_rx_donor_grp; + if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) { + /* Need to put this mac client in the default group */ + if (mac_rx_switch_group(mcip, group, defgrp) != 0) + return (ENOSPC); + } else { /* - * Found the desired ring but it's already in use. - * Swap it with a new ring. + * Switch this ring with some other ring from + * the default group. 
*/ - - /* find the client which owns that ring */ - for (client = mip->mi_clients_list; client != NULL; - client = client->mci_client_next) { - srs = MCIP_TX_SRS(client); - if (srs != NULL && mac_tx_srs_ring_present(srs, - desired_ring)) { - /* found our ring */ - break; + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + if (tring->mr_index == 0) + continue; + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; } + if (j >= nrings) + break; } - if (client == NULL) { - /* - * The TX ring is in use, but it's not - * associated with any clients, so it - * has to be the default ring. In that - * case we can simply assign a new ring - * as the default ring, and we're done. - */ - ASSERT(mip->mi_default_tx_ring == - (mac_ring_handle_t)desired_ring); - - /* - * Quiesce all clients on top of - * the NIC to make sure there are no - * pending threads still relying on - * that default ring, for example - * the multicast path. - */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) { - mac_tx_client_quiesce(client, - SRS_QUIESCE); - } - - mip->mi_default_tx_ring = (mac_ring_handle_t) - mac_reserve_tx_ring(mip, NULL); - - /* resume the clients */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) - mac_tx_client_restart(client); - - break; + if (tring == NULL) + return (ENOSPC); + if (mac_group_mov_ring(mip, group, tring) != 0) + return (ENOSPC); + if (mac_group_mov_ring(mip, defgrp, ring) != 0) { + (void) mac_group_mov_ring(mip, defgrp, tring); + return (ENOSPC); } + } + ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); + return (0); + } + defgrp = MAC_DEFAULT_TX_GROUP(mip); + if (ring == (mac_ring_t *)mip->mi_default_tx_ring) { + /* + * See if we can get a spare ring to replace the default + * ring. 
+ */ + if (defgrp->mrg_cur_count == 1) { /* - * Note that we cannot simply invoke the group - * add/rem routines since the client doesn't have a - * TX group. So we need to instead add/remove - * the rings from the SRS. + * Need to get a ring from another client, see if + * there are any clients that can be moved to + * the default group, thereby freeing some rings. */ - ASSERT(client->mci_share == NULL); - - /* first quiece the client */ - mac_tx_client_quiesce(client, SRS_QUIESCE); - - /* give a new ring to the client... */ - sring = mac_reserve_tx_ring(mip, NULL); - if (sring != NULL) { - /* - * There are no other available ring - * on that MAC instance. The client - * will fallback to the shared TX - * ring. - */ - mac_tx_srs_add_ring(srs, sring); - } - - /* ... in exchange for our desired ring */ - mac_tx_srs_del_ring(srs, desired_ring); - - /* restart the client */ - mac_tx_client_restart(client); - - if (mip->mi_default_tx_ring == - (mac_ring_handle_t)desired_ring) { - /* - * The desired ring is the default ring, - * and there are one or more clients - * using that default ring directly. - */ - mip->mi_default_tx_ring = - (mac_ring_handle_t)sring; - /* - * Find clients using default ring and - * swap it with the new default ring. - */ - for (client = mip->mi_clients_list; - client != NULL; - client = client->mci_client_next) { - srs = MCIP_TX_SRS(client); - if (srs != NULL && - mac_tx_srs_ring_present(srs, - desired_ring)) { - /* first quiece the client */ - mac_tx_client_quiesce(client, - SRS_QUIESCE); - - /* - * Give it the new default - * ring, and remove the old - * one. 
- */ - if (sring != NULL) { - mac_tx_srs_add_ring(srs, - sring); - } - mac_tx_srs_del_ring(srs, - desired_ring); - - /* restart the client */ - mac_tx_client_restart(client); + for (i = 0; i < mip->mi_tx_group_count; i++) { + tgrp = &mip->mi_tx_groups[i]; + if (tgrp->mrg_state == + MAC_GROUP_STATE_REGISTERED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { + ASSERT(tgrp->mrg_cur_count == 1); + /* + * If this ring is part of the + * rings asked by the share we cannot + * use it as the default ring. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tgrp->mrg_rings) + break; } + if (j < nrings) + continue; + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, tgrp, + defgrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + break; } } - break; + /* + * All the rings are reserved, can't give up the + * default ring. + */ + if (defgrp->mrg_cur_count <= 1) + return (ENOSPC); + } + /* + * Swap the default ring with another. + */ + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + /* + * If this ring is part of the rings asked by the + * share we cannot use it as the default ring. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; + } + if (j >= nrings) + break; } + ASSERT(tring != NULL); + mip->mi_default_tx_ring = (mac_ring_handle_t)tring; + return (0); } - - if (ring != NULL) { - if (mac_start_ring(ring) != 0) - return (NULL); - ring->mr_state = MR_INUSE; + /* + * The Tx ring is with a group reserved by a MAC client. See if + * we can swap it. 
+ */ + ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); + mcip = MAC_GROUP_ONLY_CLIENT(group); + if (mcip == NULL) + mcip = mac_get_grp_primary(group); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { + ASSERT(group->mrg_cur_count == 1); + /* Put this mac client in the default group */ + mac_tx_switch_group(mcip, group, defgrp); + } else { + /* + * Switch this ring with some other ring from + * the default group. + */ + for (tring = defgrp->mrg_rings; tring != NULL; + tring = tring->mr_next) { + if (tring == (mac_ring_t *)mip->mi_default_tx_ring) + continue; + /* + * If this ring is part of the rings asked by the + * share we cannot use it for swapping. + */ + for (j = 0; j < nrings; j++) { + if (rings[j] == tring) + break; + } + if (j >= nrings) + break; + } + if (tring == NULL) { + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } + if (mac_group_mov_ring(mip, group, tring) != 0) { + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } + if (mac_group_mov_ring(mip, defgrp, ring) != 0) { + (void) mac_group_mov_ring(mip, defgrp, tring); + mac_tx_client_restart((mac_client_handle_t)mcip); + return (ENOSPC); + } } - - return (ring); + mac_tx_client_restart((mac_client_handle_t)mcip); + ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); + return (0); } /* - * Minimum number of rings to leave in the default TX group when allocating - * rings to new clients. - */ -static uint_t mac_min_rx_default_rings = 1; - -/* * Populate a zero-ring group with rings. If the share is non-NULL, * the rings are chosen according to that share. 
* Invoked after allocating a new RX or TX group through @@ -4889,15 +5818,17 @@ static uint_t mac_min_rx_default_rings = 1; */ int i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, - mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share) + mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share, + uint32_t ringcnt) { - mac_ring_t **rings, *tmp_ring[1], *ring; + mac_ring_t **rings, *ring; uint_t nrings; - int rv, i, j; + int rv = 0, i = 0, j; - ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC && - mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(new_group->mrg_cur_count == 0); + ASSERT((ring_type == MAC_RING_TYPE_RX && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) || + (ring_type == MAC_RING_TYPE_TX && + mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)); /* * First find the rings to allocate to the group. @@ -4910,9 +5841,23 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, KM_SLEEP); mip->mi_share_capab.ms_squery(share, ring_type, (mac_ring_handle_t *)rings, &nrings); + for (i = 0; i < nrings; i++) { + /* + * If we have given this ring to a non-default + * group, we need to check if we can get this + * ring. + */ + ring = rings[i]; + if (ring->mr_gh != (mac_group_handle_t)src_group || + ring == (mac_ring_t *)mip->mi_default_tx_ring) { + if (mac_reclaim_ring_from_grp(mip, ring_type, + ring, rings, nrings) != 0) { + rv = ENOSPC; + goto bail; + } + } + } } else { - /* this function is called for TX only with a share */ - ASSERT(ring_type == MAC_RING_TYPE_RX); /* * Pick one ring from default group. * @@ -4922,23 +5867,37 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, * We need a better way for a driver to indicate this, * for example a per-ring flag. 
*/ + rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), + KM_SLEEP); for (ring = src_group->mrg_rings; ring != NULL; ring = ring->mr_next) { - if (ring->mr_index != 0) + if (ring_type == MAC_RING_TYPE_RX && + ring->mr_index == 0) { + continue; + } + if (ring_type == MAC_RING_TYPE_TX && + ring == (mac_ring_t *)mip->mi_default_tx_ring) { + continue; + } + rings[i++] = ring; + if (i == ringcnt) break; } ASSERT(ring != NULL); - nrings = 1; - tmp_ring[0] = ring; - rings = tmp_ring; + nrings = i; + /* Not enough rings as required */ + if (nrings != ringcnt) { + rv = ENOSPC; + goto bail; + } } switch (ring_type) { case MAC_RING_TYPE_RX: - if (src_group->mrg_cur_count - nrings < - mac_min_rx_default_rings) { + if (src_group->mrg_cur_count - nrings < 1) { /* we ran out of rings */ - return (ENOSPC); + rv = ENOSPC; + goto bail; } /* move receive rings to new group */ @@ -4950,7 +5909,7 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, (void) mac_group_mov_ring(mip, src_group, rings[j]); } - return (rv); + goto bail; } } break; @@ -4959,37 +5918,42 @@ i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, mac_ring_t *tmp_ring; /* move the TX rings to the new group */ - ASSERT(src_group == NULL); for (i = 0; i < nrings; i++) { /* get the desired ring */ tmp_ring = mac_reserve_tx_ring(mip, rings[i]); + if (tmp_ring == NULL) { + rv = ENOSPC; + goto bail; + } ASSERT(tmp_ring == rings[i]); rv = mac_group_mov_ring(mip, new_group, rings[i]); if (rv != 0) { /* cleanup on failure */ for (j = 0; j < i; j++) { (void) mac_group_mov_ring(mip, - mip->mi_tx_groups + - mip->mi_tx_group_count, rings[j]); + MAC_DEFAULT_TX_GROUP(mip), + rings[j]); } + goto bail; } } break; } } - if (share != NULL) { - /* add group to share */ + /* add group to share */ + if (share != NULL) mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); - /* free temporary array of rings */ - kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); - } - return (0); +bail: 
+ /* free temporary array of rings */ + kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); + + return (rv); } void -mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) +mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) { mac_grp_client_t *mgcp; @@ -5008,7 +5972,7 @@ mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) } void -mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) +mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) { mac_grp_client_t *mgcp, **pprev; @@ -5034,65 +5998,149 @@ mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) * largest number of rings, otherwise the default ring when available. */ mac_group_t * -mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, - mac_rx_group_reserve_type_t rtype) +mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) { mac_share_handle_t share = mcip->mci_share; mac_impl_t *mip = mcip->mci_mip; mac_group_t *grp = NULL; - int i, start, loopcount; - int err; + int i; + int err = 0; mac_address_t *map; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + int nrings; + int donor_grp_rcnt; + boolean_t need_exclgrp = B_FALSE; + int need_rings = 0; + mac_group_t *candidate_grp = NULL; + mac_client_impl_t *gclient; + mac_resource_props_t *gmrp; + mac_group_t *donorgrp = NULL; + boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; + boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; + boolean_t isprimary; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - /* Check if a group already has this mac address (case of VLANs) */ - if ((map = mac_find_macaddr(mip, mac_addr)) != NULL) - return (map->ma_group); + isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + + /* + * Check if a group already has this mac address (case of VLANs) + * unless we are moving this MAC client from one group to another. 
+ */ + if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { + if (map->ma_group != NULL) + return (map->ma_group); + } + if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) + return (NULL); + /* + * If exclusive open, return NULL which will enable the + * caller to use the default group. + */ + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) + return (NULL); - if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 || - rtype == MAC_RX_NO_RESERVE) + /* For dynamic groups default unspecified to 1 */ + if (rxhw && unspec && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + mrp->mrp_nrxrings = 1; + } + /* + * For static grouping we allow only specifying rings=0 and + * unspecified + */ + if (rxhw && mrp->mrp_nrxrings > 0 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { return (NULL); + } + if (rxhw) { + /* + * We have explicitly asked for a group (with nrxrings, + * if unspec). + */ + if (unspec || mrp->mrp_nrxrings > 0) { + need_exclgrp = B_TRUE; + need_rings = mrp->mrp_nrxrings; + } else if (mrp->mrp_nrxrings == 0) { + /* + * We have asked for a software group. + */ + return (NULL); + } + } else if (isprimary && mip->mi_nactiveclients == 1 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + /* + * If the primary is the only active client on this + * mip and we have not asked for any rings, we give + * it the default group so that the primary gets to + * use all the rings. + */ + return (NULL); + } + + /* The group that can donate rings */ + donorgrp = mip->mi_rx_donor_grp; + + /* + * The number of rings that the default group can donate. + * We need to leave at least one ring. + */ + donor_grp_rcnt = donorgrp->mrg_cur_count - 1; /* * Try to exclusively reserve a RX group. 
* - * For flows requires SW_RING it always goes to the default group - * (Until we can explicitely call out default groups (CR 6695600), - * we assume that the default group is always at position zero); + * For flows requiring HW_DEFAULT_RING (unicast flow of the primary + * client), try to reserve the a non-default RX group and give + * it all the rings from the donor group, except the default ring * - * For flows requires HW_DEFAULT_RING (unicast flow of the primary - * client), try to reserve the default RX group only. + * For flows requiring HW_RING (unicast flow of other clients), try + * to reserve non-default RX group with the specified number of + * rings, if available. * - * For flows requires HW_RING (unicast flow of other clients), try - * to reserve non-default RX group then the default group. + * For flows that have not asked for software or hardware ring, + * try to reserve a non-default group with 1 ring, if available. */ - switch (rtype) { - case MAC_RX_RESERVE_DEFAULT: - start = 0; - loopcount = 1; - break; - case MAC_RX_RESERVE_NONDEFAULT: - start = 1; - loopcount = mip->mi_rx_group_count; - } - - for (i = start; i < start + loopcount; i++) { - grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count]; + for (i = 1; i < mip->mi_rx_group_count; i++) { + grp = &mip->mi_rx_groups[i]; DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, int, grp->mrg_index, mac_group_state_t, grp->mrg_state); /* - * Check to see whether this mac client is the only client - * on this RX group. If not, we cannot exclusively reserve - * this RX group. + * Check if this group could be a candidate group for + * eviction if we need a group for this MAC client, + * but there aren't any. 
A candidate group is one + * that didn't ask for an exclusive group, but got + * one and it has enough rings (combined with what + * the donor group can donate) for the new MAC + * client */ - if (!MAC_RX_GROUP_NO_CLIENT(grp) && - (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) { + if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { + /* + * If the primary/donor group is not the default + * group, don't bother looking for a candidate group. + * If we don't have enough rings we will check + * if the primary group can be vacated. + */ + if (candidate_grp == NULL && + donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { + ASSERT(!MAC_GROUP_NO_CLIENT(grp)); + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + ASSERT(gclient != NULL); + gmrp = MCIP_RESOURCE_PROPS(gclient); + if (gclient->mci_share == NULL && + (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && + (unspec || + (grp->mrg_cur_count + donor_grp_rcnt >= + need_rings))) { + candidate_grp = grp; + } + } continue; } - /* * This group could already be SHARED by other multicast * flows on this client. In that case, the group would @@ -5105,35 +6153,133 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, continue; } - if ((i % mip->mi_rx_group_count) == 0 || - mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) break; - } - ASSERT(grp->mrg_cur_count == 0); /* * Populate the group. Rings should be taken - * from the default group at position 0 for now. + * from the donor group. */ + nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; - err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, - &mip->mi_rx_groups[0], grp, share); - if (err == 0) - break; + /* + * If the donor group can't donate, let's just walk and + * see if someone can vacate a group, so that we have + * enough rings for this, unless we already have + * identified a candiate group.. 
+ */ + if (nrings <= donor_grp_rcnt) { + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, + donorgrp, grp, share, nrings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, + grp->mrg_cur_count, -1); + } + if (mac_is_primary_client(mcip) && !rxhw) + mip->mi_rx_donor_grp = grp; + break; + } + } DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, mip->mi_name, int, grp->mrg_index, int, err); /* - * It's a dynamic group but the grouping operation failed. + * It's a dynamic group but the grouping operation + * failed. */ mac_stop_group(grp); } + /* We didn't find an exclusive group for this MAC client */ + if (i >= mip->mi_rx_group_count) { - if (i == start + loopcount) - return (NULL); + if (!need_exclgrp) + return (NULL); + /* + * If we found a candidate group then we switch the + * MAC client from the candidate_group to the default + * group and give the group to this MAC client. If + * we didn't find a candidate_group, check if the + * primary is in its own group and if it can make way + * for this MAC client. + */ + if (candidate_grp == NULL && + donorgrp != MAC_DEFAULT_RX_GROUP(mip) && + donorgrp->mrg_cur_count >= need_rings) { + candidate_grp = donorgrp; + } + if (candidate_grp != NULL) { + boolean_t prim_grp = B_FALSE; + + /* + * Switch the MAC client from the candidate group + * to the default group.. If this group was the + * donor group, then after the switch we need + * to update the donor group too. 
+ */ + grp = candidate_grp; + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + if (grp == mip->mi_rx_donor_grp) + prim_grp = B_TRUE; + if (mac_rx_switch_group(gclient, grp, + MAC_DEFAULT_RX_GROUP(mip)) != 0) { + return (NULL); + } + if (prim_grp) { + mip->mi_rx_donor_grp = + MAC_DEFAULT_RX_GROUP(mip); + donorgrp = MAC_DEFAULT_RX_GROUP(mip); + } + + + /* + * Now give this group with the required rings + * to this MAC client. + */ + ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); + if (mac_start_group(grp) != 0) + return (NULL); + + if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) + return (grp); + + donor_grp_rcnt = donorgrp->mrg_cur_count - 1; + ASSERT(grp->mrg_cur_count == 0); + ASSERT(donor_grp_rcnt >= need_rings); + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, + donorgrp, grp, share, need_rings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, + grp->mrg_cur_count, -1); + } + DTRACE_PROBE2(rx__group__reserved, + char *, mip->mi_name, int, grp->mrg_index); + return (grp); + } + DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, + mip->mi_name, int, grp->mrg_index, int, err); + mac_stop_group(grp); + } + return (NULL); + } ASSERT(grp != NULL); DTRACE_PROBE2(rx__group__reserved, @@ -5152,10 +6298,13 @@ mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, void mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) { - mac_impl_t *mip = mcip->mci_mip; - mac_ring_t *ring; + mac_impl_t *mip = mcip->mci_mip; + mac_ring_t *ring; - ASSERT(group != &mip->mi_rx_groups[0]); + ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + + if (mip->mi_rx_donor_grp == group) + mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); /* * This is the case where there are no clients left. 
Any @@ -5170,10 +6319,12 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) */ ring->mr_srs = NULL; } - ASSERT(ring->mr_state == MR_INUSE); - mac_stop_ring(ring); - ring->mr_state = MR_FREE; - ring->mr_flag = 0; + ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || + ring->mr_state == MR_INUSE); + if (ring->mr_state == MR_INUSE) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } } /* remove group from share */ @@ -5190,8 +6341,8 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) * Move rings back to default group. */ while ((ring = group->mrg_rings) != NULL) { - (void) mac_group_mov_ring(mip, - &mip->mi_rx_groups[0], ring); + (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, + ring); } } mac_stop_group(group); @@ -5202,86 +6353,637 @@ mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) } /* + * When we move the primary's mac address between groups, we need to also + * take all the clients sharing the same mac address along with it (VLANs) + * We remove the mac address for such clients from the group after quiescing + * them. When we add the mac address we restart the client. Note that + * the primary's mac address is removed from the group after all the + * other clients sharing the address are removed. Similarly, the primary's + * mac address is added before all the other client's mac address are + * added. While grp is the group where the clients reside, tgrp is + * the group where the addresses have to be added. + */ +static void +mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, + mac_group_t *tgrp, uint8_t *maddr, boolean_t add) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_grp_client_t *mgcp = grp->mrg_clients; + mac_client_impl_t *gmcip; + boolean_t prim; + + prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + + /* + * If the clients are in a non-default group, we just have to + * walk the group's client list. 
If it is in the default group + * (which will be shared by other clients as well, we need to + * check if the unicast address matches mcip's unicast. + */ + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + if (gmcip != mcip && + (grp != MAC_DEFAULT_RX_GROUP(mip) || + mcip->mci_unicast == gmcip->mci_unicast)) { + if (!add) { + mac_rx_client_quiesce( + (mac_client_handle_t)gmcip); + (void) mac_remove_macaddr(mcip->mci_unicast); + } else { + (void) mac_add_macaddr(mip, tgrp, maddr, prim); + mac_rx_client_restart( + (mac_client_handle_t)gmcip); + } + } + mgcp = mgcp->mgc_next; + } +} + + +/* + * Move the MAC address from fgrp to tgrp. If this is the primary client, + * we need to take any VLANs etc. together too. + */ +static int +mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + mac_impl_t *mip = mcip->mci_mip; + uint8_t maddr[MAXMACADDRLEN]; + int err = 0; + boolean_t prim; + boolean_t multiclnt = B_FALSE; + + mac_rx_client_quiesce((mac_client_handle_t)mcip); + ASSERT(mcip->mci_unicast != NULL); + bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); + + prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; + if (mcip->mci_unicast->ma_nusers > 1) { + mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); + multiclnt = B_TRUE; + } + ASSERT(mcip->mci_unicast->ma_nusers == 1); + err = mac_remove_macaddr(mcip->mci_unicast); + if (err != 0) { + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) { + mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, + B_TRUE); + } + return (err); + } + /* + * Program the H/W Classifier first, if this fails we need + * not proceed with the other stuff. + */ + if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { + /* Revert back the H/W Classifier */ + if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { + /* + * This should not fail now since it worked earlier, + * should we panic? 
+ */ + cmn_err(CE_WARN, + "mac_rx_switch_group: switching %p back" + " to group %p failed!!", (void *)mcip, + (void *)fgrp); + } + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) { + mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, + B_TRUE); + } + return (err); + } + mcip->mci_unicast = mac_find_macaddr(mip, maddr); + mac_rx_client_restart((mac_client_handle_t)mcip); + if (multiclnt) + mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); + return (err); +} + +/* + * Switch the MAC client from one group to another. This means we need + * to remove the MAC address from the group, remove the MAC client, + * teardown the SRSs and revert the group state. Then, we add the client + * to the destination group, set the SRSs, and add the MAC address to the + * group. + */ +int +mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + int err; + mac_group_state_t next_state; + mac_client_impl_t *group_only_mcip; + mac_client_impl_t *gmcip; + mac_impl_t *mip = mcip->mci_mip; + mac_grp_client_t *mgcp; + + ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); + + if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) + return (err); + + /* + * The group might be reserved, but SRSs may not be set up, e.g. + * primary and its vlans using a reserved group. 
+ */ + if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && + MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { + mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); + } + if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + mac_group_remove_client(fgrp, gmcip); + mac_group_add_client(tgrp, gmcip); + gmcip->mci_flent->fe_rx_ring_group = tgrp; + } + mac_release_rx_group(mcip, fgrp); + ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); + mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); + } else { + mac_group_remove_client(fgrp, mcip); + mac_group_add_client(tgrp, mcip); + mcip->mci_flent->fe_rx_ring_group = tgrp; + /* + * If there are other clients (VLANs) sharing this address + * we should be here only for the primary. + */ + if (mcip->mci_unicast->ma_nusers > 1) { + /* + * We need to move all the clients that are using + * this h/w address. + */ + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (mcip->mci_unicast == gmcip->mci_unicast) { + mac_group_remove_client(fgrp, gmcip); + mac_group_add_client(tgrp, gmcip); + gmcip->mci_flent->fe_rx_ring_group = + tgrp; + } + } + } + /* + * The default group will still take the multicast, + * broadcast traffic etc., so it won't go to + * MAC_GROUP_STATE_REGISTERED. + */ + if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) + mac_rx_group_unmark(fgrp, MR_CONDEMNED); + mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); + } + next_state = mac_group_next_state(tgrp, &group_only_mcip, + MAC_DEFAULT_RX_GROUP(mip), B_TRUE); + mac_set_group_state(tgrp, next_state); + /* + * If the destination group is reserved, setup the SRSs etc. 
+ */ + if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { + mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); + mac_fanout_setup(mcip, mcip->mci_flent, + MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, + NULL); + mac_rx_group_unmark(tgrp, MR_INCIPIENT); + } else { + mac_rx_switch_grp_to_sw(tgrp); + } + return (0); +} + +/* * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() * when a share was allocated to the client. */ mac_group_t * -mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share) +mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) { - mac_group_t *grp; - int rv, i; + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *grp = NULL; + int rv; + int i; + int err; + mac_group_t *defgrp; + mac_share_handle_t share = mcip->mci_share; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + int nrings; + int defnrings; + boolean_t need_exclgrp = B_FALSE; + int need_rings = 0; + mac_group_t *candidate_grp = NULL; + mac_client_impl_t *gclient; + mac_resource_props_t *gmrp; + boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; + boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; + boolean_t isprimary; + + isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; + /* + * When we come here for a VLAN on the primary (dladm create-vlan), + * we need to pair it along with the primary (to keep it consistent + * with the RX side). So, we check if the primary is already assigned + * to a group and return the group if so. The other way is also + * true, i.e. the VLAN is already created and now we are plumbing + * the primary. 
+ */ + if (!move && isprimary) { + for (gclient = mip->mi_clients_list; gclient != NULL; + gclient = gclient->mci_client_next) { + if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && + gclient->mci_flent->fe_tx_ring_group != NULL) { + return (gclient->mci_flent->fe_tx_ring_group); + } + } + } + + if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) + return (NULL); + + /* For dynamic groups, default unspec to 1 */ + if (txhw && unspec && + mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + mrp->mrp_ntxrings = 1; + } + /* + * For static grouping we allow only specifying rings=0 and + * unspecified + */ + if (txhw && mrp->mrp_ntxrings > 0 && + mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { + return (NULL); + } + + if (txhw) { + /* + * We have explicitly asked for a group (with ntxrings, + * if unspec). + */ + if (unspec || mrp->mrp_ntxrings > 0) { + need_exclgrp = B_TRUE; + need_rings = mrp->mrp_ntxrings; + } else if (mrp->mrp_ntxrings == 0) { + /* + * We have asked for a software group. + */ + return (NULL); + } + } + defgrp = MAC_DEFAULT_TX_GROUP(mip); + /* + * The number of rings that the default group can donate. + * We need to leave at least one ring - the default ring - in + * this group. + */ + defnrings = defgrp->mrg_cur_count - 1; /* - * TX groups are currently allocated only to MAC clients - * which are associated with a share. Since we have a fixed - * number of share and groups, and we already successfully - * allocated a share, find an available TX group. + * Primary gets default group unless explicitly told not + * to (i.e. rings > 0). */ - ASSERT(share != NULL); - ASSERT(mip->mi_tx_group_free > 0); + if (isprimary && !need_exclgrp) + return (NULL); + nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? 
mrp->mrp_ntxrings : 1; for (i = 0; i < mip->mi_tx_group_count; i++) { grp = &mip->mi_tx_groups[i]; - if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || - (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) + (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { + /* + * Select a candidate for replacement if we don't + * get an exclusive group. A candidate group is one + * that didn't ask for an exclusive group, but got + * one and it has enough rings (combined with what + * the default group can donate) for the new MAC + * client. + */ + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && + candidate_grp == NULL) { + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + gmrp = MCIP_RESOURCE_PROPS(gclient); + if (gclient->mci_share == NULL && + (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && + (unspec || + (grp->mrg_cur_count + defnrings) >= + need_rings)) { + candidate_grp = grp; + } + } continue; + } + /* + * If the default can't donate let's just walk and + * see if someone can vacate a group, so that we have + * enough rings for this. + */ + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || + nrings <= defnrings) { + if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { + rv = mac_start_group(grp); + ASSERT(rv == 0); + } + break; + } + } - rv = mac_start_group(grp); - ASSERT(rv == 0); + /* The default group */ + if (i >= mip->mi_tx_group_count) { + /* + * If we need an exclusive group and have identified a + * candidate group we switch the MAC client from the + * candidate group to the default group and give the + * candidate group to this client. + */ + if (need_exclgrp && candidate_grp != NULL) { + /* + * Switch the MAC client from the candidate group + * to the default group. 
+ */ + grp = candidate_grp; + gclient = MAC_GROUP_ONLY_CLIENT(grp); + if (gclient == NULL) + gclient = mac_get_grp_primary(grp); + mac_tx_client_quiesce((mac_client_handle_t)gclient); + mac_tx_switch_group(gclient, grp, defgrp); + mac_tx_client_restart((mac_client_handle_t)gclient); - grp->mrg_state = MAC_GROUP_STATE_RESERVED; - break; - } + /* + * Give the candidate group with the specified number + * of rings to this MAC client. + */ + ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); + rv = mac_start_group(grp); + ASSERT(rv == 0); - ASSERT(grp != NULL); + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) + return (grp); + + ASSERT(grp->mrg_cur_count == 0); + ASSERT(defgrp->mrg_cur_count > need_rings); + err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, + defgrp, grp, share, need_rings); + if (err == 0) { + /* + * For a share i_mac_group_allocate_rings gets + * the rings from the driver, let's populate + * the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings( + (mac_client_handle_t)mcip, -1, + grp->mrg_cur_count); + } + mip->mi_tx_group_free--; + return (grp); + } + DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, + mip->mi_name, int, grp->mrg_index, int, err); + mac_stop_group(grp); + } + return (NULL); + } /* - * Populate the group. Rings should be taken from the group - * of unassigned rings, which is past the array of TX - * groups adversized by the driver. + * We got an exclusive group, but it is not dynamic. 
*/ - rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL, - grp, share); + if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + mip->mi_tx_group_free--; + return (grp); + } + + rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, + share, nrings); if (rv != 0) { DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, mip->mi_name, int, grp->mrg_index, int, rv); - mac_stop_group(grp); - grp->mrg_state = MAC_GROUP_STATE_UNINIT; - return (NULL); } - + /* + * For a share i_mac_group_allocate_rings gets the rings from the + * driver, let's populate the property for the client now. + */ + if (share != NULL) { + mac_client_set_rings((mac_client_handle_t)mcip, -1, + grp->mrg_cur_count); + } mip->mi_tx_group_free--; - return (grp); } void -mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp) +mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) { - mac_client_impl_t *mcip = grp->mrg_tx_client; - mac_share_handle_t share = mcip->mci_share; - mac_ring_t *ring; - - ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC); - ASSERT(share != NULL); - ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED); + mac_impl_t *mip = mcip->mci_mip; + mac_share_handle_t share = mcip->mci_share; + mac_ring_t *ring; + mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); + mac_group_t *defgrp; + + defgrp = MAC_DEFAULT_TX_GROUP(mip); + if (srs != NULL) { + if (srs->srs_soft_ring_count > 0) { + for (ring = grp->mrg_rings; ring != NULL; + ring = ring->mr_next) { + ASSERT(mac_tx_srs_ring_present(srs, ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t) + mac_tx_srs_get_soft_ring(srs, ring)); + mac_tx_srs_del_ring(srs, ring); + } + } else { + ASSERT(srs->srs_tx.st_arg2 != NULL); + srs->srs_tx.st_arg2 = NULL; + mac_srs_stat_delete(srs); + } + } + if (share != NULL) + mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); - mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); - while ((ring = grp->mrg_rings) != NULL) { - /* move the ring back to the pool */ - 
(void) mac_group_mov_ring(mip, mip->mi_tx_groups + - mip->mi_tx_group_count, ring); + /* move the ring back to the pool */ + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + while ((ring = grp->mrg_rings) != NULL) + (void) mac_group_mov_ring(mip, defgrp, ring); } mac_stop_group(grp); - mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED); - grp->mrg_tx_client = NULL; mip->mi_tx_group_free++; } /* + * Disassociate a MAC client from a group, i.e go through the rings in the + * group and delete all the soft rings tied to them. + */ +static void +mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) +{ + mac_client_impl_t *mcip = flent->fe_mcip; + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + mac_ring_t *ring; + + tx_srs = flent->fe_tx_srs; + tx = &tx_srs->srs_tx; + + /* Single ring case we haven't created any soft rings */ + if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || + tx->st_mode == SRS_TX_DEFAULT) { + tx->st_arg2 = NULL; + mac_srs_stat_delete(tx_srs); + /* Fanout case, where we have to dismantle the soft rings */ + } else { + for (ring = fgrp->mrg_rings; ring != NULL; + ring = ring->mr_next) { + ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); + mac_tx_invoke_callbacks(mcip, + (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, + ring)); + mac_tx_srs_del_ring(tx_srs, ring); + } + ASSERT(tx->st_arg2 == NULL); + } +} + +/* + * Switch the MAC client from one group to another. This means we need + * to remove the MAC client, teardown the SRSs and revert the group state. + * Then, we add the client to the destination roup, set the SRSs etc. 
+ */ +void +mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, + mac_group_t *tgrp) +{ + mac_client_impl_t *group_only_mcip; + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *defgrp; + mac_grp_client_t *mgcp; + mac_client_impl_t *gmcip; + flow_entry_t *gflent; + + defgrp = MAC_DEFAULT_TX_GROUP(mip); + ASSERT(fgrp == flent->fe_tx_ring_group); + + if (fgrp == defgrp) { + /* + * If this is the primary we need to find any VLANs on + * the primary and move them too. + */ + mac_group_remove_client(fgrp, mcip); + mac_tx_dismantle_soft_rings(fgrp, flent); + if (mcip->mci_unicast->ma_nusers > 1) { + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (mcip->mci_unicast != gmcip->mci_unicast) + continue; + mac_tx_client_quiesce( + (mac_client_handle_t)gmcip); + + gflent = gmcip->mci_flent; + mac_group_remove_client(fgrp, gmcip); + mac_tx_dismantle_soft_rings(fgrp, gflent); + + mac_group_add_client(tgrp, gmcip); + gflent->fe_tx_ring_group = tgrp; + /* We could directly set this to SHARED */ + tgrp->mrg_state = mac_group_next_state(tgrp, + &group_only_mcip, defgrp, B_FALSE); + + mac_tx_srs_group_setup(gmcip, gflent, + SRST_LINK); + mac_fanout_setup(gmcip, gflent, + MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, + gmcip, NULL, NULL); + + mac_tx_client_restart( + (mac_client_handle_t)gmcip); + } + } + if (MAC_GROUP_NO_CLIENT(fgrp)) { + mac_ring_t *ring; + int cnt; + int ringcnt; + + fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; + /* + * Additionally, we also need to stop all + * the rings in the default group, except + * the default ring. The reason being + * this group won't be released since it is + * the default group, so the rings won't + * be stopped otherwise. 
+ */ + ringcnt = fgrp->mrg_cur_count; + ring = fgrp->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state == MR_INUSE && + ring != + (mac_ring_t *)mip->mi_default_tx_ring) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } + ring = ring->mr_next; + } + } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { + fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; + } else { + ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); + } + } else { + /* + * We could have VLANs sharing the non-default group with + * the primary. + */ + mgcp = fgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + mgcp = mgcp->mgc_next; + if (gmcip == mcip) + continue; + mac_tx_client_quiesce((mac_client_handle_t)gmcip); + gflent = gmcip->mci_flent; + + mac_group_remove_client(fgrp, gmcip); + mac_tx_dismantle_soft_rings(fgrp, gflent); + + mac_group_add_client(tgrp, gmcip); + gflent->fe_tx_ring_group = tgrp; + /* We could directly set this to SHARED */ + tgrp->mrg_state = mac_group_next_state(tgrp, + &group_only_mcip, defgrp, B_FALSE); + mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); + mac_fanout_setup(gmcip, gflent, + MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, + gmcip, NULL, NULL); + + mac_tx_client_restart((mac_client_handle_t)gmcip); + } + mac_group_remove_client(fgrp, mcip); + mac_release_tx_group(mcip, fgrp); + fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; + } + + /* Add it to the tgroup */ + mac_group_add_client(tgrp, mcip); + flent->fe_tx_ring_group = tgrp; + tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, + defgrp, B_FALSE); + + mac_tx_srs_group_setup(mcip, flent, SRST_LINK); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, NULL); +} + +/* * This is a 1-time control path activity initiated by the client (IP). 
* The mac perimeter protects against other simultaneous control activities, * for example an ioctl that attempts to change the degree of fanout and @@ -5416,3 +7118,599 @@ mac_no_active(mac_handle_t mh) mip->mi_state_flags |= MIS_NO_ACTIVE; i_mac_perim_exit(mip); } + +/* + * Walk the primary VLAN clients whenever the primary's rings property + * changes and update the mac_resource_props_t for the VLAN's client. + * We need to do this since we don't support setting these properties + * on the primary's VLAN clients, but the VLAN clients have to + * follow the primary w.r.t the rings property; + */ +void +mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) +{ + mac_client_impl_t *vmcip; + mac_resource_props_t *vmrp; + + for (vmcip = mip->mi_clients_list; vmcip != NULL; + vmcip = vmcip->mci_client_next) { + if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || + mac_client_vid((mac_client_handle_t)vmcip) == + VLAN_ID_NONE) { + continue; + } + vmrp = MCIP_RESOURCE_PROPS(vmcip); + + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + if (mrp->mrp_mask & MRP_RX_RINGS) + vmrp->mrp_mask |= MRP_RX_RINGS; + else if (vmrp->mrp_mask & MRP_RX_RINGS) + vmrp->mrp_mask &= ~MRP_RX_RINGS; + + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + if (mrp->mrp_mask & MRP_TX_RINGS) + vmrp->mrp_mask |= MRP_TX_RINGS; + else if (vmrp->mrp_mask & MRP_TX_RINGS) + vmrp->mrp_mask &= ~MRP_TX_RINGS; + + if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else + vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else + vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + } +} + +/* + * We are adding or removing ring(s) from a group. The source for taking + * rings is the default group. The destination for giving rings back is + * the default group. 
+ */ +int +mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, + mac_group_t *defgrp) +{ + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint_t modify; + int count; + mac_ring_t *ring; + mac_ring_t *next; + mac_impl_t *mip = mcip->mci_mip; + mac_ring_t **rings; + uint_t ringcnt; + int i = 0; + boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; + int start; + int end; + mac_group_t *tgrp; + int j; + int rv = 0; + + /* + * If we are asked for just a group, we give 1 ring, else + * the specified number of rings. + */ + if (rx_group) { + ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: + mrp->mrp_nrxrings; + } else { + ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: + mrp->mrp_ntxrings; + } + + /* don't allow modifying rings for a share for now. */ + ASSERT(mcip->mci_share == NULL); + + if (ringcnt == group->mrg_cur_count) + return (0); + + if (group->mrg_cur_count > ringcnt) { + modify = group->mrg_cur_count - ringcnt; + if (rx_group) { + if (mip->mi_rx_donor_grp == group) { + ASSERT(mac_is_primary_client(mcip)); + mip->mi_rx_donor_grp = defgrp; + } else { + defgrp = mip->mi_rx_donor_grp; + } + } + ring = group->mrg_rings; + rings = kmem_alloc(modify * sizeof (mac_ring_handle_t), + KM_SLEEP); + j = 0; + for (count = 0; count < modify; count++) { + next = ring->mr_next; + rv = mac_group_mov_ring(mip, defgrp, ring); + if (rv != 0) { + /* cleanup on failure */ + for (j = 0; j < count; j++) { + (void) mac_group_mov_ring(mip, group, + rings[j]); + } + break; + } + rings[j++] = ring; + ring = next; + } + kmem_free(rings, modify * sizeof (mac_ring_handle_t)); + return (rv); + } + if (ringcnt >= MAX_RINGS_PER_GROUP) + return (EINVAL); + + modify = ringcnt - group->mrg_cur_count; + + if (rx_group) { + if (group != mip->mi_rx_donor_grp) + defgrp = mip->mi_rx_donor_grp; + else + /* + * This is the donor group with all the remaining + * rings. 
Default group now gets to be the donor + */ + mip->mi_rx_donor_grp = defgrp; + start = 1; + end = mip->mi_rx_group_count; + } else { + start = 0; + end = mip->mi_tx_group_count - 1; + } + /* + * If the default doesn't have any rings, lets see if we can + * take rings given to an h/w client that doesn't need it. + * For now, we just see if there is any one client that can donate + * all the required rings. + */ + if (defgrp->mrg_cur_count < (modify + 1)) { + for (i = start; i < end; i++) { + if (rx_group) { + tgrp = &mip->mi_rx_groups[i]; + if (tgrp == group || tgrp->mrg_state < + MAC_GROUP_STATE_RESERVED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + ASSERT(mcip != NULL); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) + continue; + if ((tgrp->mrg_cur_count + + defgrp->mrg_cur_count) < (modify + 1)) { + continue; + } + if (mac_rx_switch_group(mcip, tgrp, + defgrp) != 0) { + return (ENOSPC); + } + } else { + tgrp = &mip->mi_tx_groups[i]; + if (tgrp == group || tgrp->mrg_state < + MAC_GROUP_STATE_RESERVED) { + continue; + } + mcip = MAC_GROUP_ONLY_CLIENT(tgrp); + if (mcip == NULL) + mcip = mac_get_grp_primary(tgrp); + mrp = MCIP_RESOURCE_PROPS(mcip); + if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) + continue; + if ((tgrp->mrg_cur_count + + defgrp->mrg_cur_count) < (modify + 1)) { + continue; + } + /* OK, we can switch this to s/w */ + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, tgrp, defgrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + } + } + if (defgrp->mrg_cur_count < (modify + 1)) + return (ENOSPC); + } + if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp, + group, mcip->mci_share, modify)) != 0) { + return (rv); + } + return (0); +} + +/* + * Given the poolname in mac_resource_props, find the cpupart + * that is associated with this pool. 
The cpupart will be used + * later for finding the cpus to be bound to the networking threads. + * + * use_default is set B_TRUE if pools are enabled and pool_default + * is returned. This avoids a 2nd lookup to set the poolname + * for pool-effective. + * + * returns: + * + * NULL - pools are disabled or if the 'cpus' property is set. + * cpupart of pool_default - pools are enabled and the pool + * is not available or poolname is blank + * cpupart of named pool - pools are enabled and the pool + * is available. + */ +cpupart_t * +mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) +{ + pool_t *pool; + cpupart_t *cpupart; + + *use_default = B_FALSE; + + /* CPUs property is set */ + if (mrp->mrp_mask & MRP_CPUS) + return (NULL); + + ASSERT(pool_lock_held()); + + /* Pools are disabled, no pset */ + if (pool_state == POOL_DISABLED) + return (NULL); + + /* Pools property is set */ + if (mrp->mrp_mask & MRP_POOL) { + if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { + /* Pool not found */ + DTRACE_PROBE1(mac_pset_find_no_pool, char *, + mrp->mrp_pool); + *use_default = B_TRUE; + pool = pool_default; + } + /* Pools property is not set */ + } else { + *use_default = B_TRUE; + pool = pool_default; + } + + /* Find the CPU pset that corresponds to the pool */ + mutex_enter(&cpu_lock); + if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { + DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, + pool->pool_pset->pset_id); + } + mutex_exit(&cpu_lock); + + return (cpupart); +} + +void +mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, + mac_resource_props_t *mrp, mac_resource_props_t *emrp) +{ + ASSERT(pool_lock_held()); + + if (cpupart != NULL) { + emrp->mrp_mask |= MRP_POOL; + if (use_default) { + (void) strcpy(emrp->mrp_pool, + "pool_default"); + } else { + ASSERT(strlen(mrp->mrp_pool) != 0); + (void) strcpy(emrp->mrp_pool, + mrp->mrp_pool); + } + } else { + emrp->mrp_mask &= ~MRP_POOL; + bzero(emrp->mrp_pool, MAXPATHLEN); + 
} +} + +struct mac_pool_arg { + char mpa_poolname[MAXPATHLEN]; + pool_event_t mpa_what; +}; + +/*ARGSUSED*/ +static uint_t +mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + struct mac_pool_arg *mpa = arg; + mac_impl_t *mip = (mac_impl_t *)val; + mac_client_impl_t *mcip; + mac_resource_props_t *mrp, *emrp; + boolean_t pool_update = B_FALSE; + boolean_t pool_clear = B_FALSE; + boolean_t use_default = B_FALSE; + cpupart_t *cpupart = NULL; + + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + i_mac_perim_enter(mip); + for (mcip = mip->mi_clients_list; mcip != NULL; + mcip = mcip->mci_client_next) { + pool_update = B_FALSE; + pool_clear = B_FALSE; + use_default = B_FALSE; + mac_client_get_resources((mac_client_handle_t)mcip, mrp); + emrp = MCIP_EFFECTIVE_PROPS(mcip); + + /* + * When pools are enabled + */ + if ((mpa->mpa_what == POOL_E_ENABLE) && + ((mrp->mrp_mask & MRP_CPUS) == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + + /* + * When pools are disabled + */ + if ((mpa->mpa_what == POOL_E_DISABLE) && + ((mrp->mrp_mask & MRP_CPUS) == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_clear = B_TRUE; + } + + /* + * Look for links with the pool property set and the poolname + * matching the one which is changing. + */ + if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) { + /* + * The pool associated with the link has changed. + */ + if (mpa->mpa_what == POOL_E_CHANGE) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + } + + /* + * This link is associated with pool_default and + * pool_default has changed. + */ + if ((mpa->mpa_what == POOL_E_CHANGE) && + (strcmp(emrp->mrp_pool, "pool_default") == 0) && + (strcmp(mpa->mpa_poolname, "pool_default") == 0)) { + mrp->mrp_mask |= MRP_POOL; + pool_update = B_TRUE; + } + + /* + * Get new list of cpus for the pool, bind network + * threads to new list of cpus and update resources. 
+ */ + if (pool_update) { + if (MCIP_DATAPATH_SETUP(mcip)) { + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, mcip->mci_flent, mrp, + mac_rx_deliver, mcip, NULL, cpupart); + mac_set_pool_effective(use_default, cpupart, + mrp, emrp); + pool_unlock(); + } + mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), + B_FALSE); + } + + /* + * Clear the effective pool and bind network threads + * to any available CPU. + */ + if (pool_clear) { + if (MCIP_DATAPATH_SETUP(mcip)) { + emrp->mrp_mask &= ~MRP_POOL; + bzero(emrp->mrp_pool, MAXPATHLEN); + mac_fanout_setup(mcip, mcip->mci_flent, mrp, + mac_rx_deliver, mcip, NULL, NULL); + } + mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), + B_FALSE); + } + } + i_mac_perim_exit(mip); + kmem_free(mrp, sizeof (*mrp)); + return (MH_WALK_CONTINUE); +} + +static void +mac_pool_update(void *arg) +{ + mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg); + kmem_free(arg, sizeof (struct mac_pool_arg)); +} + +/* + * Callback function to be executed when a noteworthy pool event + * takes place. + */ +/* ARGSUSED */ +static void +mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) +{ + pool_t *pool; + char *poolname = NULL; + struct mac_pool_arg *mpa; + + pool_lock(); + mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP); + + switch (what) { + case POOL_E_ENABLE: + case POOL_E_DISABLE: + break; + + case POOL_E_CHANGE: + pool = pool_lookup_pool_by_id(id); + if (pool == NULL) { + kmem_free(mpa, sizeof (struct mac_pool_arg)); + pool_unlock(); + return; + } + pool_get_name(pool, &poolname); + (void) strlcpy(mpa->mpa_poolname, poolname, + sizeof (mpa->mpa_poolname)); + break; + + default: + kmem_free(mpa, sizeof (struct mac_pool_arg)); + pool_unlock(); + return; + } + pool_unlock(); + + mpa->mpa_what = what; + + mac_pool_update(mpa); +} + +/* + * Set effective rings property. This could be called from datapath_setup/ + * datapath_teardown or set-linkprop. 
+ * If the group is reserved we just go ahead and set the effective rings. + * Additionally, for TX this could mean the default group has lost/gained + * some rings, so if the default group is reserved, we need to adjust the + * effective rings for the default group clients. For RX, if we are working + * with the non-default group, we just need to reset the effective props + * for the default group clients. + */ +void +mac_set_rings_effective(mac_client_impl_t *mcip) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *grp; + mac_group_t *defgrp; + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + mac_grp_client_t *mgcp; + mac_client_impl_t *gmcip; + + grp = flent->fe_rx_ring_group; + if (grp != NULL) { + defgrp = MAC_DEFAULT_RX_GROUP(mip); + /* + * If we have reserved a group, set the effective rings + * to the ring count in the group. + */ + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { + emrp->mrp_mask |= MRP_RX_RINGS; + emrp->mrp_nrxrings = grp->mrg_cur_count; + } + + /* + * We go through the clients in the shared group and + * reset the effective properties. It is possible this + * might have already been done for some client (i.e. + * if some client is being moved to a group that is + * already shared). The case where the default group is + * RESERVED is taken care of above (note in the RX side if + * there is a non-default group, the default group is always + * SHARED). 
+ */ + if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) { + if (grp->mrg_state == MAC_GROUP_STATE_SHARED) + mgcp = grp->mrg_clients; + else + mgcp = defgrp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + if (emrp->mrp_mask & MRP_RX_RINGS) { + emrp->mrp_mask &= ~MRP_RX_RINGS; + emrp->mrp_nrxrings = 0; + } + mgcp = mgcp->mgc_next; + } + } + } + + /* Now the TX side */ + grp = flent->fe_tx_ring_group; + if (grp != NULL) { + defgrp = MAC_DEFAULT_TX_GROUP(mip); + + if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { + emrp->mrp_mask |= MRP_TX_RINGS; + emrp->mrp_ntxrings = grp->mrg_cur_count; + } else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) { + mgcp = grp->mrg_clients; + while (mgcp != NULL) { + gmcip = mgcp->mgc_client; + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + if (emrp->mrp_mask & MRP_TX_RINGS) { + emrp->mrp_mask &= ~MRP_TX_RINGS; + emrp->mrp_ntxrings = 0; + } + mgcp = mgcp->mgc_next; + } + } + + /* + * If the group is not the default group and the default + * group is reserved, the ring count in the default group + * might have changed, update it. + */ + if (grp != defgrp && + defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { + gmcip = MAC_GROUP_ONLY_CLIENT(defgrp); + emrp = MCIP_EFFECTIVE_PROPS(gmcip); + emrp->mrp_ntxrings = defgrp->mrg_cur_count; + } + } + emrp = MCIP_EFFECTIVE_PROPS(mcip); +} + +/* + * Check if the primary is in the default group. If so, see if we + * can give it an exclusive group now that another client is + * being configured. We take the primary out of the default group + * because the multicast/broadcast packets for all the clients + * will land in the default ring in the default group which means + * any client in the default group, even if it is the only one in + * the group, will lose exclusive access to the rings, hence + * polling. 
+ */ +mac_client_impl_t * +mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw) +{ + mac_impl_t *mip = mcip->mci_mip; + mac_group_t *defgrp = MAC_DEFAULT_RX_GROUP(mip); + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + uint8_t *mac_addr; + mac_group_t *ngrp; + + /* + * Check if the primary is in the default group, if not + * or if it is explicitly configured to be in the default + * group OR set the RX rings property, return. + */ + if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS) + return (NULL); + + /* + * If the new client needs an exclusive group and we + * don't have another for the primary, return. + */ + if (rxhw && mip->mi_rxhwclnt_avail < 2) + return (NULL); + + mac_addr = flent->fe_flow_desc.fd_dst_mac; + /* + * We call this when we are setting up the datapath for + * the first non-primary. + */ + ASSERT(mip->mi_nactiveclients == 2); + /* + * OK, now we have the primary that needs to be relocated. + */ + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); + if (ngrp == NULL) + return (NULL); + if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { + mac_stop_group(ngrp); + return (NULL); + } + return (mcip); +} diff --git a/usr/src/uts/common/io/mac/mac_bcast.c b/usr/src/uts/common/io/mac/mac_bcast.c index 2f17228e06..1aba37c822 100644 --- a/usr/src/uts/common/io/mac/mac_bcast.c +++ b/usr/src/uts/common/io/mac/mac_bcast.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -212,10 +212,15 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) rw_enter(&mip->mi_rw_lock, RW_READER); /* update stats */ - if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) - dst_mcip->mci_stat_multircv++; - else - dst_mcip->mci_stat_brdcstrcv++; + if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) { + MCIP_STAT_UPDATE(dst_mcip, multircv, 1); + MCIP_STAT_UPDATE(dst_mcip, multircvbytes, + msgdsize(mp_chain)); + } else { + MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1); + MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes, + msgdsize(mp_chain)); + } if (grp->mbg_clients_gen != gen) { /* @@ -236,10 +241,12 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) * so we need to send a copy of the packet to the * underlying NIC so that it can be sent on the wire. */ - src_mcip->mci_stat_multixmt++; - src_mcip->mci_stat_brdcstxmt++; + MCIP_STAT_UPDATE(src_mcip, multixmt, 1); + MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain)); + MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1); + MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain)); - MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, B_FALSE); + MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip); if (mp_chain != NULL) freemsgchain(mp_chain); } else { diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 78c7eae9f2..2f8962f67a 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -108,6 +108,7 @@ #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/dls.h> #include <sys/dld.h> #include <sys/modctl.h> @@ -144,6 +145,10 @@ static void mac_client_remove_flow_from_list(mac_client_impl_t *, static void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *); static void mac_rename_flow_names(mac_client_impl_t *, const char *); static void mac_virtual_link_update(mac_impl_t *); +static 
int mac_client_datapath_setup(mac_client_impl_t *, uint16_t, + uint8_t *, mac_resource_props_t *, boolean_t, mac_unicast_impl_t *); +static void mac_client_datapath_teardown(mac_client_handle_t, + mac_unicast_impl_t *, flow_entry_t *); /* ARGSUSED */ static int @@ -560,6 +565,14 @@ mac_client_link_state(mac_client_impl_t *mcip) } /* + * These statistics are consumed by dladm show-link -s <vnic>, + * dladm show-vnic -s and netstat. With the introduction of dlstat, + * dladm show-link -s and dladm show-vnic -s witll be EOL'ed while + * netstat will consume from kstats introduced for dlstat. This code + * will be removed at that time. + */ + +/* * Return the statistics of a MAC client. These statistics are different * then the statistics of the underlying MAC which are returned by * mac_stat_get(). @@ -567,9 +580,17 @@ mac_client_link_state(mac_client_impl_t *mcip) uint64_t mac_client_stat_get(mac_client_handle_t mch, uint_t stat) { - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_impl_t *mip = mcip->mci_mip; - uint64_t val; + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *mac_srs; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + int i; + uint64_t val = 0; + + mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs); + mac_tx_stat = &mac_srs->srs_tx.st_stat; switch (stat) { case MAC_STAT_LINK_STATE: @@ -588,37 +609,52 @@ mac_client_stat_get(mac_client_handle_t mch, uint_t stat) val = mac_client_ifspeed(mcip); break; case MAC_STAT_MULTIRCV: - val = mcip->mci_stat_multircv; + val = mcip->mci_misc_stat.mms_multircv; break; case MAC_STAT_BRDCSTRCV: - val = mcip->mci_stat_brdcstrcv; + val = mcip->mci_misc_stat.mms_brdcstrcv; break; case MAC_STAT_MULTIXMT: - val = mcip->mci_stat_multixmt; + val = mcip->mci_misc_stat.mms_multixmt; break; case MAC_STAT_BRDCSTXMT: - val = mcip->mci_stat_brdcstxmt; + val = mcip->mci_misc_stat.mms_brdcstxmt; break; case 
MAC_STAT_OBYTES: - val = mcip->mci_stat_obytes; + val = mac_tx_stat->mts_obytes; break; case MAC_STAT_OPACKETS: - val = mcip->mci_stat_opackets; + val = mac_tx_stat->mts_opackets; break; case MAC_STAT_OERRORS: - val = mcip->mci_stat_oerrors; + val = mac_tx_stat->mts_oerrors; break; case MAC_STAT_IPACKETS: - val = mcip->mci_stat_ipackets; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + } break; case MAC_STAT_RBYTES: - val = mcip->mci_stat_ibytes; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + + mac_rx_stat->mrs_lclbytes; + } break; case MAC_STAT_IERRORS: - val = mcip->mci_stat_ierrors; + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + val += mac_rx_stat->mrs_ierrors; + } break; default: - val = mac_stat_default(mip, stat); + val = mac_driver_stat_default(mip, stat); break; } @@ -676,12 +712,30 @@ mac_stat_get(mac_handle_t mh, uint_t stat) * The driver doesn't support this statistic. Get the * statistic's default value. */ - val = mac_stat_default(mip, stat); + val = mac_driver_stat_default(mip, stat); } return (val); } /* + * Query hardware rx ring corresponding to the pseudo ring. + */ +uint64_t +mac_pseudo_rx_ring_stat_get(mac_ring_handle_t handle, uint_t stat) +{ + return (mac_rx_ring_stat_get(handle, stat)); +} + +/* + * Query hardware tx ring corresponding to the pseudo ring. + */ +uint64_t +mac_pseudo_tx_ring_stat_get(mac_ring_handle_t handle, uint_t stat) +{ + return (mac_tx_ring_stat_get(handle, stat)); +} + +/* * Utility function which returns the VID associated with a flow entry. 
*/ uint16_t @@ -752,6 +806,12 @@ mac_unicast_update_client_flow(mac_client_impl_t *mcip) mac_flow_set_desc(flent, &flow_desc); /* + * The v6 local addr (used by mac protection) needs to be + * regenerated because our mac address has changed. + */ + mac_protect_update_v6_local_addr(mcip); + + /* * A MAC client could have one MAC address but multiple * VLANs. In that case update the flow entries corresponding * to all VLANs of the MAC client. @@ -1184,20 +1244,14 @@ int mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, uint16_t flags) { - mac_impl_t *mip = (mac_impl_t *)mh; - mac_client_impl_t *mcip; - int err = 0; - boolean_t share_desired = - ((flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0); - boolean_t no_hwrings = ((flags & MAC_OPEN_FLAGS_NO_HWRINGS) != 0); - boolean_t req_hwrings = ((flags & MAC_OPEN_FLAGS_REQ_HWRINGS) != 0); - flow_entry_t *flent = NULL; + mac_impl_t *mip = (mac_impl_t *)mh; + mac_client_impl_t *mcip; + int err = 0; + boolean_t share_desired; + flow_entry_t *flent = NULL; + share_desired = (flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0; *mchp = NULL; - if (share_desired && no_hwrings) { - /* can't have shares but no hardware rings */ - return (EINVAL); - } i_mac_perim_enter(mip); @@ -1249,6 +1303,9 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, if ((flags & MAC_OPEN_FLAGS_IS_AGGR_PORT) != 0) mcip->mci_state_flags |= MCIS_IS_AGGR_PORT; + if (mip->mi_state_flags & MIS_IS_AGGR) + mcip->mci_state_flags |= MCIS_IS_AGGR; + if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) { datalink_id_t linkid; @@ -1283,19 +1340,18 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY) mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY; + if (flags & MAC_OPEN_FLAGS_NO_UNICAST_ADDR) + mcip->mci_state_flags |= MCIS_NO_UNICAST_ADDR; + + mac_protect_init(mcip); + /* the subflow table will be created dynamically */ mcip->mci_subflow_tab = NULL; - 
mcip->mci_stat_multircv = 0; - mcip->mci_stat_brdcstrcv = 0; - mcip->mci_stat_multixmt = 0; - mcip->mci_stat_brdcstxmt = 0; - - mcip->mci_stat_obytes = 0; - mcip->mci_stat_opackets = 0; - mcip->mci_stat_oerrors = 0; - mcip->mci_stat_ibytes = 0; - mcip->mci_stat_ipackets = 0; - mcip->mci_stat_ierrors = 0; + + mcip->mci_misc_stat.mms_multircv = 0; + mcip->mci_misc_stat.mms_brdcstrcv = 0; + mcip->mci_misc_stat.mms_multixmt = 0; + mcip->mci_misc_stat.mms_brdcstxmt = 0; /* Create an initial flow */ @@ -1321,20 +1377,25 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, */ mac_client_add(mcip); - if (no_hwrings) - mcip->mci_state_flags |= MCIS_NO_HWRINGS; - if (req_hwrings) - mcip->mci_state_flags |= MCIS_REQ_HWRINGS; mcip->mci_share = NULL; - if (share_desired) { - ASSERT(!no_hwrings); + if (share_desired) i_mac_share_alloc(mcip); - } DTRACE_PROBE2(mac__client__open__allocated, mac_impl_t *, mcip->mci_mip, mac_client_impl_t *, mcip); *mchp = (mac_client_handle_t)mcip; + /* + * We will do minimal datapath setup to allow a MAC client to + * transmit or receive non-unicast packets without waiting + * for mac_unicast_add. 
+ */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) { + if ((err = mac_client_datapath_setup(mcip, VLAN_ID_NONE, + NULL, NULL, B_TRUE, NULL)) != 0) { + goto done; + } + } i_mac_perim_exit(mip); return (0); @@ -1373,6 +1434,13 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) return; } + /* If we have only set up the minimal datapath, tear it down */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) { + mac_client_datapath_teardown((mac_client_handle_t)mcip, NULL, + mcip->mci_flent); + mcip->mci_state_flags &= ~MCIS_NO_UNICAST_ADDR; + } + /* * Remove the flent associated with the MAC client */ @@ -1389,7 +1457,7 @@ mac_client_close(mac_client_handle_t mch, uint16_t flags) ASSERT(mcip->mci_tx_notify_cb_list == NULL); i_mac_share_free(mcip); - + mac_protect_fini(mcip); mac_client_remove(mcip); i_mac_perim_exit(mip); @@ -1495,6 +1563,335 @@ mac_update_subflow_priority(mac_client_impl_t *mcip) } /* + * Modify the TX or RX ring properties. We could either just move around + * rings, i.e. add/remove rings given to a client. Or this might cause the + * client to move from hardware based to software or the other way around. + * If we want to reset this property, then we clear the mask, additionally + * if the client was given a non-default group we remove all rings except + * for 1 and give it back to the default group. 
+ */ +int +mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp, + mac_resource_props_t *tmrp) +{ + mac_impl_t *mip = mcip->mci_mip; + flow_entry_t *flent = mcip->mci_flent; + uint8_t *mac_addr; + int err = 0; + mac_group_t *defgrp; + mac_group_t *group; + mac_group_t *ngrp; + mac_resource_props_t *cmrp = MCIP_RESOURCE_PROPS(mcip); + uint_t ringcnt; + boolean_t unspec; + + if (mcip->mci_share != NULL) + return (EINVAL); + + if (mrp->mrp_mask & MRP_RX_RINGS) { + unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; + group = flent->fe_rx_ring_group; + defgrp = MAC_DEFAULT_RX_GROUP(mip); + mac_addr = flent->fe_flow_desc.fd_dst_mac; + + /* + * No resulting change. If we are resetting on a client on + * which there was no rx rings property. For dynamic group + * if we are setting the same number of rings already set. + * For static group if we are requesting a group again. + */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (!(tmrp->mrp_mask & MRP_RX_RINGS)) + return (0); + } else { + if (unspec) { + if (tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + return (0); + } else if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + if ((tmrp->mrp_mask & MRP_RX_RINGS) && + !(tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) && + mrp->mrp_nrxrings == tmrp->mrp_nrxrings) { + return (0); + } + } + } + /* Resetting the prop */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + /* + * We will just keep one ring and give others back if + * we are not the primary. For the primary we give + * all the rings in the default group except the + * default ring. If it is a static group, then + * we don't do anything, but clear the MRP_RX_RINGS + * flag. + */ + if (group != defgrp) { + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + /* + * This group has reserved rings + * that need to be released now, + * so does the group. 
+ */ + MAC_RX_RING_RELEASED(mip, + group->mrg_cur_count); + MAC_RX_GRP_RELEASED(mip); + if ((flent->fe_type & + FLOW_PRIMARY_MAC) != 0) { + if (mip->mi_nactiveclients == + 1) { + (void) + mac_rx_switch_group( + mcip, group, + defgrp); + return (0); + } else { + cmrp->mrp_nrxrings = + group-> + mrg_cur_count + + defgrp-> + mrg_cur_count - 1; + } + } else { + cmrp->mrp_nrxrings = 1; + } + (void) mac_group_ring_modify(mcip, + group, defgrp); + } else { + /* + * If this is a static group, we + * need to release the group. The + * client will remain in the same + * group till some other client + * needs this group. + */ + MAC_RX_GRP_RELEASED(mip); + } + /* Let check if we can give this an excl group */ + } else if (group == defgrp) { + ngrp = mac_reserve_rx_group(mcip, mac_addr, + B_TRUE); + /* Couldn't give it a group, that's fine */ + if (ngrp == NULL) + return (0); + /* Switch to H/W */ + if (mac_rx_switch_group(mcip, defgrp, ngrp) != + 0) { + mac_stop_group(ngrp); + return (0); + } + } + /* + * If the client is in the default group, we will + * just clear the MRP_RX_RINGS and leave it as + * it rather than look for an exclusive group + * for it. 
+ */ + return (0); + } + + if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) { + ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); + if (ngrp == NULL) + return (ENOSPC); + + /* Switch to H/W */ + if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { + mac_release_rx_group(mcip, ngrp); + return (ENOSPC); + } + MAC_RX_GRP_RESERVED(mip); + if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) + MAC_RX_RING_RESERVED(mip, ngrp->mrg_cur_count); + } else if (group != defgrp && !unspec && + mrp->mrp_nrxrings == 0) { + /* Switch to S/W */ + ringcnt = group->mrg_cur_count; + if (mac_rx_switch_group(mcip, group, defgrp) != 0) + return (ENOSPC); + if (tmrp->mrp_mask & MRP_RX_RINGS) { + MAC_RX_GRP_RELEASED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RELEASED(mip, ringcnt); + } + } + } else if (group != defgrp && mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + err = mac_group_ring_modify(mcip, group, defgrp); + if (err != 0) + return (err); + /* + * Update the accounting. If this group + * already had explicitly reserved rings, + * we need to update the rings based on + * the new ring count. If this group + * had not explicitly reserved rings, + * then we just reserve the rings asked for + * and reserve the group. + */ + if (tmrp->mrp_mask & MRP_RX_RINGS) { + if (ringcnt > group->mrg_cur_count) { + MAC_RX_RING_RELEASED(mip, + ringcnt - group->mrg_cur_count); + } else { + MAC_RX_RING_RESERVED(mip, + group->mrg_cur_count - ringcnt); + } + } else { + MAC_RX_RING_RESERVED(mip, group->mrg_cur_count); + MAC_RX_GRP_RESERVED(mip); + } + } + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; + group = flent->fe_tx_ring_group; + defgrp = MAC_DEFAULT_TX_GROUP(mip); + + /* + * For static groups we only allow rings=0 or resetting the + * rings property. 
+ */ + if (mrp->mrp_ntxrings > 0 && + mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { + return (ENOTSUP); + } + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (!(tmrp->mrp_mask & MRP_TX_RINGS)) + return (0); + } else { + if (unspec) { + if (tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + return (0); + } else if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + if ((tmrp->mrp_mask & MRP_TX_RINGS) && + !(tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) && + mrp->mrp_ntxrings == tmrp->mrp_ntxrings) { + return (0); + } + } + } + /* Resetting the prop */ + if (mrp->mrp_mask & MRP_RINGS_RESET) { + if (group != defgrp) { + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + if ((flent->fe_type & + FLOW_PRIMARY_MAC) != 0) { + mac_tx_client_quiesce( + (mac_client_handle_t) + mcip); + mac_tx_switch_group(mcip, + group, defgrp); + mac_tx_client_restart( + (mac_client_handle_t) + mcip); + MAC_TX_GRP_RELEASED(mip); + MAC_TX_RING_RELEASED(mip, + ringcnt); + return (0); + } + cmrp->mrp_ntxrings = 1; + (void) mac_group_ring_modify(mcip, + group, defgrp); + /* + * This group has reserved rings + * that need to be released now. + */ + MAC_TX_RING_RELEASED(mip, ringcnt); + } + /* + * If this is a static group, we + * need to release the group. The + * client will remain in the same + * group till some other client + * needs this group. + */ + MAC_TX_GRP_RELEASED(mip); + } else if (group == defgrp && + (flent->fe_type & FLOW_PRIMARY_MAC) == 0) { + ngrp = mac_reserve_tx_group(mcip, B_TRUE); + if (ngrp == NULL) + return (0); + mac_tx_client_quiesce( + (mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, defgrp, ngrp); + mac_tx_client_restart( + (mac_client_handle_t)mcip); + } + /* + * If the client is in the default group, we will + * just clear the MRP_TX_RINGS and leave it as + * it rather than look for an exclusive group + * for it. 
+ */ + return (0); + } + + /* Switch to H/W */ + if (group == defgrp && ((mrp->mrp_ntxrings > 0) || unspec)) { + ngrp = mac_reserve_tx_group(mcip, B_TRUE); + if (ngrp == NULL) + return (ENOSPC); + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, defgrp, ngrp); + mac_tx_client_restart((mac_client_handle_t)mcip); + MAC_TX_GRP_RESERVED(mip); + if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) + MAC_TX_RING_RESERVED(mip, ngrp->mrg_cur_count); + /* Switch to S/W */ + } else if (group != defgrp && !unspec && + mrp->mrp_ntxrings == 0) { + /* Switch to S/W */ + ringcnt = group->mrg_cur_count; + mac_tx_client_quiesce((mac_client_handle_t)mcip); + mac_tx_switch_group(mcip, group, defgrp); + mac_tx_client_restart((mac_client_handle_t)mcip); + if (tmrp->mrp_mask & MRP_TX_RINGS) { + MAC_TX_GRP_RELEASED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RELEASED(mip, ringcnt); + } + } + } else if (group != defgrp && mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + ringcnt = group->mrg_cur_count; + err = mac_group_ring_modify(mcip, group, defgrp); + if (err != 0) + return (err); + /* + * Update the accounting. If this group + * already had explicitly reserved rings, + * we need to update the rings based on + * the new ring count. If this group + * had not explicitly reserved rings, + * then we just reserve the rings asked for + * and reserve the group. + */ + if (tmrp->mrp_mask & MRP_TX_RINGS) { + if (ringcnt > group->mrg_cur_count) { + MAC_TX_RING_RELEASED(mip, + ringcnt - group->mrg_cur_count); + } else { + MAC_TX_RING_RESERVED(mip, + group->mrg_cur_count - ringcnt); + } + } else { + MAC_TX_RING_RESERVED(mip, group->mrg_cur_count); + MAC_TX_GRP_RESERVED(mip); + } + } + } + return (0); +} + +/* * When the MAC client is being brought up (i.e. 
we do a unicast_add) we need * to initialize the cpu and resource control structure in the * mac_client_impl_t from the mac_impl_t (i.e if there are any cached @@ -1506,16 +1903,73 @@ mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = (mac_impl_t *)mcip->mci_mip; int err = 0; + flow_entry_t *flent = mcip->mci_flent; + mac_resource_props_t *omrp, *nmrp = MCIP_RESOURCE_PROPS(mcip); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - err = mac_validate_props(mrp); + err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ? + mcip->mci_upper_mip : mip, mrp); if (err != 0) return (err); + /* + * Copy over the existing properties since mac_update_resources + * will modify the client's mrp. Currently, the saved property + * is used to determine the difference between existing and + * modified rings property. + */ + omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP); + bcopy(nmrp, omrp, sizeof (*omrp)); mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE); if (MCIP_DATAPATH_SETUP(mcip)) { /* + * We support rings only for primary client when there are + * multiple clients sharing the same MAC address (e.g. VLAN). 
+ */ + if (mrp->mrp_mask & MRP_RX_RINGS || + mrp->mrp_mask & MRP_TX_RINGS) { + + if ((err = mac_client_set_rings_prop(mcip, mrp, + omrp)) != 0) { + if (omrp->mrp_mask & MRP_RX_RINGS) { + nmrp->mrp_mask |= MRP_RX_RINGS; + nmrp->mrp_nrxrings = omrp->mrp_nrxrings; + } else { + nmrp->mrp_mask &= ~MRP_RX_RINGS; + nmrp->mrp_nrxrings = 0; + } + if (omrp->mrp_mask & MRP_TX_RINGS) { + nmrp->mrp_mask |= MRP_TX_RINGS; + nmrp->mrp_ntxrings = omrp->mrp_ntxrings; + } else { + nmrp->mrp_mask &= ~MRP_TX_RINGS; + nmrp->mrp_ntxrings = 0; + } + if (omrp->mrp_mask & MRP_RXRINGS_UNSPEC) + omrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else + omrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (omrp->mrp_mask & MRP_TXRINGS_UNSPEC) + omrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else + omrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + kmem_free(omrp, sizeof (*omrp)); + return (err); + } + + /* + * If we modified the rings property of the primary + * we need to update the property fields of its + * VLANs as they inherit the primary's properties. + */ + if (mac_is_primary_client(mcip)) { + mac_set_prim_vlan_rings(mip, + MCIP_RESOURCE_PROPS(mcip)); + } + } + /* * We have to set this prior to calling mac_flow_modify. 
*/ if (mrp->mrp_mask & MRP_PRIORITY) { @@ -1528,11 +1982,11 @@ mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp) } } - mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp); + mac_flow_modify(mip->mi_flow_tab, flent, mrp); if (mrp->mrp_mask & MRP_PRIORITY) mac_update_subflow_priority(mcip); - return (0); } + kmem_free(omrp, sizeof (*omrp)); return (0); } @@ -1562,8 +2016,12 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, */ bzero(&flow_desc, sizeof (flow_desc)); - flow_desc.fd_mac_len = mip->mi_type->mt_addr_length; - bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len); + ASSERT(mac_addr != NULL || + (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR)); + if (mac_addr != NULL) { + flow_desc.fd_mac_len = mip->mi_type->mt_addr_length; + bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len); + } flow_desc.fd_mask = FLOW_LINK_DST; if (vid != 0) { flow_desc.fd_vid = vid; @@ -1612,6 +2070,7 @@ mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr, flent_flags, flent)) != 0) return (err); + mac_misc_stat_create(*flent); FLOW_MARK(*flent, FE_INCIPIENT); (*flent)->fe_mcip = mcip; @@ -1700,6 +2159,9 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, boolean_t nactiveclients_added = B_FALSE; flow_entry_t *flent; int err = 0; + boolean_t no_unicast; + + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; if ((err = mac_start((mac_handle_t)mip)) != 0) goto bail; @@ -1725,10 +2187,11 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, /* We are configuring the unicast flow now */ if (!MCIP_DATAPATH_SETUP(mcip)) { - MAC_CLIENT_SET_PRIORITY_RANGE(mcip, - (mrp->mrp_mask & MRP_PRIORITY) ? mrp->mrp_priority : - MPL_LINK_DEFAULT); - + if (mrp != NULL) { + MAC_CLIENT_SET_PRIORITY_RANGE(mcip, + (mrp->mrp_mask & MRP_PRIORITY) ? 
mrp->mrp_priority : + MPL_LINK_DEFAULT); + } if ((err = mac_unicast_flow_create(mcip, mac_addr, vid, isprimary, B_TRUE, &flent, mrp)) != 0) goto bail; @@ -1743,6 +2206,8 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, if ((err = mac_datapath_setup(mcip, flent, SRST_LINK)) != 0) goto bail; + if (no_unicast) + goto done_setup; /* * The unicast MAC address must have been added successfully. */ @@ -1756,6 +2221,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, } else { mac_address_t *map = mcip->mci_unicast; + ASSERT(!no_unicast); /* * A unicast flow already exists for that MAC client, * this flow must be the same mac address but with @@ -1794,7 +2260,7 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, mcip->mci_unicast_list = muip; rw_exit(&mcip->mci_rw_lock); - +done_setup: /* * First add the flent to the flow list of this mcip. Then set * the mip's mi_single_active_client if needed. The Rx path assumes @@ -1802,7 +2268,6 @@ mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid, * flent. */ mac_client_add_to_flow_list(mcip, flent); - if (nactiveclients_added) mac_update_single_active_client(mip); /* @@ -1889,7 +2354,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, boolean_t fastpath_disabled = B_FALSE; boolean_t is_primary = (flags & MAC_UNICAST_PRIMARY); boolean_t is_unicast_hw = (flags & MAC_UNICAST_HW); - mac_resource_props_t mrp; + mac_resource_props_t *mrp; boolean_t passive_client = B_FALSE; mac_unicast_impl_t *muip; boolean_t is_vnic_primary = @@ -1899,6 +2364,13 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0))); /* + * Can't unicast add if the client asked only for minimal datapath + * setup. + */ + if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) + return (ENOTSUP); + + /* * Check for an attempted use of the current Port VLAN ID, if enabled. * No client may use it. 
*/ @@ -2020,7 +2492,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, mip->mi_state_flags |= MIS_EXCLUSIVE; } - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC | MCIS_IS_AGGR_PORT))) { /* @@ -2029,11 +2501,40 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, * port, its property should be set in the mcip when the * VNIC/aggr was created. */ - mac_get_resources((mac_handle_t)mip, &mrp); - (void) mac_client_set_resources(mch, &mrp); + mac_get_resources((mac_handle_t)mip, mrp); + (void) mac_client_set_resources(mch, mrp); } else if (mcip->mci_state_flags & MCIS_IS_VNIC) { - bcopy(MCIP_RESOURCE_PROPS(mcip), &mrp, - sizeof (mac_resource_props_t)); + /* + * This is a primary VLAN client, we don't support + * specifying rings property for this as it inherits the + * rings property from its MAC. + */ + if (is_vnic_primary) { + mac_resource_props_t *vmrp; + + vmrp = MCIP_RESOURCE_PROPS(mcip); + if (vmrp->mrp_mask & MRP_RX_RINGS || + vmrp->mrp_mask & MRP_TX_RINGS) { + if (fastpath_disabled) + mac_fastpath_enable((mac_handle_t)mip); + kmem_free(mrp, sizeof (*mrp)); + return (ENOTSUP); + } + /* + * Additionally we also need to inherit any + * rings property from the MAC. 
+ */ + mac_get_resources((mac_handle_t)mip, mrp); + if (mrp->mrp_mask & MRP_RX_RINGS) { + vmrp->mrp_mask |= MRP_RX_RINGS; + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + vmrp->mrp_mask |= MRP_TX_RINGS; + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + } + } + bcopy(MCIP_RESOURCE_PROPS(mcip), mrp, sizeof (*mrp)); } muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP); @@ -2151,6 +2652,7 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, ASSERT((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0); mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY; + kmem_free(mrp, sizeof (*mrp)); /* * Stash the unicast address handle, we will use it when @@ -2161,10 +2663,12 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, return (0); } - err = mac_client_datapath_setup(mcip, vid, mac_addr, &mrp, + err = mac_client_datapath_setup(mcip, vid, mac_addr, mrp, is_primary || is_vnic_primary, muip); if (err != 0) goto bail_out; + + kmem_free(mrp, sizeof (*mrp)); *mah = (mac_unicast_handle_t)muip; return (0); @@ -2178,6 +2682,7 @@ bail_out: mip->mi_driver); } } + kmem_free(mrp, sizeof (*mrp)); kmem_free(muip, sizeof (mac_unicast_impl_t)); return (err); } @@ -2227,25 +2732,33 @@ mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, return (err); } -void +static void mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, flow_entry_t *flent) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = mcip->mci_mip; + boolean_t no_unicast; /* - * We would have initialized subflows etc. only if we brought up - * the primary client and set the unicast unicast address etc. - * Deactivate the flows. The flow entry will be removed from the - * active flow tables, and the associated SRS, softrings etc will - * be deleted. 
But the flow entry itself won't be destroyed, instead - * it will continue to be archived off the the global flow hash - * list, for a possible future activation when say IP is plumbed - * again. + * If we have not added a unicast address for this MAC client, just + * tear down the datapath. */ - mac_link_release_flows(mch); + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; + if (!no_unicast) { + /* + * We would have initialized subflows etc. only if we brought + * up the primary client and set the unicast address + * etc. Deactivate the flows. The flow entry will be removed + * from the active flow tables, and the associated SRS, + * softrings etc will be deleted. But the flow entry itself + * won't be destroyed, instead it will continue to be archived + * off the global flow hash list, for a possible future + * activation when say IP is plumbed again. + */ + mac_link_release_flows(mch); + } mip->mi_nactiveclients--; mac_update_single_active_client(mip); @@ -2287,6 +2800,7 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, flent->fe_tx_srs == NULL && flent->fe_rx_srs_cnt == 0); flent->fe_flags = FE_MC_NO_DATAPATH; flow_stat_destroy(flent); + mac_misc_stat_delete(flent); /* Initialize the receiver function to a safe routine */ flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; @@ -2297,8 +2811,9 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, mutex_exit(&flent->fe_lock); if (mip->mi_type->mt_brdcst_addr != NULL) { + ASSERT(muip != NULL || no_unicast); mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, - muip->mui_vid); + muip != NULL ? 
muip->mui_vid : VLAN_ID_NONE); } if (mip->mi_nactiveclients == 1) { @@ -2324,8 +2839,12 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK; - kmem_free(muip, sizeof (mac_unicast_impl_t)); + if (muip != NULL) + kmem_free(muip, sizeof (mac_unicast_impl_t)); + mac_protect_cancel_timer(mcip); + mac_protect_flush_dhcp(mcip); + bzero(&mcip->mci_misc_stat, sizeof (mcip->mci_misc_stat)); /* * Disable fastpath if this is a VNIC or a VLAN. */ @@ -2345,7 +2864,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) mac_unicast_impl_t *pre; mac_impl_t *mip = mcip->mci_mip; flow_entry_t *flent; - boolean_t isprimary = B_FALSE; + uint16_t mui_vid; i_mac_perim_enter(mip); if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) { @@ -2436,11 +2955,6 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) rw_exit(&mcip->mci_rw_lock); } - if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && - muip->mui_vid == 0) { - mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; - isprimary = B_TRUE; - } if (!mac_client_single_rcvr(mcip)) { /* * This MAC client is shared by more than one unicast @@ -2490,34 +3004,39 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) return (0); } + mui_vid = muip->mui_vid; mac_client_datapath_teardown(mch, muip, flent); + if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) && mui_vid == 0) { + mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY; + } else { + i_mac_perim_exit(mip); + return (0); + } + /* * If we are removing the primary, check if we have a passive primary * client that we need to activate now. 
*/ - if (!isprimary) { - i_mac_perim_exit(mip); - return (0); - } mcip = mac_get_passive_primary_client(mip); if (mcip != NULL) { - mac_resource_props_t mrp; + mac_resource_props_t *mrp; mac_unicast_impl_t *muip; mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY; - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); + /* * Apply the property cached in the mac_impl_t to the * primary mac client. */ - mac_get_resources((mac_handle_t)mip, &mrp); - (void) mac_client_set_resources(mch, &mrp); + mac_get_resources((mac_handle_t)mip, mrp); + (void) mac_client_set_resources(mch, mrp); ASSERT(mcip->mci_p_unicast_list != NULL); muip = mcip->mci_p_unicast_list; mcip->mci_p_unicast_list = NULL; if (mac_client_datapath_setup(mcip, VLAN_ID_NONE, - mip->mi_addr, &mrp, B_TRUE, muip) == 0) { + mip->mi_addr, mrp, B_TRUE, muip) == 0) { if (mcip->mci_rx_p_fn != NULL) { mac_rx_set(mch, mcip->mci_rx_p_fn, mcip->mci_rx_p_arg); @@ -2527,6 +3046,7 @@ mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah) } else { kmem_free(muip, sizeof (mac_unicast_impl_t)); } + kmem_free(mrp, sizeof (*mrp)); } i_mac_perim_exit(mip); return (0); @@ -2775,36 +3295,6 @@ mac_promisc_remove(mac_promisc_handle_t mph) } /* - * Bump the count of the number of active Tx threads. This is maintained as - * a per CPU counter. On (CMT kind of) machines with large number of CPUs, - * a single mci_tx_lock may become contended. However a count of the total - * number of Tx threads per client is needed in order to quiesce the Tx side - * prior to reassigning a Tx ring dynamically to another client. The thread - * that needs to quiesce the Tx traffic grabs all the percpu locks and checks - * the sum of the individual percpu refcnts. Each Tx data thread only grabs - * its own percpu lock and increments its own refcnt. 
- */ -void * -mac_tx_hold(mac_client_handle_t mch) -{ - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_tx_percpu_t *mytx; - int error; - - MAC_TX_TRY_HOLD(mcip, mytx, error); - return (error == 0 ? (void *)mytx : NULL); -} - -void -mac_tx_rele(mac_client_handle_t mch, void *mytx_handle) -{ - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_tx_percpu_t *mytx = mytx_handle; - - MAC_TX_RELE(mcip, mytx) -} - -/* * Send function invoked by MAC clients. */ mac_tx_cookie_t @@ -2872,8 +3362,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, srs_tx = &srs->srs_tx; if (srs_tx->st_mode == SRS_TX_DEFAULT && (srs->srs_state & SRS_ENQUEUED) == 0 && - mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL && - mp_chain->b_next == NULL) { + mip->mi_nactiveclients == 1 && mp_chain->b_next == NULL) { uint64_t obytes; /* @@ -2891,7 +3380,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, MAC_VID_CHECK(mcip, mp_chain, err); if (err != 0) { freemsg(mp_chain); - mcip->mci_stat_oerrors++; + mcip->mci_misc_stat.mms_txerrors++; goto done; } } @@ -2899,7 +3388,7 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, mp_chain = mac_add_vlan_tag(mp_chain, 0, mac_client_vid(mch)); if (mp_chain == NULL) { - mcip->mci_stat_oerrors++; + mcip->mci_misc_stat.mms_txerrors++; goto done; } } @@ -2908,17 +3397,11 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, obytes = (mp_chain->b_cont == NULL ? 
MBLKL(mp_chain) : msgdsize(mp_chain)); - MAC_TX(mip, srs_tx->st_arg2, mp_chain, - ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0)); - + MAC_TX(mip, srs_tx->st_arg2, mp_chain, mcip); if (mp_chain == NULL) { cookie = NULL; - mcip->mci_stat_obytes += obytes; - mcip->mci_stat_opackets += 1; - if ((srs->srs_type & SRST_FLOW) != 0) { - FLOW_STAT_UPDATE(flent, obytes, obytes); - FLOW_STAT_UPDATE(flent, opackets, 1); - } + SRS_TX_STAT_UPDATE(srs, opackets, 1); + SRS_TX_STAT_UPDATE(srs, obytes, obytes); } else { mutex_enter(&srs->srs_lock); cookie = mac_tx_srs_no_desc(srs, mp_chain, @@ -2978,7 +3461,14 @@ mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie) } mutex_enter(&mac_srs->srs_lock); - if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT) { + /* + * Only in the case of TX_FANOUT and TX_AGGR, the underlying + * softring (s_ring_state) will have the HIWAT set. This is + * the multiple Tx ring flow control case. For all other + * case, SRS (srs_state) will store the condition. + */ + if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR) { if (cookie != NULL) { sringp = (mac_soft_ring_t *)cookie; mutex_enter(&sringp->s_ring_lock); @@ -2986,8 +3476,8 @@ mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie) blocked = B_TRUE; mutex_exit(&sringp->s_ring_lock); } else { - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_state & S_RING_TX_HIWAT) { blocked = B_TRUE; @@ -3228,9 +3718,10 @@ mac_cpu_set(mac_client_handle_t mch, mac_resource_props_t *mrp) ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - if ((err = mac_validate_props(mrp)) != 0) + if ((err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ? 
+ mcip->mci_upper_mip : mip, mrp)) != 0) { return (err); - + } if (MCIP_DATAPATH_SETUP(mcip)) mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp); @@ -3256,14 +3747,20 @@ mac_client_set_resources(mac_client_handle_t mch, mac_resource_props_t *mrp) goto done; } - if (mrp->mrp_mask & MRP_CPUS) { + if (mrp->mrp_mask & (MRP_CPUS|MRP_POOL)) { err = mac_cpu_set(mch, mrp); if (err != 0) goto done; } - if (mrp->mrp_mask & MRP_PROTECT) + if (mrp->mrp_mask & MRP_PROTECT) { err = mac_protect_set(mch, mrp); + if (err != 0) + goto done; + } + + if ((mrp->mrp_mask & MRP_RX_RINGS) || (mrp->mrp_mask & MRP_TX_RINGS)) + err = mac_resource_ctl_set(mch, mrp); done: i_mac_perim_exit(mip); @@ -3283,6 +3780,20 @@ mac_client_get_resources(mac_client_handle_t mch, mac_resource_props_t *mrp) } /* + * Return the effective properties currently associated with the specified + * MAC client. + */ +void +mac_client_get_effective_resources(mac_client_handle_t mch, + mac_resource_props_t *mrp) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_resource_props_t *mcip_mrp = MCIP_EFFECTIVE_PROPS(mcip); + + bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t)); +} + +/* * Pass a copy of the specified packet to the promiscuous callbacks * of the specified MAC. 
* @@ -3708,6 +4219,16 @@ mac_get_lower_mac_handle(mac_handle_t mh) return (((vnic_t *)mip->mi_driver)->vn_lower_mh); } +boolean_t +mac_is_vnic_primary(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + ASSERT(mac_is_vnic(mh)); + return (((vnic_t *)mip->mi_driver)->vn_addr_type == + VNIC_MAC_ADDR_TYPE_PRIMARY); +} + void mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp, boolean_t is_user_flow) @@ -3728,17 +4249,66 @@ mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp, } } if (nmrp->mrp_mask & MRP_MAXBW) { - cmrp->mrp_maxbw = nmrp->mrp_maxbw; - if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) + if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { cmrp->mrp_mask &= ~MRP_MAXBW; - else + cmrp->mrp_maxbw = 0; + } else { cmrp->mrp_mask |= MRP_MAXBW; + cmrp->mrp_maxbw = nmrp->mrp_maxbw; + } } if (nmrp->mrp_mask & MRP_CPUS) MAC_COPY_CPUS(nmrp, cmrp); + if (nmrp->mrp_mask & MRP_POOL) { + if (strlen(nmrp->mrp_pool) == 0) { + cmrp->mrp_mask &= ~MRP_POOL; + bzero(cmrp->mrp_pool, sizeof (cmrp->mrp_pool)); + } else { + cmrp->mrp_mask |= MRP_POOL; + (void) strncpy(cmrp->mrp_pool, nmrp->mrp_pool, + sizeof (cmrp->mrp_pool)); + } + + } + if (nmrp->mrp_mask & MRP_PROTECT) mac_protect_update(nmrp, cmrp); + + /* + * Update the rings specified. 
+ */ + if (nmrp->mrp_mask & MRP_RX_RINGS) { + if (nmrp->mrp_mask & MRP_RINGS_RESET) { + cmrp->mrp_mask &= ~MRP_RX_RINGS; + if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + cmrp->mrp_nrxrings = 0; + } else { + cmrp->mrp_mask |= MRP_RX_RINGS; + cmrp->mrp_nrxrings = nmrp->mrp_nrxrings; + } + } + if (nmrp->mrp_mask & MRP_TX_RINGS) { + if (nmrp->mrp_mask & MRP_RINGS_RESET) { + cmrp->mrp_mask &= ~MRP_TX_RINGS; + if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; + cmrp->mrp_ntxrings = 0; + } else { + cmrp->mrp_mask |= MRP_TX_RINGS; + cmrp->mrp_ntxrings = nmrp->mrp_ntxrings; + } + } + if (nmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + else if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; + + if (nmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + else if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC) + cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; } } @@ -3757,26 +4327,29 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_client_impl_t *mcip; int err = 0; uint32_t resmask, newresmask; - mac_resource_props_t tmrp, umrp; + mac_resource_props_t *tmrp, *umrp; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); - err = mac_validate_props(mrp); + err = mac_validate_props(mip, mrp); if (err != 0) return (err); - bcopy(&mip->mi_resource_props, &umrp, sizeof (mac_resource_props_t)); - resmask = umrp.mrp_mask; - mac_update_resources(mrp, &umrp, B_FALSE); - newresmask = umrp.mrp_mask; + umrp = kmem_zalloc(sizeof (*umrp), KM_SLEEP); + bcopy(&mip->mi_resource_props, umrp, sizeof (*umrp)); + resmask = umrp->mrp_mask; + mac_update_resources(mrp, umrp, B_FALSE); + newresmask = umrp->mrp_mask; if (resmask == 0 && newresmask != 0) { /* - * Bandwidth, priority or cpu link properties configured, + * Bandwidth, priority, cpu or pool link properties configured, * must disable fastpath. 
 */ - if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) + if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) { + kmem_free(umrp, sizeof (*umrp)); return (err); + } } /* @@ -3784,19 +4357,93 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) * we use a copy of bind_cpu and finally cache bind_cpu in mip. * This allows us to cache only user edits in mip. */ - bcopy(mrp, &tmrp, sizeof (mac_resource_props_t)); + tmrp = kmem_zalloc(sizeof (*tmrp), KM_SLEEP); + bcopy(mrp, tmrp, sizeof (*tmrp)); mcip = mac_primary_client_handle(mip); if (mcip != NULL && (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0) { - err = - mac_client_set_resources((mac_client_handle_t)mcip, &tmrp); + err = mac_client_set_resources((mac_client_handle_t)mcip, tmrp); + } else if ((mrp->mrp_mask & MRP_RX_RINGS || + mrp->mrp_mask & MRP_TX_RINGS)) { + mac_client_impl_t *vmcip; + + /* + * If the primary is not up, we need to check if there + * are any VLANs on this primary. If there are then + * we need to set this property on the VLANs since + * VLANs follow the primary they are based on. Just + * look for the first VLAN and change its properties, + * all the other VLANs should be in the same group. + */ + for (vmcip = mip->mi_clients_list; vmcip != NULL; + vmcip = vmcip->mci_client_next) { + if ((vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) && + mac_client_vid((mac_client_handle_t)vmcip) != + VLAN_ID_NONE) { + break; + } + } + if (vmcip != NULL) { + mac_resource_props_t *omrp; + mac_resource_props_t *vmrp; + + omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP); + bcopy(MCIP_RESOURCE_PROPS(vmcip), omrp, sizeof (*omrp)); + /* + * We don't call mac_update_resources since we + * want to take only the ring properties and + * not all the properties that may have changed. 
+ */ + vmrp = MCIP_RESOURCE_PROPS(vmcip); + if (mrp->mrp_mask & MRP_RX_RINGS) { + if (mrp->mrp_mask & MRP_RINGS_RESET) { + vmrp->mrp_mask &= ~MRP_RX_RINGS; + if (vmrp->mrp_mask & + MRP_RXRINGS_UNSPEC) { + vmrp->mrp_mask &= + ~MRP_RXRINGS_UNSPEC; + } + vmrp->mrp_nrxrings = 0; + } else { + vmrp->mrp_mask |= MRP_RX_RINGS; + vmrp->mrp_nrxrings = mrp->mrp_nrxrings; + } + } + if (mrp->mrp_mask & MRP_TX_RINGS) { + if (mrp->mrp_mask & MRP_RINGS_RESET) { + vmrp->mrp_mask &= ~MRP_TX_RINGS; + if (vmrp->mrp_mask & + MRP_TXRINGS_UNSPEC) { + vmrp->mrp_mask &= + ~MRP_TXRINGS_UNSPEC; + } + vmrp->mrp_ntxrings = 0; + } else { + vmrp->mrp_mask |= MRP_TX_RINGS; + vmrp->mrp_ntxrings = mrp->mrp_ntxrings; + } + } + if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; + + if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) + vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; + + if ((err = mac_client_set_rings_prop(vmcip, mrp, + omrp)) != 0) { + bcopy(omrp, MCIP_RESOURCE_PROPS(vmcip), + sizeof (*omrp)); + } else { + mac_set_prim_vlan_rings(mip, vmrp); + } + kmem_free(omrp, sizeof (*omrp)); + } } /* Only update the values if mac_client_set_resources succeeded */ if (err == 0) { - bcopy(&umrp, &mip->mi_resource_props, - sizeof (mac_resource_props_t)); + bcopy(umrp, &mip->mi_resource_props, sizeof (*umrp)); /* - * If bankwidth, priority or cpu link properties cleared, + * If bandwidth, priority or cpu link properties cleared, * renable fastpath. 
*/ if (resmask != 0 && newresmask == 0) @@ -3804,6 +4451,8 @@ i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp) } else if (resmask == 0 && newresmask != 0) { mac_fastpath_enable((mac_handle_t)mip); } + kmem_free(tmrp, sizeof (*tmrp)); + kmem_free(umrp, sizeof (*umrp)); return (err); } @@ -3827,17 +4476,33 @@ mac_get_resources(mac_handle_t mh, mac_resource_props_t *mrp) mac_impl_t *mip = (mac_impl_t *)mh; mac_client_impl_t *mcip; - if (mip->mi_state_flags & MIS_IS_VNIC) { - mcip = mac_primary_client_handle(mip); - if (mcip != NULL) { - mac_client_get_resources((mac_client_handle_t)mcip, - mrp); - return; - } + mcip = mac_primary_client_handle(mip); + if (mcip != NULL) { + mac_client_get_resources((mac_client_handle_t)mcip, mrp); + return; } bcopy(&mip->mi_resource_props, mrp, sizeof (mac_resource_props_t)); } +/* + * Get the effective properties from the primary client of the + * specified MAC instance. + */ +void +mac_get_effective_resources(mac_handle_t mh, mac_resource_props_t *mrp) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_client_impl_t *mcip; + + mcip = mac_primary_client_handle(mip); + if (mcip != NULL) { + mac_client_get_effective_resources((mac_client_handle_t)mcip, + mrp); + return; + } + bzero(mrp, sizeof (mac_resource_props_t)); +} + int mac_set_pvid(mac_handle_t mh, uint16_t pvid) { @@ -3904,8 +4569,10 @@ mac_rename_primary(mac_handle_t mh, const char *new_name) * the associated flow kstat. 
*/ if (mip->mi_state_flags & MIS_IS_VNIC) { + mac_client_impl_t *mcip = mac_vnic_lower(mip); ASSERT(new_name != NULL); - mac_rename_flow_names(mac_vnic_lower(mip), new_name); + mac_rename_flow_names(mcip, new_name); + mac_stat_rename(mcip); goto done; } /* @@ -3954,6 +4621,10 @@ mac_rename_primary(mac_handle_t mh, const char *new_name) } } + /* Recreate kstats associated with aggr pseudo rings */ + if (mip->mi_state_flags & MIS_IS_AGGR) + mac_pseudo_ring_stat_rename(mip); + done: i_mac_perim_exit(mip); return (0); @@ -4187,8 +4858,14 @@ mac_client_single_rcvr(mac_client_impl_t *mcip) } int -mac_validate_props(mac_resource_props_t *mrp) +mac_validate_props(mac_impl_t *mip, mac_resource_props_t *mrp) { + boolean_t reset; + uint32_t rings_needed; + uint32_t rings_avail; + mac_group_type_t gtype; + mac_resource_props_t *mip_mrp; + if (mrp == NULL) return (0); @@ -4246,6 +4923,100 @@ mac_validate_props(mac_resource_props_t *mrp) if (err != 0) return (err); } + + if (!(mrp->mrp_mask & MRP_RX_RINGS) && + !(mrp->mrp_mask & MRP_TX_RINGS)) { + return (0); + } + + /* + * mip will be null when we come from mac_flow_create or + * mac_link_flow_modify. In the latter case it is a user flow, + * for which we don't support rings. In the former we would + * have validated the props beforehand (i_mac_unicast_add -> + * mac_client_set_resources -> validate for the primary and + * vnic_dev_create -> mac_client_set_resources -> validate for + * a vnic. 
+ */ + if (mip == NULL) + return (0); + + /* + * We don't support setting rings property for a VNIC that is using a + * primary address (VLAN) + */ + if ((mip->mi_state_flags & MIS_IS_VNIC) && + mac_is_vnic_primary((mac_handle_t)mip)) { + return (ENOTSUP); + } + + mip_mrp = &mip->mi_resource_props; + /* + * The rings property should be validated against the NICs + * resources + */ + if (mip->mi_state_flags & MIS_IS_VNIC) + mip = (mac_impl_t *)mac_get_lower_mac_handle((mac_handle_t)mip); + + reset = mrp->mrp_mask & MRP_RINGS_RESET; + /* + * If groups are not supported, return error. + */ + if (((mrp->mrp_mask & MRP_RX_RINGS) && mip->mi_rx_groups == NULL) || + ((mrp->mrp_mask & MRP_TX_RINGS) && mip->mi_tx_groups == NULL)) { + return (EINVAL); + } + /* + * If we are just resetting, there is no validation needed. + */ + if (reset) + return (0); + + if (mrp->mrp_mask & MRP_RX_RINGS) { + rings_needed = mrp->mrp_nrxrings; + /* + * We just want to check if the number of additional + * rings requested is available. + */ + if (mip_mrp->mrp_mask & MRP_RX_RINGS) { + if (mrp->mrp_nrxrings > mip_mrp->mrp_nrxrings) + /* Just check for the additional rings */ + rings_needed -= mip_mrp->mrp_nrxrings; + else + /* We are not asking for additional rings */ + rings_needed = 0; + } + rings_avail = mip->mi_rxrings_avail; + gtype = mip->mi_rx_group_type; + } else { + rings_needed = mrp->mrp_ntxrings; + /* Similarly for the TX rings */ + if (mip_mrp->mrp_mask & MRP_TX_RINGS) { + if (mrp->mrp_ntxrings > mip_mrp->mrp_ntxrings) + /* Just check for the additional rings */ + rings_needed -= mip_mrp->mrp_ntxrings; + else + /* We are not asking for additional rings */ + rings_needed = 0; + } + rings_avail = mip->mi_txrings_avail; + gtype = mip->mi_tx_group_type; + } + + /* Error if the group is dynamic .. */ + if (gtype == MAC_GROUP_TYPE_DYNAMIC) { + /* + * .. and rings specified are more than available. 
+ */ + if (rings_needed > rings_avail) + return (EINVAL); + } else { + /* + * OR group is static and we have specified some rings. + */ + if (rings_needed > 0) + return (EINVAL); + } return (0); } @@ -4266,11 +5037,18 @@ mac_virtual_link_update(mac_impl_t *mip) * mac handle in the client. */ void -mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh) +mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh, + mac_resource_props_t *mrp) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_impl_t *mip = (mac_impl_t *)mh; - mcip->mci_upper_mip = (mac_impl_t *)mh; + mcip->mci_upper_mip = mip; + /* If there are any properties, copy it over too */ + if (mrp != NULL) { + bcopy(mrp, &mip->mi_resource_props, + sizeof (mac_resource_props_t)); + } } /* @@ -4326,15 +5104,7 @@ mac_unmark_exclusive(mac_handle_t mh) } /* - * Set the MTU for the specified MAC. Note that this mechanism depends on - * the driver calling mac_maxsdu_update() to update the link MTU if it was - * successful in setting its MTU. - * - * Note that there is potential for improvement here. A better model might be - * to not require drivers to call mac_maxsdu_update(), but rather have this - * function update mi_sdu_max and send notifications if the driver setprop - * callback succeeds. This would remove the burden and complexity from - * drivers. + * Set the MTU for the specified MAC. */ int mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) @@ -4352,9 +5122,18 @@ mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg) old_mtu = mip->mi_sdu_max; + if (new_mtu == 0 || new_mtu < mip->mi_sdu_min) { + rv = EINVAL; + goto bail; + } + if (old_mtu != new_mtu) { rv = mip->mi_callbacks->mc_setprop(mip->mi_driver, "mtu", MAC_PROP_MTU, sizeof (uint_t), &new_mtu); + if (rv != 0) + goto bail; + rv = mac_maxsdu_update(mh, new_mtu); + ASSERT(rv == 0); } bail: @@ -4365,13 +5144,18 @@ bail: return (rv); } +/* + * Return the RX h/w information for the group indexed by grp_num. 
+ */ void -mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, - uint_t *n_rings, uint_t *type, uint_t *n_clnts, char *clnts_name) +mac_get_hwrxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, + uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts, + char *clnts_name) { mac_impl_t *mip = (mac_impl_t *)mh; mac_grp_client_t *mcip; uint_t i = 0, index = 0; + mac_ring_t *ring; /* Revisit when we implement fully dynamic group allocation */ ASSERT(grp_index >= 0 && grp_index < mip->mi_rx_group_count); @@ -4380,6 +5164,19 @@ mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, *grp_num = mip->mi_rx_groups[grp_index].mrg_index; *type = mip->mi_rx_groups[grp_index].mrg_type; *n_rings = mip->mi_rx_groups[grp_index].mrg_cur_count; + ring = mip->mi_rx_groups[grp_index].mrg_rings; + for (index = 0; index < mip->mi_rx_groups[grp_index].mrg_cur_count; + index++) { + rings[index] = ring->mr_index; + ring = ring->mr_next; + } + /* Assuming the 1st is the default group */ + index = 0; + if (grp_index == 0) { + (void) strlcpy(clnts_name, "<default,mcast>,", + MAXCLIENTNAMELEN); + index += strlen("<default,mcast>,"); + } for (mcip = mip->mi_rx_groups[grp_index].mrg_clients; mcip != NULL; mcip = mcip->mgc_next) { int name_len = strlen(mcip->mgc_client->mci_name); @@ -4410,10 +5207,194 @@ mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, rw_exit(&mip->mi_rw_lock); } +/* + * Return the TX h/w information for the group indexed by grp_num. 
+ */ +void +mac_get_hwtxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num, + uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts, + char *clnts_name) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_grp_client_t *mcip; + uint_t i = 0, index = 0; + mac_ring_t *ring; + + /* Revisit when we implement fully dynamic group allocation */ + ASSERT(grp_index >= 0 && grp_index <= mip->mi_tx_group_count); + + rw_enter(&mip->mi_rw_lock, RW_READER); + *grp_num = mip->mi_tx_groups[grp_index].mrg_index > 0 ? + mip->mi_tx_groups[grp_index].mrg_index : grp_index; + *type = mip->mi_tx_groups[grp_index].mrg_type; + *n_rings = mip->mi_tx_groups[grp_index].mrg_cur_count; + ring = mip->mi_tx_groups[grp_index].mrg_rings; + for (index = 0; index < mip->mi_tx_groups[grp_index].mrg_cur_count; + index++) { + rings[index] = ring->mr_index; + ring = ring->mr_next; + } + index = 0; + /* Default group has an index of -1 */ + if (mip->mi_tx_groups[grp_index].mrg_index < 0) { + (void) strlcpy(clnts_name, "<default>,", + MAXCLIENTNAMELEN); + index += strlen("<default>,"); + } + for (mcip = mip->mi_tx_groups[grp_index].mrg_clients; mcip != NULL; + mcip = mcip->mgc_next) { + int name_len = strlen(mcip->mgc_client->mci_name); + + /* + * MAXCLIENTNAMELEN is the buffer size reserved for client + * names. + * XXXX Formating the client name string needs to be moved + * to user land when fixing the size of dhi_clnts in + * dld_hwgrpinfo_t. We should use n_clients * client_name for + * dhi_clntsin instead of MAXCLIENTNAMELEN + */ + if (index + name_len >= MAXCLIENTNAMELEN) { + index = MAXCLIENTNAMELEN; + break; + } + bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]), + name_len); + index += name_len; + clnts_name[index++] = ','; + i++; + } + + /* Get rid of the last , */ + if (index > 0) + clnts_name[index - 1] = '\0'; + *n_clnts = i; + rw_exit(&mip->mi_rw_lock); +} + +/* + * Return the group count for RX or TX. 
+ */ uint_t -mac_hwgrp_num(mac_handle_t mh) +mac_hwgrp_num(mac_handle_t mh, int type) { mac_impl_t *mip = (mac_impl_t *)mh; - return (mip->mi_rx_group_count); + /* + * Return the Rx and Tx group count; for the Tx we need to + * include the default too. + */ + return (type == MAC_RING_TYPE_RX ? mip->mi_rx_group_count : + mip->mi_tx_groups != NULL ? mip->mi_tx_group_count + 1 : 0); +} + +/* + * The total number of free TX rings for this MAC. + */ +uint_t +mac_txavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txrings_avail); +} + +/* + * The total number of free RX rings for this MAC. + */ +uint_t +mac_rxavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxrings_avail); +} + +/* + * The total number of reserved RX rings on this MAC. + */ +uint_t +mac_rxrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxrings_rsvd); +} + +/* + * The total number of reserved TX rings on this MAC. + */ +uint_t +mac_txrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txrings_rsvd); +} + +/* + * Total number of free RX groups on this MAC. + */ +uint_t +mac_rxhwlnksavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxhwclnt_avail); +} + +/* + * Total number of RX groups reserved on this MAC. + */ +uint_t +mac_rxhwlnksrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_rxhwclnt_used); +} + +/* + * Total number of free TX groups on this MAC. + */ +uint_t +mac_txhwlnksavail_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txhwclnt_avail); +} + +/* + * Total number of TX groups reserved on this MAC. + */ +uint_t +mac_txhwlnksrsvd_get(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + return (mip->mi_txhwclnt_used); +} + +/* + * Initialize the rings property for a mac client. 
A non-0 value for + * rxring or txring specifies the number of rings required, a value + * of MAC_RXRINGS_NONE/MAC_TXRINGS_NONE specifies that it doesn't need + * any RX/TX rings and a value of MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE + * means the system can decide whether it can give any rings or not. + */ +void +mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings) +{ + mac_client_impl_t *mcip = (mac_client_impl_t *)mch; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + + if (rxrings != MAC_RXRINGS_DONTCARE) { + mrp->mrp_mask |= MRP_RX_RINGS; + mrp->mrp_nrxrings = rxrings; + } + + if (txrings != MAC_TXRINGS_DONTCARE) { + mrp->mrp_mask |= MRP_TX_RINGS; + mrp->mrp_ntxrings = txrings; + } } diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index 379e488ee2..6f1661d5f2 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -19,12 +19,15 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/types.h> #include <sys/callb.h> +#include <sys/cpupart.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> #include <sys/sdt.h> #include <sys/strsubr.h> #include <sys/strsun.h> @@ -40,6 +43,7 @@ #include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> #include <sys/mac_flow_impl.h> +#include <sys/mac_stat.h> static void mac_srs_soft_rings_signal(mac_soft_ring_set_t *, uint_t); static void mac_srs_update_fanout_list(mac_soft_ring_set_t *); @@ -56,14 +60,10 @@ static void mac_srs_remove_glist(mac_soft_ring_set_t *); static void mac_srs_fanout_list_free(mac_soft_ring_set_t *); static void mac_soft_ring_remove(mac_soft_ring_set_t *, mac_soft_ring_t *); -static int mac_compute_soft_ring_count(flow_entry_t *, int); +static int mac_compute_soft_ring_count(flow_entry_t *, int, int); static void mac_walk_srs_and_bind(int); static void mac_walk_srs_and_unbind(int); -extern mac_group_t *mac_reserve_rx_group(mac_client_impl_t *, uint8_t *, - mac_rx_group_reserve_type_t); -extern void mac_release_rx_group(mac_client_impl_t *, mac_group_t *); - extern boolean_t mac_latency_optimize; static kmem_cache_t *mac_srs_cache; @@ -92,14 +92,6 @@ int mac_soft_ring_max_q_cnt = 1024; int mac_soft_ring_min_q_cnt = 256; int mac_soft_ring_poll_thres = 16; -/* - * Default value of number of TX rings to be assigned to a MAC client. - * If less than 'mac_tx_ring_count' worth of Tx rings is available, then - * as many as is available will be assigned to the newly created MAC client. - * If no TX rings are available, then MAC client(s) will be assigned the - * default Tx ring. Default Tx ring can be shared among multiple MAC clients. - */ -uint32_t mac_tx_ring_count = 32; boolean_t mac_tx_serialize = B_FALSE; /* @@ -157,9 +149,11 @@ static krwlock_t mac_srs_g_lock; boolean_t mac_srs_thread_bind = B_TRUE; /* - * CPU to fallback to, used by mac_next_bind_cpu(). + * Whether Rx/Tx interrupts should be re-targeted. Disabled by default. + * dladm command would override this. 
*/ -processorid_t srs_bind_cpu = 0; +boolean_t mac_tx_intr_retarget = B_FALSE; +boolean_t mac_rx_intr_retarget = B_FALSE; /* * If cpu bindings are specified by user, then Tx SRS and its soft @@ -170,24 +164,39 @@ processorid_t srs_bind_cpu = 0; */ #define BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp) { \ processorid_t cpuid; \ - int i, j; \ + int i; \ mac_soft_ring_t *softring; \ + mac_cpus_t *srs_cpu; \ \ - cpuid = mrp->mrp_cpu[mrp->mrp_ncpus - 1]; \ - mac_srs_worker_bind(mac_tx_srs, cpuid); \ - if (TX_MULTI_RING_MODE(mac_tx_srs)) { \ - j = mrp->mrp_ncpus - 1; \ - for (i = 0; \ - i < mac_tx_srs->srs_oth_ring_count; i++, j--) { \ - if (j < 0) \ - j = mrp->mrp_ncpus - 1; \ - cpuid = mrp->mrp_cpu[j]; \ - softring = mac_tx_srs->srs_oth_soft_rings[i]; \ - (void) mac_soft_ring_bind(softring, cpuid); \ + srs_cpu = &mac_tx_srs->srs_cpu; \ + cpuid = srs_cpu->mc_tx_fanout_cpus[0]; \ + mac_srs_worker_bind(mac_tx_srs, cpuid); \ + if (MAC_TX_SOFT_RINGS(mac_tx_srs)) { \ + for (i = 0; i < mac_tx_srs->srs_tx_ring_count; i++) { \ + cpuid = srs_cpu->mc_tx_fanout_cpus[i]; \ + softring = mac_tx_srs->srs_tx_soft_rings[i]; \ + if (cpuid != -1) { \ + (void) mac_soft_ring_bind(softring, \ + cpuid); \ + } \ } \ } \ } +/* + * Re-targeting is allowed only for exclusive group or for primary. 
+ */ +#define RETARGETABLE_CLIENT(group, mcip) \ + ((((group) != NULL) && \ + ((group)->mrg_state == MAC_GROUP_STATE_RESERVED)) || \ + mac_is_primary_client(mcip)) + +#define MAC_RING_RETARGETABLE(ring) \ + (((ring) != NULL) && \ + ((ring)->mr_info.mri_intr.mi_ddi_handle != NULL) && \ + !((ring)->mr_info.mri_intr.mi_ddi_shared)) + + /* INIT and FINI ROUTINES */ void @@ -218,7 +227,7 @@ mac_soft_ring_finish(void) } static void -mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs, boolean_t release_tx_ring) +mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs) { mac_soft_ring_t *softring, *next, *head; @@ -240,7 +249,7 @@ mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs, boolean_t release_tx_ring) for (softring = head; softring != NULL; softring = next) { next = softring->s_ring_next; - mac_soft_ring_free(softring, release_tx_ring); + mac_soft_ring_free(softring); } } @@ -518,21 +527,30 @@ mac_srs_poll_state_change(mac_soft_ring_set_t *mac_srs, /* * Return the next CPU to be used to bind a MAC kernel thread. + * If a cpupart is specified, the cpu chosen must be from that + * cpu partition. 
*/ static processorid_t -mac_next_bind_cpu(void) +mac_next_bind_cpu(cpupart_t *cpupart) { - static processorid_t srs_curr_cpu = -1; - cpu_t *cp; + static cpu_t *cp = NULL; + cpu_t *cp_start; ASSERT(MUTEX_HELD(&cpu_lock)); - srs_curr_cpu++; - cp = cpu_get(srs_curr_cpu); - if (cp == NULL || !cpu_is_online(cp)) - srs_curr_cpu = srs_bind_cpu; + if (cp == NULL) + cp = cpu_list; + + cp = cp->cpu_next_onln; + cp_start = cp; + + do { + if ((cpupart == NULL) || (cp->cpu_part == cpupart)) + return (cp->cpu_id); - return (srs_curr_cpu); + } while ((cp = cp->cpu_next_onln) != cp_start); + + return (NULL); } /* ARGSUSED */ @@ -588,7 +606,7 @@ mac_srs_cpu_setup(cpu_setup_t what, int id, void *arg) */ boolean_t mac_use_bw_heuristic = B_TRUE; static int -mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt) +mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus) { uint64_t cpu_speed, bw = 0; int srings = 0; @@ -675,12 +693,85 @@ mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt) srings = 0; } /* Do some more massaging */ - srings = min(srings, ncpus); + srings = min(srings, maxcpus); srings = min(srings, MAX_SR_FANOUT); return (srings); } /* + * mac_tx_cpu_init: + * set up CPUs for Tx interrupt re-targeting and Tx worker + * thread binding + */ +static void +mac_tx_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp, + cpupart_t *cpupart) +{ + mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs; + mac_srs_tx_t *srs_tx = &tx_srs->srs_tx; + mac_cpus_t *srs_cpu = &tx_srs->srs_cpu; + mac_soft_ring_t *sringp; + mac_ring_t *ring; + processorid_t worker_cpuid; + boolean_t retargetable_client = B_FALSE; + int i, j; + + if (RETARGETABLE_CLIENT((mac_group_t *)flent->fe_tx_ring_group, + flent->fe_mcip)) { + retargetable_client = B_TRUE; + } + + if (MAC_TX_SOFT_RINGS(tx_srs)) { + if (mrp != NULL) + j = mrp->mrp_ncpus - 1; + for (i = 0; i < tx_srs->srs_tx_ring_count; i++) { + if (mrp != NULL) { + if (j < 0) + j = mrp->mrp_ncpus - 1; + worker_cpuid 
= mrp->mrp_cpu[j]; + } else { + /* + * Bind interrupt to the next CPU available + * and leave the worker unbound. + */ + worker_cpuid = -1; + } + sringp = tx_srs->srs_tx_soft_rings[i]; + ring = (mac_ring_t *)sringp->s_ring_tx_arg2; + srs_cpu->mc_tx_fanout_cpus[i] = worker_cpuid; + if (MAC_RING_RETARGETABLE(ring) && + retargetable_client) { + mutex_enter(&cpu_lock); + srs_cpu->mc_tx_intr_cpu[i] = + (mrp != NULL) ? mrp->mrp_cpu[j] : + (mac_tx_intr_retarget ? + mac_next_bind_cpu(cpupart) : -1); + mutex_exit(&cpu_lock); + } else { + srs_cpu->mc_tx_intr_cpu[i] = -1; + } + if (mrp != NULL) + j--; + } + } else { + /* Tx mac_ring_handle_t is stored in st_arg2 */ + srs_cpu->mc_tx_fanout_cpus[0] = + (mrp != NULL) ? mrp->mrp_cpu[mrp->mrp_ncpus - 1] : -1; + ring = (mac_ring_t *)srs_tx->st_arg2; + if (MAC_RING_RETARGETABLE(ring) && retargetable_client) { + mutex_enter(&cpu_lock); + srs_cpu->mc_tx_intr_cpu[0] = (mrp != NULL) ? + mrp->mrp_cpu[mrp->mrp_ncpus - 1] : + (mac_tx_intr_retarget ? + mac_next_bind_cpu(cpupart) : -1); + mutex_exit(&cpu_lock); + } else { + srs_cpu->mc_tx_intr_cpu[0] = -1; + } + } +} + +/* * Assignment of user specified CPUs to a link. * * Minimum CPUs required to get an optimal assignmet: @@ -719,6 +810,7 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) int rx_srs_cnt, reqd_rx_cpu_cnt; int fanout_cpu_cnt, reqd_tx_cpu_cnt; int reqd_poll_worker_cnt, fanout_cnt_per_srs; + mac_resource_props_t *emrp = &flent->fe_effective_props; ASSERT(mrp->mrp_fanout_mode == MCM_CPUS); /* @@ -731,12 +823,11 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) no_of_cpus = mrp->mrp_ncpus; - if (mrp->mrp_intr_cpu != -1) { + if (mrp->mrp_rx_intr_cpu != -1) { /* * interrupt has been re-targetted. Poll * thread needs to be bound to interrupt - * CPU. Presently only fixed interrupts - * are re-targetted, MSI-x aren't. + * CPU. * * Find where in the list is the intr * CPU and swap it with the first one. 
@@ -744,11 +835,11 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * list for poll. */ for (i = 0; i < no_of_cpus; i++) { - if (mrp->mrp_cpu[i] == mrp->mrp_intr_cpu) + if (mrp->mrp_cpu[i] == mrp->mrp_rx_intr_cpu) break; } mrp->mrp_cpu[i] = mrp->mrp_cpu[0]; - mrp->mrp_cpu[0] = mrp->mrp_intr_cpu; + mrp->mrp_cpu[0] = mrp->mrp_rx_intr_cpu; } /* @@ -768,8 +859,8 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) /* How many CPUs are needed for Tx side? */ tx_srs = flent->fe_tx_srs; - reqd_tx_cpu_cnt = TX_MULTI_RING_MODE(tx_srs) ? - tx_srs->srs_oth_ring_count : 1; + reqd_tx_cpu_cnt = MAC_TX_SOFT_RINGS(tx_srs) ? + tx_srs->srs_tx_ring_count : 1; /* CPUs needed for Rx SRSes poll and worker threads */ reqd_poll_worker_cnt = mac_latency_optimize ? @@ -806,14 +897,14 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = fanout_cnt_per_srs; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize) - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = (mac_latency_optimize ? 
+ srs_cpu->mc_rx_pollid : mrp->mrp_cpu[cpu_cnt++]); for (i = 0; i < fanout_cnt_per_srs; i++) - srs_cpu->mc_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; /* Do the assignment for h/w Rx SRSes */ if (flent->fe_rx_srs_cnt > 1) { @@ -831,23 +922,22 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = fanout_cnt_per_srs; + srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs; /* The first CPU in the list is the intr CPU */ - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize) { - srs_cpu->mc_workerid = - mrp->mrp_cpu[cpu_cnt++]; - } + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + (mac_latency_optimize ? + srs_cpu->mc_rx_pollid : + mrp->mrp_cpu[cpu_cnt++]); for (i = 0; i < fanout_cnt_per_srs; i++) { - srs_cpu->mc_fanout_cpus[i] = + srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++]; } ASSERT(cpu_cnt <= no_of_cpus); } } - return; + goto tx_cpu_init; } /* @@ -885,13 +975,15 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize && worker_assign) - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt++]; - srs_cpu->mc_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + 
((!mac_latency_optimize && worker_assign) ? + mrp->mrp_cpu[cpu_cnt++] : srs_cpu->mc_rx_pollid); + + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; /* Do CPU bindings for SRSes having h/w Rx rings */ if (flent->fe_rx_srs_cnt > 1) { @@ -909,22 +1001,21 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_pollid = + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = srs_cpu->mc_pollid; - if (!mac_latency_optimize && worker_assign) { - srs_cpu->mc_workerid = - mrp->mrp_cpu[++cpu_cnt]; - } - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_fanout_cpus[0] = + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = + ((!mac_latency_optimize && worker_assign) ? + mrp->mrp_cpu[++cpu_cnt] : + srs_cpu->mc_rx_pollid); + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; cpu_cnt++; ASSERT(cpu_cnt <= no_of_cpus); } } - return; + goto tx_cpu_init; } /* @@ -942,14 +1033,28 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) srs_cpu->mc_ncpus = no_of_cpus; bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus)); - srs_cpu->mc_fanout_cnt = 1; - srs_cpu->mc_pollid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_intr_cpu = mrp->mrp_intr_cpu; - srs_cpu->mc_workerid = mrp->mrp_cpu[cpu_cnt]; - srs_cpu->mc_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cnt = 1; + srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt]; + /* Retarget the interrupt to the same CPU as the poll */ + srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid; + srs_cpu->mc_rx_workerid = mrp->mrp_cpu[cpu_cnt]; + srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt]; if (++cpu_cnt >= no_of_cpus) cpu_cnt = 0; } + +tx_cpu_init: + mac_tx_cpu_init(flent, mrp, NULL); + + /* + * Copy the user specified CPUs to the effective CPUs + */ + for (i = 0; 
i < mrp->mrp_ncpus; i++) { + emrp->mrp_cpu[i] = mrp->mrp_cpu[i]; + } + emrp->mrp_ncpus = mrp->mrp_ncpus; + emrp->mrp_mask = mrp->mrp_mask; + bzero(emrp->mrp_pool, MAXPATHLEN); } /* @@ -960,64 +1065,95 @@ mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * with a flent. */ static void -mac_flow_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) +mac_flow_cpu_init(flow_entry_t *flent, cpupart_t *cpupart) { mac_soft_ring_set_t *rx_srs; processorid_t cpuid; - int j, srs_cnt, soft_ring_cnt = 0; + int i, j, k, srs_cnt, nscpus, maxcpus, soft_ring_cnt = 0; mac_cpus_t *srs_cpu; + mac_resource_props_t *emrp = &flent->fe_effective_props; + uint32_t cpus[MRP_NCPUS]; - if (mrp->mrp_mask & MRP_CPUS_USERSPEC) { - mac_flow_user_cpu_init(flent, mrp); - } else { + /* + * The maximum number of CPUs available can either be + * the number of CPUs in the pool or the number of CPUs + * in the system. + */ + maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus; + + /* + * Compute the number of soft rings needed on top for each Rx + * SRS. "rx_srs_cnt-1" indicates the number of Rx SRS + * associated with h/w Rx rings. Soft ring count needed for + * each h/w Rx SRS is computed and the same is applied to + * software classified Rx SRS. The first Rx SRS in fe_rx_srs[] + * is the software classified Rx SRS. + */ + soft_ring_cnt = mac_compute_soft_ring_count(flent, + flent->fe_rx_srs_cnt - 1, maxcpus); + if (soft_ring_cnt == 0) { /* - * Compute the number of soft rings needed on top for each Rx - * SRS. "rx_srs_cnt-1" indicates the number of Rx SRS - * associated with h/w Rx rings. Soft ring count needed for - * each h/w Rx SRS is computed and the same is applied to - * software classified Rx SRS. The first Rx SRS in fe_rx_srs[] - * is the software classified Rx SRS. + * Even when soft_ring_cnt is 0, we still need + * to create a soft ring for TCP, UDP and + * OTHER. So set it to 1. 
*/ - soft_ring_cnt = mac_compute_soft_ring_count(flent, - flent->fe_rx_srs_cnt - 1); - if (soft_ring_cnt == 0) { - /* - * Even when soft_ring_cnt is 0, we still need - * to create a soft ring for TCP, UDP and - * OTHER. So set it to 1. - */ - soft_ring_cnt = 1; - } - for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { - rx_srs = flent->fe_rx_srs[srs_cnt]; - srs_cpu = &rx_srs->srs_cpu; - if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT) { - if (soft_ring_cnt == srs_cpu->mc_fanout_cnt) - continue; - rx_srs->srs_fanout_state = SRS_FANOUT_REINIT; - } - srs_cpu->mc_ncpus = soft_ring_cnt; - srs_cpu->mc_fanout_cnt = soft_ring_cnt; - mutex_enter(&cpu_lock); - for (j = 0; j < soft_ring_cnt; j++) { - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_cpus[j] = cpuid; - srs_cpu->mc_fanout_cpus[j] = cpuid; - } - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_pollid = cpuid; - /* increment ncpus to account for polling cpu */ + soft_ring_cnt = 1; + } + for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { + rx_srs = flent->fe_rx_srs[srs_cnt]; + srs_cpu = &rx_srs->srs_cpu; + if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT) + rx_srs->srs_fanout_state = SRS_FANOUT_REINIT; + srs_cpu->mc_ncpus = soft_ring_cnt; + srs_cpu->mc_rx_fanout_cnt = soft_ring_cnt; + mutex_enter(&cpu_lock); + for (j = 0; j < soft_ring_cnt; j++) { + cpuid = mac_next_bind_cpu(cpupart); + srs_cpu->mc_cpus[j] = cpuid; + srs_cpu->mc_rx_fanout_cpus[j] = cpuid; + } + cpuid = mac_next_bind_cpu(cpupart); + srs_cpu->mc_rx_pollid = cpuid; + srs_cpu->mc_rx_intr_cpu = (mac_rx_intr_retarget ? 
+ srs_cpu->mc_rx_pollid : -1); + /* increment ncpus to account for polling cpu */ + srs_cpu->mc_ncpus++; + srs_cpu->mc_cpus[j++] = cpuid; + if (!mac_latency_optimize) { + cpuid = mac_next_bind_cpu(cpupart); srs_cpu->mc_ncpus++; srs_cpu->mc_cpus[j++] = cpuid; - if (!mac_latency_optimize) { - cpuid = mac_next_bind_cpu(); - srs_cpu->mc_ncpus++; - srs_cpu->mc_cpus[j++] = cpuid; - } - srs_cpu->mc_workerid = cpuid; - mutex_exit(&cpu_lock); } + srs_cpu->mc_rx_workerid = cpuid; + mutex_exit(&cpu_lock); } + + nscpus = 0; + for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) { + rx_srs = flent->fe_rx_srs[srs_cnt]; + srs_cpu = &rx_srs->srs_cpu; + for (j = 0; j < srs_cpu->mc_ncpus; j++) { + cpus[nscpus++] = srs_cpu->mc_cpus[j]; + } + } + + + /* + * Copy cpu list to fe_effective_props + * without duplicates. + */ + k = 0; + for (i = 0; i < nscpus; i++) { + for (j = 0; j < k; j++) { + if (emrp->mrp_cpu[j] == cpus[i]) + break; + } + if (j == k) + emrp->mrp_cpu[k++] = cpus[i]; + } + emrp->mrp_ncpus = k; + + mac_tx_cpu_init(flent, NULL, cpupart); } /* @@ -1025,15 +1161,46 @@ mac_flow_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp) * (setup SRS and set/update FANOUT, B/W and PRIORITY) */ +/* + * mac_srs_fanout_list_alloc: + * + * The underlying device can expose upto MAX_RINGS_PER_GROUP worth of + * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of + * array space is needed to store Tx soft rings. Thus we allocate so + * much array space for srs_tx_soft_rings. + * + * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth + * of space to st_soft_rings. This array is used for quick access to + * soft ring associated with a pseudo Tx ring based on the pseudo + * ring's index (mr_index). 
+ */ static void mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs) { - mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); - mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); - mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **) - kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, KM_SLEEP); + mac_client_impl_t *mcip = mac_srs->srs_mcip; + + if (mac_srs->srs_type & SRST_TX) { + mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * + MAX_RINGS_PER_GROUP, KM_SLEEP); + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + tx->st_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * + MAX_RINGS_PER_GROUP, KM_SLEEP); + } + } else { + mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **) + kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT, + KM_SLEEP); + } } static void @@ -1095,6 +1262,121 @@ mac_srs_poll_bind(mac_soft_ring_set_t *mac_srs, processorid_t cpuid) } /* + * Re-target interrupt to the passed CPU. If re-target is successful, + * set mc_rx_intr_cpu to the re-targeted CPU. Otherwise set it to -1. 
+ */ +void +mac_rx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs, processorid_t cpuid) +{ + cpu_t *cp; + mac_ring_t *ring = mac_srs->srs_ring; + mac_intr_t *mintr = &ring->mr_info.mri_intr; + flow_entry_t *flent = mac_srs->srs_flent; + boolean_t primary = mac_is_primary_client(mac_srs->srs_mcip); + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Don't re-target the interrupt for these cases: + * 1) ring is NULL + * 2) the interrupt is shared (mi_ddi_shared) + * 3) ddi_handle is NULL and !primary + * 4) primary, ddi_handle is NULL but fe_rx_srs_cnt > 2 + * Case 3 & 4 are because of mac_client_intr_cpu() routine. + * This routine will re-target fixed interrupt for primary + * mac client if the client has only one ring. In that + * case, mc_rx_intr_cpu will already have the correct value. + */ + if (ring == NULL || mintr->mi_ddi_shared || cpuid == -1 || + (mintr->mi_ddi_handle == NULL && !primary) || (primary && + mintr->mi_ddi_handle == NULL && flent->fe_rx_srs_cnt > 2)) { + mac_srs->srs_cpu.mc_rx_intr_cpu = -1; + return; + } + + if (mintr->mi_ddi_handle == NULL) + return; + + cp = cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp)) + return; + + /* Drop the cpu_lock as ddi_intr_set_affinity() holds it */ + mutex_exit(&cpu_lock); + if (ddi_intr_set_affinity(mintr->mi_ddi_handle, cpuid) == DDI_SUCCESS) + mac_srs->srs_cpu.mc_rx_intr_cpu = cpuid; + else + mac_srs->srs_cpu.mc_rx_intr_cpu = -1; + mutex_enter(&cpu_lock); +} + +/* + * Re-target Tx interrupts + */ +void +mac_tx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs) +{ + cpu_t *cp; + mac_ring_t *ring; + mac_intr_t *mintr; + mac_soft_ring_t *sringp; + mac_srs_tx_t *srs_tx; + mac_cpus_t *srs_cpu; + processorid_t cpuid; + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + srs_cpu = &mac_srs->srs_cpu; + if (MAC_TX_SOFT_RINGS(mac_srs)) { + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; + ring = (mac_ring_t *)sringp->s_ring_tx_arg2; + cpuid = srs_cpu->mc_tx_intr_cpu[i]; + cp = 
cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp) || + !MAC_RING_RETARGETABLE(ring)) { + srs_cpu->mc_tx_retargeted_cpu[i] = -1; + continue; + } + mintr = &ring->mr_info.mri_intr; + /* + * Drop the cpu_lock as ddi_intr_set_affinity() + * holds it + */ + mutex_exit(&cpu_lock); + if (ddi_intr_set_affinity(mintr->mi_ddi_handle, + cpuid) == DDI_SUCCESS) { + srs_cpu->mc_tx_retargeted_cpu[i] = cpuid; + } else { + srs_cpu->mc_tx_retargeted_cpu[i] = -1; + } + mutex_enter(&cpu_lock); + } + } else { + cpuid = srs_cpu->mc_tx_intr_cpu[0]; + cp = cpu_get(cpuid); + if (cp == NULL || !cpu_is_online(cp)) { + srs_cpu->mc_tx_retargeted_cpu[0] = -1; + return; + } + srs_tx = &mac_srs->srs_tx; + ring = (mac_ring_t *)srs_tx->st_arg2; + if (MAC_RING_RETARGETABLE(ring)) { + mintr = &ring->mr_info.mri_intr; + mutex_exit(&cpu_lock); + if ((ddi_intr_set_affinity(mintr->mi_ddi_handle, + cpuid) == DDI_SUCCESS)) { + srs_cpu->mc_tx_retargeted_cpu[0] = cpuid; + } else { + srs_cpu->mc_tx_retargeted_cpu[0] = -1; + } + mutex_enter(&cpu_lock); + } + } +} + +/* * When a CPU comes back online, bind the MAC kernel threads which * were previously bound to that CPU, and had to be unbound because * the CPU was going away. @@ -1231,17 +1513,16 @@ done: static void mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) { - uint32_t tx_mode; + uint32_t tx_mode, ring_info = 0; mac_srs_tx_t *srs_tx = &srs->srs_tx; mac_client_impl_t *mcip = srs->srs_mcip; - mac_impl_t *mip = mcip->mci_mip; /* * We need to quiesce/restart the client here because mac_tx() and * srs->srs_tx->st_func do not hold srs->srs_lock while accessing * st_mode and related fields, which are modified by the code below. 
*/ - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce((mac_client_handle_t)mcip); mutex_enter(&srs->srs_lock); mutex_enter(&srs->srs_bw->mac_bw_lock); @@ -1250,14 +1531,18 @@ mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { /* Reset bandwidth limit */ if (tx_mode == SRS_TX_BW) { + if (srs_tx->st_arg2 != NULL) + ring_info = mac_hwring_getinfo(srs_tx->st_arg2); if (mac_tx_serialize || - (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) { + (ring_info & MAC_RING_TX_SERIALIZE)) { srs_tx->st_mode = SRS_TX_SERIALIZE; } else { srs_tx->st_mode = SRS_TX_DEFAULT; } } else if (tx_mode == SRS_TX_BW_FANOUT) { srs_tx->st_mode = SRS_TX_FANOUT; + } else if (tx_mode == SRS_TX_BW_AGGR) { + srs_tx->st_mode = SRS_TX_AGGR; } srs->srs_type &= ~SRST_BW_CONTROL; } else { @@ -1270,13 +1555,15 @@ mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp) srs->srs_bw->mac_bw_drop_threshold = srs->srs_bw->mac_bw_limit << 1; srs->srs_type |= SRST_BW_CONTROL; - if (tx_mode != SRS_TX_BW && - tx_mode != SRS_TX_BW_FANOUT) { + if (tx_mode != SRS_TX_BW && tx_mode != SRS_TX_BW_FANOUT && + tx_mode != SRS_TX_BW_AGGR) { if (tx_mode == SRS_TX_SERIALIZE || tx_mode == SRS_TX_DEFAULT) { srs_tx->st_mode = SRS_TX_BW; } else if (tx_mode == SRS_TX_FANOUT) { srs_tx->st_mode = SRS_TX_BW_FANOUT; + } else if (tx_mode == SRS_TX_AGGR) { + srs_tx->st_mode = SRS_TX_BW_AGGR; } else { ASSERT(0); } @@ -1287,7 +1574,7 @@ done: mutex_exit(&srs->srs_bw->mac_bw_lock); mutex_exit(&srs->srs_lock); - mac_tx_client_restart(mcip); + mac_tx_client_restart((mac_client_handle_t)mcip); } /* @@ -1392,9 +1679,7 @@ mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable) static void mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs) { - int tcp_count = 0; - int udp_count = 0; - int oth_count = 0; + int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0; mac_soft_ring_t *softring; softring = 
mac_srs->srs_soft_ring_head; @@ -1403,33 +1688,35 @@ mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs) mac_srs->srs_tcp_ring_count = 0; mac_srs->srs_udp_ring_count = 0; mac_srs->srs_oth_ring_count = 0; + mac_srs->srs_tx_ring_count = 0; return; } - softring = mac_srs->srs_soft_ring_head; - tcp_count = udp_count = oth_count = 0; - while (softring != NULL) { - if (softring->s_ring_type & ST_RING_TCP) + if (softring->s_ring_type & ST_RING_TCP) { mac_srs->srs_tcp_soft_rings[tcp_count++] = softring; - else if (softring->s_ring_type & ST_RING_UDP) + } else if (softring->s_ring_type & ST_RING_UDP) { mac_srs->srs_udp_soft_rings[udp_count++] = softring; - else + } else if (softring->s_ring_type & ST_RING_OTH) { mac_srs->srs_oth_soft_rings[oth_count++] = softring; + } else { + ASSERT(softring->s_ring_type & ST_RING_TX); + mac_srs->srs_tx_soft_rings[tx_count++] = softring; + } softring = softring->s_ring_next; } ASSERT(mac_srs->srs_soft_ring_count == - (tcp_count + udp_count + oth_count)); - + (tcp_count + udp_count + oth_count + tx_count)); mac_srs->srs_tcp_ring_count = tcp_count; mac_srs->srs_udp_ring_count = udp_count; mac_srs->srs_oth_ring_count = oth_count; + mac_srs->srs_tx_ring_count = tx_count; } void -mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, - pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, +mac_srs_create_proto_softrings(int id, uint16_t type, pri_t pri, + mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2, boolean_t set_bypass) { @@ -1446,7 +1733,7 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, mrf.mrf_flow_priority = pri; softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_TCP), pri, mcip, mac_srs, + (type|ST_RING_TCP), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; @@ -1481,7 +1768,7 @@ mac_srs_create_proto_softrings(int 
id, void *flent, uint16_t type, * bypass the DLS layer. */ softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_UDP), pri, mcip, mac_srs, + (type|ST_RING_UDP), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; @@ -1493,7 +1780,7 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, /* Create the Oth softrings which has to go through the DLS */ softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait, - (void *)flent, (type|ST_RING_OTH), pri, mcip, mac_srs, + (type|ST_RING_OTH), pri, mcip, mac_srs, cpuid, rx_func, x_arg1, x_arg2); softring->s_ring_rx_arg2 = NULL; } @@ -1507,19 +1794,16 @@ mac_srs_create_proto_softrings(int id, void *flent, uint16_t type, * same CPU as that of the soft ring's. */ static void -mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2, mac_soft_ring_set_t *mac_rx_srs, - mac_soft_ring_set_t *mac_tx_srs) +mac_srs_fanout_modify(mac_client_impl_t *mcip, mac_direct_rx_t rx_func, + void *x_arg1, mac_resource_handle_t x_arg2, + mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs) { mac_soft_ring_t *softring; uint32_t soft_ring_flag = 0; processorid_t cpuid = -1; - boolean_t user_specified; int i, srings_present, new_fanout_cnt; mac_cpus_t *srs_cpu; - user_specified = mrp->mrp_mask & MRP_CPUS_USERSPEC; /* fanout state is REINIT. 
Set it back to INIT */ ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_REINIT); mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT; @@ -1528,7 +1812,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, srings_present = mac_rx_srs->srs_tcp_ring_count; /* new request */ srs_cpu = &mac_rx_srs->srs_cpu; - new_fanout_cnt = srs_cpu->mc_fanout_cnt; + new_fanout_cnt = srs_cpu->mc_rx_fanout_cnt; mutex_enter(&mac_rx_srs->srs_lock); if (mac_rx_srs->srs_type & SRST_BW_CONTROL) @@ -1547,8 +1831,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, * Create the protocol softrings and set the * DLS bypass where possible. */ - mac_srs_create_proto_softrings(i, - (void *)flent, soft_ring_flag, + mac_srs_create_proto_softrings(i, soft_ring_flag, mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, rx_func, x_arg1, x_arg2, B_TRUE); } @@ -1583,7 +1866,7 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(new_fanout_cnt == mac_rx_srs->srs_tcp_ring_count); mutex_enter(&cpu_lock); for (i = 0; i < mac_rx_srs->srs_tcp_ring_count; i++) { - cpuid = srs_cpu->mc_fanout_cpus[i]; + cpuid = srs_cpu->mc_rx_fanout_cpus[i]; (void) mac_soft_ring_bind(mac_rx_srs->srs_udp_soft_rings[i], cpuid); (void) mac_soft_ring_bind(mac_rx_srs->srs_oth_soft_rings[i], @@ -1597,15 +1880,16 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, } } - mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_pollid); - mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_workerid); - + mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid); + mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu); /* * Bind Tx srs and soft ring threads too. Let's bind tx * srs to the last cpu in mrp list. 
*/ - if (mac_tx_srs != NULL && user_specified) { + if (mac_tx_srs != NULL) { BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); + mac_tx_srs_retarget_intr(mac_tx_srs); } mutex_exit(&cpu_lock); } @@ -1614,16 +1898,15 @@ mac_srs_fanout_modify(mac_client_impl_t *mcip, flow_entry_t *flent, * Bind SRS threads and soft rings to CPUs/create fanout list. */ void -mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2, mac_soft_ring_set_t *mac_rx_srs, - mac_soft_ring_set_t *mac_tx_srs) +mac_srs_fanout_init(mac_client_impl_t *mcip, mac_resource_props_t *mrp, + mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2, + mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs, + cpupart_t *cpupart) { int i; - processorid_t cpuid, worker_cpuid, poll_cpuid; + processorid_t cpuid; uint32_t soft_ring_flag = 0; int soft_ring_cnt; - boolean_t user_specified = B_FALSE; mac_cpus_t *srs_cpu = &mac_rx_srs->srs_cpu; /* @@ -1641,31 +1924,27 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_UNINIT); mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT; - user_specified = mrp->mrp_mask & MRP_CPUS_USERSPEC; /* * Ring count can be 0 if no fanout is required and no cpu * were specified. 
Leave the SRS worker and poll thread * unbound */ ASSERT(mrp != NULL); - soft_ring_cnt = srs_cpu->mc_fanout_cnt; + soft_ring_cnt = srs_cpu->mc_rx_fanout_cnt; /* Step 1: bind cpu contains cpu list where threads need to bind */ if (soft_ring_cnt > 0) { mutex_enter(&cpu_lock); for (i = 0; i < soft_ring_cnt; i++) { - cpuid = srs_cpu->mc_fanout_cpus[i]; + cpuid = srs_cpu->mc_rx_fanout_cpus[i]; /* Create the protocol softrings */ - mac_srs_create_proto_softrings(i, (void *)flent, - soft_ring_flag, mac_rx_srs->srs_pri, - mcip, mac_rx_srs, cpuid, rx_func, - x_arg1, x_arg2, B_FALSE); + mac_srs_create_proto_softrings(i, soft_ring_flag, + mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, + rx_func, x_arg1, x_arg2, B_FALSE); } - worker_cpuid = srs_cpu->mc_workerid; - poll_cpuid = srs_cpu->mc_pollid; - mac_srs_worker_bind(mac_rx_srs, worker_cpuid); - mac_srs_poll_bind(mac_rx_srs, poll_cpuid); - + mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid); + mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu); /* * Bind Tx srs and soft ring threads too. * Let's bind tx srs to the last cpu in @@ -1676,9 +1955,8 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, goto alldone; } - if (user_specified) { - BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); - } + BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp); + mac_tx_srs_retarget_intr(mac_tx_srs); mutex_exit(&cpu_lock); } else { mutex_enter(&cpu_lock); @@ -1686,8 +1964,8 @@ mac_srs_fanout_init(mac_client_impl_t *mcip, flow_entry_t *flent, * For a subflow, mrp_workerid and mrp_pollid * is not set. 
*/ - mac_srs_worker_bind(mac_rx_srs, mrp->mrp_workerid); - mac_srs_poll_bind(mac_rx_srs, mrp->mrp_pollid); + mac_srs_worker_bind(mac_rx_srs, mrp->mrp_rx_workerid); + mac_srs_poll_bind(mac_rx_srs, mrp->mrp_rx_pollid); mutex_exit(&cpu_lock); goto no_softrings; } @@ -1702,12 +1980,11 @@ alldone: no_softrings: if (mac_rx_srs->srs_type & SRST_FANOUT_PROTO) { mutex_enter(&cpu_lock); - cpuid = mac_next_bind_cpu(); + cpuid = mac_next_bind_cpu(cpupart); /* Create the protocol softrings */ - mac_srs_create_proto_softrings(0, (void *)flent, - soft_ring_flag, mac_rx_srs->srs_pri, - mcip, mac_rx_srs, cpuid, rx_func, - x_arg1, x_arg2, B_FALSE); + mac_srs_create_proto_softrings(0, soft_ring_flag, + mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid, + rx_func, x_arg1, x_arg2, B_FALSE); mutex_exit(&cpu_lock); } else { /* @@ -1729,7 +2006,7 @@ no_softrings: void mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1, - mac_resource_handle_t x_arg2) + mac_resource_handle_t x_arg2, cpupart_t *cpupart) { mac_soft_ring_set_t *mac_rx_srs, *mac_tx_srs; int i, rx_srs_cnt; @@ -1739,7 +2016,7 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * This is an aggregation port. Fanout will be setup * over the aggregation itself. 
*/ - if (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) return; mac_rx_srs = flent->fe_rx_srs[0]; @@ -1754,12 +2031,18 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* No fanout for subflows */ if (flent->fe_type & FLOW_USER) { - mac_srs_fanout_init(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_init(mcip, mrp, rx_func, + x_arg1, x_arg2, mac_rx_srs, mac_tx_srs, + cpupart); return; } - mac_flow_cpu_init(flent, mrp); + if (mrp->mrp_mask & MRP_CPUS_USERSPEC) + mac_flow_user_cpu_init(flent, mrp); + else + mac_flow_cpu_init(flent, cpupart); + + mrp->mrp_rx_fanout_cnt = mac_rx_srs->srs_cpu.mc_rx_fanout_cnt; /* * Set up fanout for both SW (0th SRS) and HW classified @@ -1771,15 +2054,16 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_tx_srs = NULL; switch (mac_rx_srs->srs_fanout_state) { case SRS_FANOUT_UNINIT: - mac_srs_fanout_init(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_init(mcip, mrp, rx_func, + x_arg1, x_arg2, mac_rx_srs, mac_tx_srs, + cpupart); break; case SRS_FANOUT_INIT: break; case SRS_FANOUT_REINIT: mac_rx_srs_quiesce(mac_rx_srs, SRS_QUIESCE); - mac_srs_fanout_modify(mcip, flent, mrp, rx_func, - x_arg1, x_arg2, mac_rx_srs, mac_tx_srs); + mac_srs_fanout_modify(mcip, rx_func, x_arg1, + x_arg2, mac_rx_srs, mac_tx_srs); mac_rx_srs_restart(mac_rx_srs); break; default: @@ -1791,7 +2075,7 @@ mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * mac_create_soft_ring_set: + * mac_srs_create: * * Create a mac_soft_ring_set_t (SRS). If soft_ring_fanout_type is * SRST_TX, an SRS for Tx side is created. 
Otherwise an SRS for Rx side @@ -1867,6 +2151,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, mac_srs->srs_type = (srs_type | SRST_NO_SOFT_RINGS); mac_srs->srs_worker_cpuid = mac_srs->srs_worker_cpuid_save = -1; mac_srs->srs_poll_cpuid = mac_srs->srs_poll_cpuid_save = -1; + mac_srs->srs_mcip = mcip; mac_srs_fanout_list_alloc(mac_srs); /* @@ -1881,7 +2166,6 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, } else { mac_srs->srs_pri = mcip->mci_max_pri; } - mac_srs->srs_mcip = mcip; /* * We need to insert the SRS in the global list before * binding the SRS and SR threads. Otherwise there is a @@ -1959,7 +2243,7 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, mac_tx_srs_max_q_cnt : mac_tx_srs_hiwat; srs_tx->st_arg1 = x_arg1; srs_tx->st_arg2 = x_arg2; - return (mac_srs); + goto done; } if ((srs_type & SRST_FLOW) != 0 || @@ -1973,11 +2257,13 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, srs_rx->sr_arg2 = x_arg2; if (ring != NULL) { + uint_t ring_info; + /* Is the mac_srs created over the RX default group? */ if (ring->mr_gh == (mac_group_handle_t) - (&mcip->mci_mip->mi_rx_groups[0])) + MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) { mac_srs->srs_type |= SRST_DEFAULT_GRP; - + } mac_srs->srs_ring = ring; ring->mr_srs = mac_srs; ring->mr_classify_type = MAC_HW_CLASSIFIER; @@ -1997,9 +2283,12 @@ mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type, * so that we get a chance to switch into a polling * mode under backlog. */ - if (mcip->mci_mip->mi_v12n_level & MAC_VIRT_SERIALIZE) + ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring); + if (ring_info & MAC_RING_RX_ENQUEUE) mac_srs->srs_state |= SRS_SOFTRING_QUEUE; } +done: + mac_srs_stat_create(mac_srs); return (mac_srs); } @@ -2043,7 +2332,7 @@ mac_find_fanout(flow_entry_t *flent, uint32_t link_type) /* * Change a group from h/w to s/w classification. 
*/ -static void +void mac_rx_switch_grp_to_sw(mac_group_t *group) { mac_ring_t *ring; @@ -2063,11 +2352,11 @@ mac_rx_switch_grp_to_sw(mac_group_t *group) if (ring->mr_state != MR_INUSE) (void) mac_start_ring(ring); + /* * We need to perform SW classification * for packets landing in these rings */ - ring->mr_state = MR_INUSE; ring->mr_flag = 0; ring->mr_classify_type = MAC_SW_CLASSIFIER; } @@ -2079,14 +2368,38 @@ mac_rx_switch_grp_to_sw(mac_group_t *group) */ void mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, - mac_group_t *group, uint32_t link_type) + uint32_t link_type) +{ + cpupart_t *cpupart; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + boolean_t use_default = B_FALSE; + + mac_rx_srs_group_setup(mcip, flent, link_type); + mac_tx_srs_group_setup(mcip, flent, link_type); + + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), + mac_rx_deliver, mcip, NULL, cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); +} + +/* + * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there + * is a group associated with this MAC client, set up SRSs for individual + * h/w rings. 
+ */ +void +mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, + uint32_t link_type) { mac_impl_t *mip = mcip->mci_mip; mac_soft_ring_set_t *mac_srs; - mac_soft_ring_set_t *tx_srs = NULL; mac_ring_t *ring; uint32_t fanout_type; - boolean_t created_srs = B_FALSE; + mac_group_t *rx_group = flent->fe_rx_ring_group; fanout_type = mac_find_fanout(flent, link_type); @@ -2096,64 +2409,23 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* Setup the Rx SRS */ mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, NULL); - mutex_enter(&flent->fe_lock); flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc; flent->fe_cb_arg1 = (void *)mip; flent->fe_cb_arg2 = (void *)mac_srs; mutex_exit(&flent->fe_lock); - - /* Setup the Tx SRS as well */ - ASSERT(flent->fe_tx_srs == NULL); - tx_srs = mac_srs_create(mcip, flent, SRST_TX | link_type, - NULL, mcip, NULL, NULL); - - if (mcip->mci_share != NULL) { - mac_srs_tx_t *tx = &tx_srs->srs_tx; - ASSERT((mcip->mci_state_flags & MCIS_NO_HWRINGS) == 0); - /* - * A share requires a dedicated TX group. - * mac_reserve_tx_group() does the work needed to - * allocate a new group and populate that group - * with rings according to the driver requirements - * and limitations. - */ - tx->st_group = - mac_reserve_tx_group(mip, mcip->mci_share); - ASSERT(tx->st_group != NULL); - tx->st_group->mrg_tx_client = mcip; - } - mac_tx_srs_setup(mcip, flent, link_type); - created_srs = B_TRUE; } - if (group == NULL) { - if (created_srs) { - mac_fanout_setup(mcip, flent, - MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, - mcip, NULL); - } + if (rx_group == NULL) return; - } - /* * fanout for default SRS is done when default SRS are created * above. As each ring is added to the group, we setup the * SRS and fanout to it. */ - switch (group->mrg_state) { + switch (rx_group->mrg_state) { case MAC_GROUP_STATE_RESERVED: - /* - * The group is exclusively ours. 
Create a SRS - * for each ring in the group and allow the - * individual SRS to dynamically poll their - * Rx ring. Do this only if the client is not - * a VLAN MAC client since for VLAN we do - * s/w classification for the VID check. - */ - if (i_mac_flow_vid(mcip->mci_flent) != VLAN_ID_NONE) - break; - for (ring = group->mrg_rings; ring != NULL; + for (ring = rx_group->mrg_rings; ring != NULL; ring = ring->mr_next) { switch (ring->mr_state) { case MR_INUSE: @@ -2163,14 +2435,28 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, if (ring->mr_state != MR_INUSE) (void) mac_start_ring(ring); - ring->mr_state = MR_INUSE; - + /* + * Since the group is exclusively ours create + * an SRS for this ring to allow the + * individual SRS to dynamically poll the + * ring. Do this only if the client is not + * a VLAN MAC client, since for VLAN we do + * s/w classification for the VID check, and + * if it has a unicast address. + */ + if ((mcip->mci_state_flags & + MCIS_NO_UNICAST_ADDR) || + i_mac_flow_vid(mcip->mci_flent) != + VLAN_ID_NONE) { + break; + } mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type, mac_rx_deliver, mcip, NULL, ring); break; default: - cmn_err(CE_PANIC, "srs_setup: mcip = %p " + cmn_err(CE_PANIC, + "srs_setup: mcip = %p " "trying to add UNKNOWN ring = %p\n", (void *)mcip, (void *)ring); break; @@ -2181,43 +2467,102 @@ mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, /* * Set all rings of this group to software classified. * - * If the group is current RESERVED, the existing mac client - * (the only client on this group) is using this group - * exclusively. In that case we need to disable polling on - * the rings of the group (if it was enabled), and free the - * SRS associated with the rings. + * If the group is current RESERVED, the existing mac + * client (the only client on this group) is using + * this group exclusively. 
In that case we need to + * disable polling on the rings of the group (if it + * was enabled), and free the SRS associated with the + * rings. */ - mac_rx_switch_grp_to_sw(group); + mac_rx_switch_grp_to_sw(rx_group); break; default: ASSERT(B_FALSE); break; } - mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), - mac_rx_deliver, mcip, NULL); } +/* + * Set up the TX SRS. + */ void -mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, +mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) { + int cnt; + int ringcnt; + mac_ring_t *ring; + mac_group_t *grp; + + /* + * If we are opened exclusively (like aggr does for aggr_ports), + * don't set up Tx SRS and Tx soft rings as they won't be used. + * The same thing has to be done for Rx side also. See bug: + * 6880080 + */ + if (mcip->mci_state_flags & MCIS_EXCLUSIVE) { + /* + * If we have rings, start them here. + */ + if (flent->fe_tx_ring_group == NULL) + return; + grp = (mac_group_t *)flent->fe_tx_ring_group; + ringcnt = grp->mrg_cur_count; + ring = grp->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state != MR_INUSE) { + (void) mac_start_ring(ring); + } + ring = ring->mr_next; + } + return; + } + if (flent->fe_tx_srs == NULL) { + (void) mac_srs_create(mcip, flent, SRST_TX | link_type, + NULL, mcip, NULL, NULL); + } + mac_tx_srs_setup(mcip, flent); +} + +/* + * Remove all the RX SRSs. If we want to remove only the SRSs associated + * with h/w rings, leave the S/W SRS alone. This is used when we want to + * move the MAC client from one group to another, so we need to teardown + * on the h/w SRSs. 
+ */ +void +mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly) +{ mac_soft_ring_set_t *mac_srs; - mac_soft_ring_set_t *tx_srs; - mac_srs_tx_t *tx; int i; + int count = flent->fe_rx_srs_cnt; - for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + for (i = 0; i < count; i++) { + if (i == 0 && hwonly) + continue; mac_srs = flent->fe_rx_srs[i]; mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED); - /* - * Deal with all fanout tear down etc. - */ mac_srs_free(mac_srs); flent->fe_rx_srs[i] = NULL; + flent->fe_rx_srs_cnt--; } - flent->fe_rx_srs_cnt = 0; + ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1); + ASSERT(hwonly || flent->fe_rx_srs_cnt == 0); +} + +/* + * Remove the TX SRS. + */ +void +mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, + uint32_t link_type) +{ + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + + if ((tx_srs = flent->fe_tx_srs) == NULL) + return; - tx_srs = flent->fe_tx_srs; tx = &tx_srs->srs_tx; switch (link_type) { case SRST_FLOW: @@ -2228,25 +2573,16 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_tx_srs_quiesce(tx_srs, SRS_CONDEMNED); break; case SRST_LINK: - mac_tx_client_quiesce(mcip, SRS_CONDEMNED); - /* - * Release the TX resources. First the TX group, if any - * was assigned to the MAC client, which will cause the - * TX rings to be moved back to the pool. Then free the - * rings themselves. 
- */ - if (tx->st_group != NULL) { - mac_release_tx_group(tx_srs->srs_mcip->mci_mip, - tx->st_group); - tx->st_group = NULL; - } - if (tx->st_ring_count != 0) { - kmem_free(tx->st_rings, - sizeof (mac_ring_handle_t) * tx->st_ring_count); - } + mac_tx_client_condemn((mac_client_handle_t)mcip); if (tx->st_arg2 != NULL) { ASSERT(tx_srs->srs_type & SRST_TX); - mac_release_tx_ring(tx->st_arg2); + /* + * The ring itself will be stopped when + * we release the group or in the + * mac_datapath_teardown (for the default + * group) + */ + tx->st_arg2 = NULL; } break; default: @@ -2258,7 +2594,9 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, } /* - * This is the group state machine. The state of an Rx group is given by + * This is the group state machine. + * + * The state of an Rx group is given by * the following table. The default group and its rings are started in * mac_start itself and the default group stays in SHARED state until * mac_stop at which time the group and rings are stopped and and it @@ -2276,15 +2614,27 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * * Non-default 0 N.A. REGISTERED * Non-default 1 N.A. RESERVED - * Non-default > 1 N.A. SHARED * * Default 0 N.A. SHARED * Default 1 1 RESERVED * Default 1 > 1 SHARED * Default > 1 N.A. SHARED + * + * For a TX group, the following is the state table. 
+ * + * Group type # of clients Group State + * in the group + * + * Non-default 0 REGISTERED + * Non-default 1 RESERVED + * + * Default 0 REGISTERED + * Default 1 RESERVED + * Default > 1 SHARED */ mac_group_state_t -mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) +mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip, + mac_group_t *defgrp, boolean_t rx_group) { mac_impl_t *mip = (mac_impl_t *)grp->mrg_mh; @@ -2292,11 +2642,11 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) /* Non-default group */ - if (grp != mip->mi_rx_groups) { - if (MAC_RX_GROUP_NO_CLIENT(grp)) + if (grp != defgrp) { + if (MAC_GROUP_NO_CLIENT(grp)) return (MAC_GROUP_STATE_REGISTERED); - *group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT(grp); + *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp); if (*group_only_mcip != NULL) return (MAC_GROUP_STATE_RESERVED); @@ -2305,10 +2655,19 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) /* Default group */ - if (MAC_RX_GROUP_NO_CLIENT(grp) || mip->mi_nactiveclients != 1) + if (MAC_GROUP_NO_CLIENT(grp)) { + if (rx_group) + return (MAC_GROUP_STATE_SHARED); + else + return (MAC_GROUP_STATE_REGISTERED); + } + *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp); + if (*group_only_mcip == NULL) + return (MAC_GROUP_STATE_SHARED); + + if (rx_group && mip->mi_nactiveclients != 1) return (MAC_GROUP_STATE_SHARED); - *group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT(grp); ASSERT(*group_only_mcip != NULL); return (MAC_GROUP_STATE_RESERVED); } @@ -2456,13 +2815,12 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * * For NICs which have only 1 Rx ring (we treat NICs with no Rx rings * as NIC with a single default ring), we assign the only ring to - * primary Link as MAC_RX_HW_DEFAULT_RING. 
The primary Link SRS can do - * polling on it as long as it is the only link in use and we compare - * the MAC address for unicast packets before accepting an incoming - * packet (there is no need for S/W classification in this case). We - * disable polling on the only ring the moment 2nd link gets created - * (the polling remains enabled even though there are broadcast and - * multicast flows created). + * primary Link. The primary Link SRS can do polling on it as long as + * it is the only link in use and we compare the MAC address for unicast + * packets before accepting an incoming packet (there is no need for S/W + * classification in this case). We disable polling on the only ring the + * moment 2nd link gets created (the polling remains enabled even though + * there are broadcast and * multicast flows created). * * If the NIC has more than 1 Rx ring, we assign the default ring (the * 1st ring) to deal with broadcast, multicast and traffic for other @@ -2472,10 +2830,6 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * (and its SRS) can continue to poll the assigned Rx ring at all times * independantly. * - * Right now we just assign MAC_RX_HW_DEFAULT_RING to note that it is - * primary NIC and later we will check to see how many Rx rings we - * have and can we get a non default Rx ring for the primary MAC. - * * Note: In future, if no fanout is specified, we try to assign 2 Rx * rings for the primary Link with the primary MAC address + TCP going * to one ring and primary MAC address + UDP|SCTP going to other ring. @@ -2487,56 +2841,128 @@ mac_rx_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip) * As an optimization, when a new NIC or VNIC is created, we can get * only one Rx ring and make it a TCP specific Rx ring and use the * H/W default Rx ring for the rest (this Rx ring is never polled). 
+ * + * For clients that don't have MAC address, but want to receive and + * transmit packets (e.g, bpf, gvrp etc.), we need to setup the datapath. + * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we + * always give the default group and use software classification (i.e. + * even if this is the only client in the default group, we will + * leave group as shared). */ int mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t link_type) { mac_impl_t *mip = mcip->mci_mip; - mac_group_t *group = NULL; - mac_group_t *default_group; + mac_group_t *rgroup = NULL; + mac_group_t *tgroup = NULL; + mac_group_t *default_rgroup; + mac_group_t *default_tgroup; int err; uint8_t *mac_addr; - mac_rx_group_reserve_type_t rtype = MAC_RX_RESERVE_NONDEFAULT; mac_group_state_t next_state; mac_client_impl_t *group_only_mcip; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + boolean_t rxhw; + boolean_t txhw; + boolean_t use_default = B_FALSE; + cpupart_t *cpupart; + boolean_t no_unicast; + boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC; + mac_client_impl_t *reloc_pmcip = NULL; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); switch (link_type) { case SRST_FLOW: - mac_srs_group_setup(mcip, flent, NULL, link_type); + mac_srs_group_setup(mcip, flent, link_type); return (0); case SRST_LINK: + no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR; mac_addr = flent->fe_flow_desc.fd_dst_mac; - /* Check if we need to reserve the default group */ - if (flent->fe_type & FLOW_PRIMARY_MAC) - rtype = MAC_RX_RESERVE_DEFAULT; + /* Default RX group */ + default_rgroup = MAC_DEFAULT_RX_GROUP(mip); - if ((mcip->mci_state_flags & MCIS_NO_HWRINGS) == 0) { - /* - * Check to see if we can get an exclusive group for - * this mac address or if there already exists a - * group that has this mac address (case of VLANs). - * If no groups are available, use the default group. 
- */ - group = mac_reserve_rx_group(mcip, mac_addr, rtype); + /* Default TX group */ + default_tgroup = MAC_DEFAULT_TX_GROUP(mip); + + if (no_unicast) { + rgroup = default_rgroup; + tgroup = default_tgroup; + goto grp_found; } + rxhw = (mrp->mrp_mask & MRP_RX_RINGS) && + (mrp->mrp_nrxrings > 0 || + (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)); + txhw = (mrp->mrp_mask & MRP_TX_RINGS) && + (mrp->mrp_ntxrings > 0 || + (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)); - if (group == NULL) { - if ((mcip->mci_state_flags & MCIS_REQ_HWRINGS) != 0) - return (ENOSPC); - group = &mip->mi_rx_groups[0]; + /* + * By default we have given the primary all the rings + * i.e. the default group. Let's see if the primary + * needs to be relocated so that the addition of this + * client doesn't impact the primary's performance, + * i.e. if the primary is in the default group and + * we add this client, the primary will lose polling. + * We do this only for NICs supporting dynamic ring + * grouping and only when this is the first client + * after the primary (i.e. nactiveclients is 2) + */ + if (!isprimary && mip->mi_nactiveclients == 2 && + (group_only_mcip = mac_primary_client_handle(mip)) != + NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + reloc_pmcip = mac_check_primary_relocation( + group_only_mcip, rxhw); + } + /* + * Check to see if we can get an exclusive group for + * this mac address or if there already exists a + * group that has this mac address (case of VLANs). + * If no groups are available, use the default group. + */ + rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE); + if (rgroup == NULL && rxhw) { + err = ENOSPC; + goto setup_failed; + } else if (rgroup == NULL) { + rgroup = default_rgroup; + } + /* + * Check to see if we can get an exclusive group for + * this mac client. If no groups are available, use + * the default group. 
+ */ + tgroup = mac_reserve_tx_group(mcip, B_FALSE); + if (tgroup == NULL && txhw) { + if (rgroup != NULL && rgroup != default_rgroup) + mac_release_rx_group(mcip, rgroup); + err = ENOSPC; + goto setup_failed; + } else if (tgroup == NULL) { + tgroup = default_tgroup; } /* * Some NICs don't support any Rx rings, so there may not * even be a default group. */ - if (group != NULL) { - flent->fe_rx_ring_group = group; + grp_found: + if (rgroup != NULL) { + if (rgroup != default_rgroup && + MAC_GROUP_NO_CLIENT(rgroup) && + (rxhw || mcip->mci_share != NULL)) { + MAC_RX_GRP_RESERVED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RESERVED(mip, + rgroup->mrg_cur_count); + } + } + flent->fe_rx_ring_group = rgroup; /* * Add the client to the group. This could cause * either this group to move to the shared state or @@ -2545,18 +2971,29 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * actions on the default group are postponed to * the end of this function. 
*/ - mac_rx_group_add_client(group, mcip); - next_state = mac_rx_group_next_state(group, - &group_only_mcip); - - ASSERT((next_state == MAC_GROUP_STATE_RESERVED && - mcip == group_only_mcip) || - (next_state == MAC_GROUP_STATE_SHARED && - group_only_mcip == NULL)); - - mac_set_rx_group_state(group, next_state); + mac_group_add_client(rgroup, mcip); + next_state = mac_group_next_state(rgroup, + &group_only_mcip, default_rgroup, B_TRUE); + mac_set_group_state(rgroup, next_state); } + if (tgroup != NULL) { + if (tgroup != default_tgroup && + MAC_GROUP_NO_CLIENT(tgroup) && + (txhw || mcip->mci_share != NULL)) { + MAC_TX_GRP_RESERVED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RESERVED(mip, + tgroup->mrg_cur_count); + } + } + flent->fe_tx_ring_group = tgroup; + mac_group_add_client(tgroup, mcip); + next_state = mac_group_next_state(tgroup, + &group_only_mcip, default_tgroup, B_FALSE); + tgroup->mrg_state = next_state; + } /* * Setup the Rx and Tx SRSes. If we got a pristine group * exclusively above, mac_srs_group_setup would simply create @@ -2564,18 +3001,23 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * reserved group, mac_srs_group_setup would also dismantle the * SRSes of the previously exclusive group */ - mac_srs_group_setup(mcip, flent, group, link_type); + mac_srs_group_setup(mcip, flent, link_type); + /* We are setting up minimal datapath only */ + if (no_unicast) + break; /* Program the S/W Classifer */ if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) goto setup_failed; /* Program the H/W Classifier */ - if ((err = mac_add_macaddr(mip, group, mac_addr, + if ((err = mac_add_macaddr(mip, rgroup, mac_addr, (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0) goto setup_failed; mcip->mci_unicast = mac_find_macaddr(mip, mac_addr); ASSERT(mcip->mci_unicast != NULL); + /* Initialize the v6 local addr used by link protection */ + mac_protect_update_v6_local_addr(mcip); break; default: @@ -2590,38 
+3032,53 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * incoming broadcast traffic to the other groups and dismantle the * SRSes over the default group. */ - if (group != NULL) { - if (group != mip->mi_rx_groups) { - default_group = mip->mi_rx_groups; - if (default_group->mrg_state == + if (rgroup != NULL) { + if (rgroup != default_rgroup) { + if (default_rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) { - group_only_mcip = MAC_RX_GROUP_ONLY_CLIENT( - default_group); + group_only_mcip = MAC_GROUP_ONLY_CLIENT( + default_rgroup); ASSERT(group_only_mcip != NULL && mip->mi_nactiveclients > 1); - mac_set_rx_group_state(default_group, + mac_set_group_state(default_rgroup, MAC_GROUP_STATE_SHARED); - mac_srs_group_setup(group_only_mcip, + mac_rx_srs_group_setup(group_only_mcip, + group_only_mcip->mci_flent, SRST_LINK); + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(group_only_mcip, group_only_mcip->mci_flent, - default_group, SRST_LINK); + MCIP_RESOURCE_PROPS(group_only_mcip), + mac_rx_deliver, group_only_mcip, NULL, + cpupart); + mac_set_pool_effective(use_default, cpupart, + mrp, emrp); + pool_unlock(); } - ASSERT(default_group->mrg_state == + ASSERT(default_rgroup->mrg_state == MAC_GROUP_STATE_SHARED); } /* * If we get an exclusive group for a VLAN MAC client we * need to take the s/w path to make the additional check for * the vid. Disable polling and set it to s/w classification. + * Similarly for clients that don't have a unicast address. 
*/ - if (group->mrg_state == MAC_GROUP_STATE_RESERVED && - i_mac_flow_vid(mcip->mci_flent) != VLAN_ID_NONE) { - mac_rx_switch_grp_to_sw(group); + if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED && + (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) { + mac_rx_switch_grp_to_sw(rgroup); } } + mac_set_rings_effective(mcip); return (0); setup_failed: + /* Switch the primary back to default group */ + if (reloc_pmcip != NULL) { + (void) mac_rx_switch_group(reloc_pmcip, + reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup); + } mac_datapath_teardown(mcip, flent, link_type); return (err); } @@ -2637,12 +3094,14 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_group_t *default_group; boolean_t check_default_group = B_FALSE; mac_group_state_t next_state; + mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); switch (link_type) { case SRST_FLOW: - mac_srs_group_teardown(mcip, flent, SRST_FLOW); + mac_rx_srs_group_teardown(flent, B_FALSE); + mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW); return; case SRST_LINK: @@ -2666,7 +3125,9 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, mac_flow_wait(flent, FLOW_DRIVER_UPCALL); /* Now quiesce and destroy all SRS and soft rings */ - mac_srs_group_teardown(mcip, flent, SRST_LINK); + mac_rx_srs_group_teardown(flent, B_FALSE); + mac_tx_srs_group_teardown(mcip, flent, SRST_LINK); + ASSERT((mcip->mci_flent == flent) && (flent->fe_next == NULL)); @@ -2677,16 +3138,17 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * were the last client, release the group. 
*/ group = flent->fe_rx_ring_group; + default_group = MAC_DEFAULT_RX_GROUP(mip); if (group != NULL) { - mac_rx_group_remove_client(group, mcip); - next_state = mac_rx_group_next_state(group, - &grp_only_mcip); + mac_group_remove_client(group, mcip); + next_state = mac_group_next_state(group, + &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { /* * Only one client left on this RX group. */ ASSERT(grp_only_mcip != NULL); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_RESERVED); group_only_flent = grp_only_mcip->mci_flent; @@ -2695,9 +3157,14 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * access on the group. Allow it to * dynamically poll the H/W rings etc. */ - mac_srs_group_setup(grp_only_mcip, - group_only_flent, group, SRST_LINK); + mac_rx_srs_group_setup(grp_only_mcip, + group_only_flent, SRST_LINK); + mac_fanout_setup(grp_only_mcip, + group_only_flent, + MCIP_RESOURCE_PROPS(grp_only_mcip), + mac_rx_deliver, grp_only_mcip, NULL, NULL); mac_rx_group_unmark(group, MR_INCIPIENT); + mac_set_rings_effective(grp_only_mcip); } else if (next_state == MAC_GROUP_STATE_REGISTERED) { /* * This is a non-default group being freed up. @@ -2705,19 +3172,95 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * to see if the primary client can get * exclusive access to the default group. 
*/ - ASSERT(group != mip->mi_rx_groups); + ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); + if (mrp->mrp_mask & MRP_RX_RINGS) { + MAC_RX_GRP_RELEASED(mip); + if (mip->mi_rx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_RX_RING_RELEASED(mip, + group->mrg_cur_count); + } + } mac_release_rx_group(mcip, group); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); check_default_group = B_TRUE; } else { ASSERT(next_state == MAC_GROUP_STATE_SHARED); - mac_set_rx_group_state(group, + mac_set_group_state(group, MAC_GROUP_STATE_SHARED); mac_rx_group_unmark(group, MR_CONDEMNED); } flent->fe_rx_ring_group = NULL; } + /* + * Remove the client from the TX group. Additionally, if + * this a non-default group, then we also need to release + * the group. + */ + group = flent->fe_tx_ring_group; + default_group = MAC_DEFAULT_TX_GROUP(mip); + if (group != NULL) { + mac_group_remove_client(group, mcip); + next_state = mac_group_next_state(group, + &grp_only_mcip, default_group, B_FALSE); + if (next_state == MAC_GROUP_STATE_REGISTERED) { + if (group != default_group) { + if (mrp->mrp_mask & MRP_TX_RINGS) { + MAC_TX_GRP_RELEASED(mip); + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC) { + MAC_TX_RING_RELEASED( + mip, group-> + mrg_cur_count); + } + } + mac_release_tx_group(mcip, group); + /* + * If the default group is reserved, + * then we need to set the effective + * rings as we would have given + * back some rings when the group + * was released + */ + if (mip->mi_tx_group_type == + MAC_GROUP_TYPE_DYNAMIC && + default_group->mrg_state == + MAC_GROUP_STATE_RESERVED) { + grp_only_mcip = + MAC_GROUP_ONLY_CLIENT + (default_group); + mac_set_rings_effective( + grp_only_mcip); + } + } else { + mac_ring_t *ring; + int cnt; + int ringcnt; + + /* + * Stop all the rings except the + * default ring. 
+ */ + ringcnt = group->mrg_cur_count; + ring = group->mrg_rings; + for (cnt = 0; cnt < ringcnt; cnt++) { + if (ring->mr_state == + MR_INUSE && ring != + (mac_ring_t *) + mip->mi_default_tx_ring) { + mac_stop_ring(ring); + ring->mr_flag = 0; + } + ring = ring->mr_next; + } + } + } else if (next_state == MAC_GROUP_STATE_RESERVED) { + mac_set_rings_effective(grp_only_mcip); + } + flent->fe_tx_ring_group = NULL; + group->mrg_state = next_state; + } break; default: ASSERT(B_FALSE); @@ -2731,21 +3274,53 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, * over the default group. */ if (check_default_group) { - default_group = mip->mi_rx_groups; + default_group = MAC_DEFAULT_RX_GROUP(mip); ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED); - next_state = mac_rx_group_next_state(default_group, - &grp_only_mcip); + next_state = mac_group_next_state(default_group, + &grp_only_mcip, default_group, B_TRUE); if (next_state == MAC_GROUP_STATE_RESERVED) { ASSERT(grp_only_mcip != NULL && mip->mi_nactiveclients == 1); - mac_set_rx_group_state(default_group, + mac_set_group_state(default_group, MAC_GROUP_STATE_RESERVED); - mac_srs_group_setup(grp_only_mcip, + mac_rx_srs_group_setup(grp_only_mcip, + grp_only_mcip->mci_flent, SRST_LINK); + mac_fanout_setup(grp_only_mcip, grp_only_mcip->mci_flent, - default_group, SRST_LINK); + MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver, + grp_only_mcip, NULL, NULL); mac_rx_group_unmark(default_group, MR_INCIPIENT); + mac_set_rings_effective(grp_only_mcip); } } + + /* + * If the primary is the only one left and the MAC supports + * dynamic grouping, we need to see if the primary needs to + * be moved to the default group so that it can use all the + * H/W rings. 
+ */ + if (!(flent->fe_type & FLOW_PRIMARY_MAC) && + mip->mi_nactiveclients == 1 && + mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { + default_group = MAC_DEFAULT_RX_GROUP(mip); + grp_only_mcip = mac_primary_client_handle(mip); + if (grp_only_mcip == NULL) + return; + group_only_flent = grp_only_mcip->mci_flent; + mrp = MCIP_RESOURCE_PROPS(grp_only_mcip); + /* + * If the primary has an explicit property set, leave it + * alone. + */ + if (mrp->mrp_mask & MRP_RX_RINGS) + return; + /* + * Switch the primary to the default group. + */ + (void) mac_rx_switch_group(grp_only_mcip, + group_only_flent->fe_rx_ring_group, default_group); + } } /* DATAPATH TEAR DOWN ROUTINES (SRS and FANOUT teardown) */ @@ -2753,18 +3328,36 @@ mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent, static void mac_srs_fanout_list_free(mac_soft_ring_set_t *mac_srs) { - ASSERT(mac_srs->srs_tcp_soft_rings != NULL); - kmem_free(mac_srs->srs_tcp_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_tcp_soft_rings = NULL; - ASSERT(mac_srs->srs_udp_soft_rings != NULL); - kmem_free(mac_srs->srs_udp_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_udp_soft_rings = NULL; - ASSERT(mac_srs->srs_oth_soft_rings != NULL); - kmem_free(mac_srs->srs_oth_soft_rings, - sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); - mac_srs->srs_oth_soft_rings = NULL; + if (mac_srs->srs_type & SRST_TX) { + mac_srs_tx_t *tx; + + ASSERT(mac_srs->srs_tcp_soft_rings == NULL); + ASSERT(mac_srs->srs_udp_soft_rings == NULL); + ASSERT(mac_srs->srs_oth_soft_rings == NULL); + ASSERT(mac_srs->srs_tx_soft_rings != NULL); + kmem_free(mac_srs->srs_tx_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP); + mac_srs->srs_tx_soft_rings = NULL; + tx = &mac_srs->srs_tx; + if (tx->st_soft_rings != NULL) { + kmem_free(tx->st_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP); + } + } else { + ASSERT(mac_srs->srs_tx_soft_rings == NULL); + 
ASSERT(mac_srs->srs_tcp_soft_rings != NULL); + kmem_free(mac_srs->srs_tcp_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_tcp_soft_rings = NULL; + ASSERT(mac_srs->srs_udp_soft_rings != NULL); + kmem_free(mac_srs->srs_udp_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_udp_soft_rings = NULL; + ASSERT(mac_srs->srs_oth_soft_rings != NULL); + kmem_free(mac_srs->srs_oth_soft_rings, + sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT); + mac_srs->srs_oth_soft_rings = NULL; + } } /* @@ -2815,10 +3408,11 @@ mac_srs_free(mac_soft_ring_set_t *mac_srs) mac_pkt_drop(NULL, NULL, mac_srs->srs_first, B_FALSE); mac_srs_ring_free(mac_srs); - mac_srs_soft_rings_free(mac_srs, B_TRUE); + mac_srs_soft_rings_free(mac_srs); mac_srs_fanout_list_free(mac_srs); mac_srs->srs_bw = NULL; + mac_srs_stat_delete(mac_srs); kmem_cache_free(mac_srs_cache, mac_srs); } @@ -3126,13 +3720,19 @@ mac_tx_srs_add_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) { mac_client_impl_t *mcip = mac_srs->srs_mcip; mac_soft_ring_t *soft_ring; - int count = mac_srs->srs_oth_ring_count; + int count = mac_srs->srs_tx_ring_count; + uint32_t soft_ring_type = ST_RING_TX; + uint_t ring_info; ASSERT(mac_srs->srs_state & SRS_QUIESCE); - soft_ring = mac_soft_ring_create(count, 0, NULL, - (ST_RING_OTH | ST_RING_TX), maxclsyspri, mcip, mac_srs, -1, + ring_info = mac_hwring_getinfo((mac_ring_handle_t)tx_ring); + if (mac_tx_serialize || (ring_info & MAC_RING_TX_SERIALIZE)) + soft_ring_type |= ST_RING_WORKER_ONLY; + soft_ring = mac_soft_ring_create(count, 0, + soft_ring_type, maxclsyspri, mcip, mac_srs, -1, NULL, mcip, (mac_resource_handle_t)tx_ring); - mac_srs->srs_oth_ring_count++; + mac_srs->srs_tx_ring_count++; + mac_srs_update_fanout_list(mac_srs); /* * put this soft ring in quiesce mode too so when we restart * all soft rings in the srs are in the same state. 
@@ -3177,7 +3777,7 @@ mac_soft_ring_remove(mac_soft_ring_set_t *mac_srs, mac_soft_ring_t *softring) mac_srs->srs_soft_ring_condemned_count--; mutex_exit(&mac_srs->srs_lock); - mac_soft_ring_free(softring, B_FALSE); + mac_soft_ring_free(softring); } void @@ -3185,70 +3785,59 @@ mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring) { int i; mac_soft_ring_t *soft_ring, *remove_sring; + mac_client_impl_t *mcip = mac_srs->srs_mcip; mutex_enter(&mac_srs->srs_lock); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - soft_ring = mac_srs->srs_oth_soft_rings[i]; + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + soft_ring = mac_srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) break; } mutex_exit(&mac_srs->srs_lock); - ASSERT(i < mac_srs->srs_oth_ring_count); + ASSERT(i < mac_srs->srs_tx_ring_count); remove_sring = soft_ring; + /* + * In the case of aggr, the soft ring associated with a Tx ring + * is also stored in st_soft_rings[] array. That entry should + * be removed. + */ + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring); + tx->st_soft_rings[tx_ring->mr_index] = NULL; + } mac_soft_ring_remove(mac_srs, remove_sring); mac_srs_update_fanout_list(mac_srs); } /* * mac_tx_srs_setup(): - * * Used to setup Tx rings. If no free Tx ring is available, then default * Tx ring is used. 
*/ void -mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent, - uint32_t srs_type) +mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent) { - mac_impl_t *mip = mcip->mci_mip; - mac_soft_ring_set_t *tx_srs; - int i, tx_ring_count = 0, tx_rings_reserved = 0; - mac_ring_handle_t *tx_rings = NULL; - uint32_t soft_ring_type; - mac_group_t *grp = NULL; - mac_ring_t *ring; - mac_srs_tx_t *tx; - boolean_t serialize = B_FALSE; - - tx_srs = flent->fe_tx_srs; - tx = &tx_srs->srs_tx; - - if (tx->st_group != NULL) { - grp = tx->st_group; - tx_ring_count = grp->mrg_cur_count; - } else { - tx_ring_count = mac_tx_ring_count; - } - - if (tx_ring_count != 0) { - tx_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * - tx_ring_count, KM_SLEEP); - } - - /* - * Just use the default ring for now. We need to use - * the underlying link's ring set instead of the underlying - * NIC's. - */ - if (srs_type == SRST_FLOW || - (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) { - /* use default ring */ - tx_rings[0] = (void *)mip->mi_default_tx_ring; - tx_rings_reserved++; - goto rings_assigned; - } - - if (mcip->mci_share != NULL) - ring = grp->mrg_rings; + mac_impl_t *mip = mcip->mci_mip; + mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs; + int i; + int tx_ring_count = 0; + uint32_t soft_ring_type; + mac_group_t *grp = NULL; + mac_ring_t *ring; + mac_srs_tx_t *tx = &tx_srs->srs_tx; + boolean_t is_aggr; + uint_t ring_info = 0; + + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0; + grp = flent->fe_tx_ring_group; + if (grp == NULL) { + ring = (mac_ring_t *)mip->mi_default_tx_ring; + goto no_group; + } + tx_ring_count = grp->mrg_cur_count; + ring = grp->mrg_rings; /* * An attempt is made to reserve 'tx_ring_count' number * of Tx rings. If tx_ring_count is 0, default Tx ring @@ -3258,87 +3847,80 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent, * then each Tx ring will have a Tx-side soft ring. All * these soft rings will be hang off Tx SRS. 
*/ - for (i = 0; i < tx_ring_count; i++) { - if (mcip->mci_share != NULL) { - /* - * The ring was already chosen and associated - * with the TX group. Save it in the new - * array to keep as much of the code below common - * between the share and non-share cases. - */ - ASSERT(ring != NULL); - tx_rings[i] = (mac_ring_handle_t)ring; - ring = ring->mr_next; - } else { - tx_rings[i] = - (mac_ring_handle_t)mac_reserve_tx_ring(mip, NULL); - if (tx_rings[i] == NULL) { - /* - * We have run out of Tx rings. So - * give the default ring too. - */ - tx_rings[i] = (void *)mip->mi_default_tx_ring; - tx_rings_reserved++; + switch (grp->mrg_state) { + case MAC_GROUP_STATE_SHARED: + case MAC_GROUP_STATE_RESERVED: + if (tx_ring_count <= 1 && !is_aggr) { +no_group: + if (ring != NULL && + ring->mr_state != MR_INUSE) { + (void) mac_start_ring(ring); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)ring); + } + tx->st_arg2 = (void *)ring; + mac_tx_srs_stat_recreate(tx_srs, B_FALSE); + if (tx_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = SRS_TX_BW; + } else if (mac_tx_serialize || + (ring_info & MAC_RING_TX_SERIALIZE)) { + tx->st_mode = SRS_TX_SERIALIZE; + } else { + tx->st_mode = SRS_TX_DEFAULT; + } break; } - } - tx_rings_reserved++; - } - -rings_assigned: - if (mac_tx_serialize || (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) - serialize = B_TRUE; - /* - * Did we get the requested number of tx rings? - * There are 2 actions we can take depending upon the number - * of tx_rings we got. - * 1) If we got one, then get the tx_ring from the soft ring, - * save it in SRS and free up the soft ring. - * 2) If we got more than 1, then do the tx fanout among the - * rings we obtained. 
- */ - ASSERT(tx_rings_reserved != 0); - if (tx_rings_reserved == 1) { - tx->st_arg2 = (void *)tx_rings[0]; - /* For ring_count of 0 or 1, set the tx_mode and return */ - if (tx_srs->srs_type & SRST_BW_CONTROL) - tx->st_mode = SRS_TX_BW; - else if (serialize) - tx->st_mode = SRS_TX_SERIALIZE; - else - tx->st_mode = SRS_TX_DEFAULT; - } else { - /* - * We got multiple Tx rings for Tx fanout. - */ - soft_ring_type = ST_RING_OTH | ST_RING_TX; - if (tx_srs->srs_type & SRST_BW_CONTROL) { - tx->st_mode = SRS_TX_BW_FANOUT; - } else { - tx->st_mode = SRS_TX_FANOUT; - if (serialize) - soft_ring_type |= ST_RING_WORKER_ONLY; - } - for (i = 0; i < tx_rings_reserved; i++) { - (void) mac_soft_ring_create(i, 0, NULL, soft_ring_type, - maxclsyspri, mcip, tx_srs, -1, NULL, mcip, - (mac_resource_handle_t)tx_rings[i]); - } - mac_srs_update_fanout_list(tx_srs); + soft_ring_type = ST_RING_TX; + if (tx_srs->srs_type & SRST_BW_CONTROL) { + tx->st_mode = is_aggr ? + SRS_TX_BW_AGGR : SRS_TX_BW_FANOUT; + } else { + tx->st_mode = is_aggr ? 
SRS_TX_AGGR : + SRS_TX_FANOUT; + } + for (i = 0; i < tx_ring_count; i++) { + ASSERT(ring != NULL); + switch (ring->mr_state) { + case MR_INUSE: + case MR_FREE: + ASSERT(ring->mr_srs == NULL); + + if (ring->mr_state != MR_INUSE) + (void) mac_start_ring(ring); + ring_info = mac_hwring_getinfo( + (mac_ring_handle_t)ring); + if (mac_tx_serialize || (ring_info & + MAC_RING_TX_SERIALIZE)) { + soft_ring_type |= + ST_RING_WORKER_ONLY; + } + (void) mac_soft_ring_create(i, 0, + soft_ring_type, maxclsyspri, + mcip, tx_srs, -1, NULL, mcip, + (mac_resource_handle_t)ring); + break; + default: + cmn_err(CE_PANIC, + "srs_setup: mcip = %p " + "trying to add UNKNOWN ring = %p\n", + (void *)mcip, (void *)ring); + break; + } + ring = ring->mr_next; + } + mac_srs_update_fanout_list(tx_srs); + break; + default: + ASSERT(B_FALSE); + break; } tx->st_func = mac_tx_get_func(tx->st_mode); - - DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs, - int, tx->st_mode, int, tx_srs->srs_oth_ring_count); - - if (tx_ring_count != 0) { - tx->st_ring_count = tx_rings_reserved; - tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * - tx_rings_reserved, KM_SLEEP); - for (i = 0; i < tx->st_ring_count; i++) - tx->st_rings[i] = tx_rings[i]; - kmem_free(tx_rings, sizeof (mac_ring_handle_t) * tx_ring_count); + if (is_aggr) { + VERIFY(i_mac_capab_get((mac_handle_t)mip, + MAC_CAPAB_AGGR, &tx->st_capab_aggr)); } + DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs, + int, tx->st_mode, int, tx_srs->srs_tx_ring_count); } /* @@ -3346,10 +3928,14 @@ rings_assigned: * its current link speed. 
*/ void -mac_fanout_recompute_client(mac_client_impl_t *mcip) +mac_fanout_recompute_client(mac_client_impl_t *mcip, cpupart_t *cpupart) { uint64_t link_speed; mac_resource_props_t *mcip_mrp; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *rx_srs; + mac_cpus_t *srs_cpu; + int soft_ring_count, maxcpus; ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); @@ -3359,8 +3945,31 @@ mac_fanout_recompute_client(mac_client_impl_t *mcip) if ((link_speed != 0) && (link_speed != mcip->mci_flent->fe_nic_speed)) { mcip_mrp = MCIP_RESOURCE_PROPS(mcip); - mac_fanout_setup(mcip, mcip->mci_flent, - mcip_mrp, mac_rx_deliver, mcip, NULL); + /* + * Before calling mac_fanout_setup(), check to see if + * the SRSes already have the right number of soft + * rings. mac_fanout_setup() is a heavy duty operation + * where new cpu bindings are done for SRS and soft + * ring threads and interrupts re-targeted. + */ + maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus; + soft_ring_count = mac_compute_soft_ring_count(flent, + flent->fe_rx_srs_cnt - 1, maxcpus); + /* + * If soft_ring_count returned by + * mac_compute_soft_ring_count() is 0, bump it + * up by 1 because we always have atleast one + * TCP, UDP, and OTH soft ring associated with + * an SRS. + */ + soft_ring_count = (soft_ring_count == 0) ? 
+ 1 : soft_ring_count; + rx_srs = flent->fe_rx_srs[0]; + srs_cpu = &rx_srs->srs_cpu; + if (soft_ring_count != srs_cpu->mc_rx_fanout_cnt) { + mac_fanout_setup(mcip, flent, mcip_mrp, + mac_rx_deliver, mcip, NULL, cpupart); + } } } @@ -3376,6 +3985,9 @@ void mac_fanout_recompute(mac_impl_t *mip) { mac_client_impl_t *mcip; + cpupart_t *cpupart; + boolean_t use_default; + mac_resource_props_t *mrp, *emrp; i_mac_perim_enter(mip); if ((mip->mi_state_flags & MIS_IS_VNIC) != 0 || @@ -3389,7 +4001,14 @@ mac_fanout_recompute(mac_impl_t *mip) if ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0 || !MCIP_DATAPATH_SETUP(mcip)) continue; - mac_fanout_recompute_client(mcip); + mrp = MCIP_RESOURCE_PROPS(mcip); + emrp = MCIP_EFFECTIVE_PROPS(mcip); + use_default = B_FALSE; + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_recompute_client(mcip, cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); } i_mac_perim_exit(mip); } diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c index 16b5ec4396..aa4985fe4c 100644 --- a/usr/src/uts/common/io/mac/mac_flow.c +++ b/usr/src/uts/common/io/mac/mac_flow.c @@ -29,10 +29,14 @@ #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> +#include <sys/mac_stat.h> #include <sys/dls.h> #include <sys/dls_impl.h> #include <sys/mac_soft_ring.h> #include <sys/ethernet.h> +#include <sys/cpupart.h> +#include <sys/pool.h> +#include <sys/pool_pset.h> #include <sys/vlan.h> #include <inet/ip.h> #include <inet/ip6.h> @@ -40,6 +44,16 @@ #include <netinet/udp.h> #include <netinet/sctp.h> +typedef struct flow_stats_s { + uint64_t fs_obytes; + uint64_t fs_opackets; + uint64_t fs_oerrors; + uint64_t fs_ibytes; + uint64_t fs_ipackets; + uint64_t fs_ierrors; +} flow_stats_t; + + /* global flow table, will be a per exclusive-zone table later */ static mod_hash_t *flow_hash; static krwlock_t flow_tab_lock; @@ -55,7 +69,7 @@ typedef struct { #define FS_OFF(f) 
(offsetof(flow_stats_t, f)) static flow_stats_info_t flow_stats_list[] = { - {"rbytes", FS_OFF(fs_rbytes)}, + {"rbytes", FS_OFF(fs_ibytes)}, {"ipackets", FS_OFF(fs_ipackets)}, {"ierrors", FS_OFF(fs_ierrors)}, {"obytes", FS_OFF(fs_obytes)}, @@ -83,19 +97,48 @@ flow_stat_init(kstat_named_t *knp) static int flow_stat_update(kstat_t *ksp, int rw) { - flow_entry_t *fep = ksp->ks_private; - flow_stats_t *fsp = &fep->fe_flowstats; - kstat_named_t *knp = ksp->ks_data; - uint64_t *statp; - int i; + flow_entry_t *fep = ksp->ks_private; + kstat_named_t *knp = ksp->ks_data; + uint64_t *statp; + int i; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + flow_stats_t flow_stats; + mac_soft_ring_set_t *mac_srs; if (rw != KSTAT_READ) return (EACCES); + bzero(&flow_stats, sizeof (flow_stats_t)); + + for (i = 0; i < fep->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i]; + if (mac_srs == NULL) /* Multicast flow */ + break; + mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; + + flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; + + flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors; + } + + mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs; + if (mac_srs == NULL) /* Multicast flow */ + goto done; + mac_tx_stat = &mac_srs->srs_tx.st_stat; + + flow_stats.fs_obytes = mac_tx_stat->mts_obytes; + flow_stats.fs_opackets = mac_tx_stat->mts_opackets; + flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors; + +done: for (i = 0; i < FS_SIZE; i++, knp++) { statp = (uint64_t *) - ((uchar_t *)fsp + flow_stats_list[i].fs_offset); - + ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset); knp->value.ui64 = *statp; } return (0); @@ -170,11 +213,11 @@ int mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, void *client_cookie, uint_t type, flow_entry_t **flentp) { - flow_entry_t *flent = 
*flentp; - int err = 0; + flow_entry_t *flent = *flentp; + int err = 0; if (mrp != NULL) { - err = mac_validate_props(mrp); + err = mac_validate_props(NULL, mrp); if (err != 0) return (err); } @@ -221,6 +264,8 @@ mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; else mrp->mrp_priority = MPL_LINK_DEFAULT; + bzero(mrp->mrp_pool, MAXPATHLEN); + bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t)); bcopy(mrp, &flent->fe_effective_props, sizeof (mac_resource_props_t)); } @@ -593,7 +638,7 @@ mac_flow_destroy(flow_entry_t *flent) } else { mac_flow_cleanup(flent); } - + mac_misc_stat_delete(flent); mutex_destroy(&flent->fe_lock); cv_destroy(&flent->fe_cv); flow_stat_destroy(flent); @@ -617,13 +662,15 @@ mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) int i; if ((mrp->mrp_mask & MRP_MAXBW) != 0 && - (fmrp->mrp_maxbw != mrp->mrp_maxbw)) { + (!(fmrp->mrp_mask & MRP_MAXBW) || + (fmrp->mrp_maxbw != mrp->mrp_maxbw))) { changed_mask |= MRP_MAXBW; - fmrp->mrp_maxbw = mrp->mrp_maxbw; if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { fmrp->mrp_mask &= ~MRP_MAXBW; + fmrp->mrp_maxbw = 0; } else { fmrp->mrp_mask |= MRP_MAXBW; + fmrp->mrp_maxbw = mrp->mrp_maxbw; } } @@ -658,6 +705,22 @@ mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) changed_mask |= MRP_CPUS; MAC_COPY_CPUS(mrp, fmrp); } + + /* + * Modify the rings property. 
+ */ + if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS) + mac_set_rings_effective(flent->fe_mcip); + + if ((mrp->mrp_mask & MRP_POOL) != 0) { + if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0) + changed_mask |= MRP_POOL; + if (strlen(mrp->mrp_pool) == 0) + fmrp->mrp_mask &= ~MRP_POOL; + else + fmrp->mrp_mask |= MRP_POOL; + (void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN); + } return (changed_mask); } @@ -667,6 +730,9 @@ mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) uint32_t changed_mask; mac_client_impl_t *mcip = flent->fe_mcip; mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); + mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); + cpupart_t *cpupart = NULL; + boolean_t use_default = B_FALSE; ASSERT(flent != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); @@ -693,14 +759,24 @@ mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) !(changed_mask & MRP_CPUS) && !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { mac_fanout_setup(mcip, flent, mcip_mrp, - mac_rx_deliver, mcip, NULL); + mac_rx_deliver, mcip, NULL, NULL); } } if (mrp->mrp_mask & MRP_PRIORITY) mac_flow_update_priority(mcip, flent); if (changed_mask & MRP_CPUS) - mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL); + mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, + NULL); + + if (mrp->mrp_mask & MRP_POOL) { + pool_lock(); + cpupart = mac_pset_find(mrp, &use_default); + mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, + cpupart); + mac_set_pool_effective(use_default, cpupart, mrp, emrp); + pool_unlock(); + } } /* @@ -1368,7 +1444,7 @@ mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) datalink_id_t linkid; flow_tab_t *flow_tab; - err = mac_validate_props(mrp); + err = mac_validate_props(NULL, mrp); if (err != 0) return (err); @@ -1445,10 +1521,14 @@ static int mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) { flow_walk_state_t *statep = arg; - 
mac_flowinfo_t finfo; + mac_flowinfo_t *finfo; + int err; - mac_link_flowinfo_copy(&finfo, flent); - return (statep->ws_func(&finfo, statep->ws_arg)); + finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); + mac_link_flowinfo_copy(finfo, flent); + err = statep->ws_func(finfo, statep->ws_arg); + kmem_free(finfo, sizeof (*finfo)); + return (err); } /* @@ -1885,18 +1965,19 @@ flow_ip_accept(flow_tab_t *ft, flow_state_t *s) break; } case ETHERTYPE_IPV6: { - ip6_t *ip6h = (ip6_t *)l3_start; - uint16_t ip6_hdrlen; - uint8_t nexthdr; + ip6_t *ip6h = (ip6_t *)l3_start; + ip6_frag_t *frag = NULL; + uint16_t ip6_hdrlen; + uint8_t nexthdr; - if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen, - &nexthdr, NULL, NULL)) { + if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen, + &nexthdr, &frag)) { return (ENOBUFS); } l3info->l3_hdrsize = ip6_hdrlen; l3info->l3_protocol = nexthdr; l3info->l3_version = IPV6_VERSION; - l3info->l3_fragmented = B_FALSE; + l3info->l3_fragmented = (frag != NULL); break; } default: diff --git a/usr/src/uts/common/io/mac/mac_hio.c b/usr/src/uts/common/io/mac/mac_hio.c index 9810ac821c..703c42b4cc 100644 --- a/usr/src/uts/common/io/mac/mac_hio.c +++ b/usr/src/uts/common/io/mac/mac_hio.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> +#include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> @@ -129,7 +130,7 @@ mac_share_bind(mac_client_handle_t mch, uint64_t cookie, uint64_t *rcookie) * there are no in flight packets through a transmit ring * which is being bound to another domain. 
*/ - mac_tx_client_quiesce(mcip, SRS_QUIESCE); + mac_tx_client_quiesce(mch); /* * For the receive path, no traffic will be sent up through @@ -148,7 +149,7 @@ mac_share_bind(mac_client_handle_t mch, uint64_t cookie, uint64_t *rcookie) /* * Resume transmit traffic for the MAC client. */ - mac_tx_client_restart(mcip); + mac_tx_client_restart(mch); i_mac_perim_exit(mip); @@ -182,7 +183,7 @@ mac_share_unbind(mac_client_handle_t mch) * been updated by mac_fanout_recompute(). Do the check here * now that the share has been unbound. */ - mac_fanout_recompute_client(mcip); + mac_fanout_recompute_client(mcip, NULL); i_mac_perim_exit(mip); } diff --git a/usr/src/uts/common/io/mac/mac_ndd.c b/usr/src/uts/common/io/mac/mac_ndd.c index 9d4fc4bc18..cf99ef64be 100644 --- a/usr/src/uts/common/io/mac/mac_ndd.c +++ b/usr/src/uts/common/io/mac/mac_ndd.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,6 +29,7 @@ #include <sys/types.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_client_priv.h> #include <inet/nd.h> #include <sys/mac_ether.h> #include <sys/policy.h> @@ -95,17 +96,16 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) { int size_out, i; mblk_t *tmp; - mac_priv_prop_t *mpriv; uint_t permflags; int status; uint64_t value; + char *prop_name; if (!mac_add_name(mp, "?", MAC_PROP_PERM_READ)) return (-1); /* first the known ndd mappings */ for (i = 0; i < mip->mi_type->mt_mappingcount; i++) { - permflags = MAC_PROP_PERM_RW; if ((mip->mi_type->mt_mapping[i].mp_flags & MAC_PROP_MAP_KSTAT) != 0) permflags = MAC_PROP_PERM_READ; @@ -113,8 +113,13 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) status = mip->mi_callbacks->mc_getprop(mip->mi_driver, mip->mi_type->mt_mapping[i].mp_name, mip->mi_type->mt_mapping[i].mp_prop_id, - 0, mip->mi_type->mt_mapping[i].mp_valsize, - &value, &permflags); + mip->mi_type->mt_mapping[i].mp_valsize, &value); + if (status != 0) + continue; + status = mac_prop_info((mac_handle_t)mip, + mip->mi_type->mt_mapping[i].mp_prop_id, + mip->mi_type->mt_mapping[i].mp_name, NULL, 0, + NULL, &permflags); if (status != 0) continue; } @@ -126,10 +131,14 @@ mac_ndd_get_names(mac_impl_t *mip, mblk_t *mp) /* now the driver's ndd variables */ for (i = 0; i < mip->mi_priv_prop_count; i++) { - mpriv = &mip->mi_priv_prop[i]; + prop_name = mip->mi_priv_prop[i]; + + if (mac_prop_info((mac_handle_t)mip, MAC_PROP_PRIVATE, + prop_name, NULL, 0, NULL, &permflags) != 0) + return (-1); /* skip over the "_" */ - if (!mac_add_name(mp, &mpriv->mpp_name[1], mpriv->mpp_flags)) + if (!mac_add_name(mp, &prop_name[1], permflags)) return (-1); } @@ -185,7 +194,6 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) uint16_t u16; uint32_t u32; uint64_t u64; - uint_t perm; if (mp->b_cont == NULL || avail < 2) return (EINVAL); @@ -258,9 +266,8 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) 
new_value = u32 = (long)u64; } else { status = mip->mi_callbacks->mc_getprop(mip->mi_driver, - name, mip->mi_type->mt_mapping[i].mp_prop_id, 0, - mip->mi_type->mt_mapping[i].mp_valsize, value, - &perm); + name, mip->mi_type->mt_mapping[i].mp_prop_id, + mip->mi_type->mt_mapping[i].mp_valsize, value); switch (mip->mi_type->mt_mapping[i].mp_valsize) { case 1: new_value = u8; @@ -294,7 +301,7 @@ mac_ndd_get_ioctl(mac_impl_t *mip, mblk_t *mp, int avail, int *rval) */ (void) snprintf(priv_name, sizeof (priv_name), "_%s", name); status = mip->mi_callbacks->mc_getprop(mip->mi_driver, priv_name, - MAC_PROP_PRIVATE, 0, avail - 2, mp1->b_rptr, &perm); + MAC_PROP_PRIVATE, avail - 2, mp1->b_rptr); if (status != 0) goto get_done; diff --git a/usr/src/uts/common/io/mac/mac_protect.c b/usr/src/uts/common/io/mac/mac_protect.c index 8bd527c8d5..c923bcdbe2 100644 --- a/usr/src/uts/common/io/mac/mac_protect.c +++ b/usr/src/uts/common/io/mac/mac_protect.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,68 +33,1668 @@ #include <sys/ethernet.h> #include <sys/vlan.h> #include <sys/dlpi.h> +#include <sys/avl.h> #include <inet/ip.h> #include <inet/ip6.h> #include <inet/arp.h> +#include <netinet/arp.h> +#include <netinet/udp.h> +#include <netinet/dhcp.h> +#include <netinet/dhcp6.h> /* - * Check if ipaddr is in the 'allowed-ips' list. + * Implementation overview for DHCP address detection + * + * The purpose of DHCP address detection is to relieve the user of having to + * manually configure static IP addresses when ip-nospoof protection is turned + * on. To achieve this, the mac layer needs to intercept DHCP packets to + * determine the assigned IP addresses. + * + * A DHCP handshake between client and server typically requires at least + * 4 messages: + * + * 1. 
DISCOVER - client attempts to locate DHCP servers via a + * broadcast message to its subnet. + * 2. OFFER - server responds to client with an IP address and + * other parameters. + * 3. REQUEST - client requests the offered address. + * 4. ACK - server verifies that the requested address matches + * the one it offered. + * + * DHCPv6 behaves pretty much the same way aside from different message names. + * + * Address information is embedded in either the OFFER or REQUEST message. + * We chose to intercept REQUEST because this is at the last part of the + * handshake and it indicates that the client intends to keep the address. + * Intercepting OFFERs is unreliable because the client may receive multiple + * offers from different servers, and we can't tell which address the client + * will keep. + * + * Each DHCP message has a transaction ID. We use this transaction ID to match + * REQUESTs with ACKs received from servers. + * + * For IPv4, the process to acquire a DHCP-assigned address is as follows: + * + * 1. Client sends REQUEST. a new dhcpv4_txn_t object is created and inserted + * in the the mci_v4_pending_txn table (keyed by xid). This object represents + * a new transaction. It contains the xid, the client ID and requested IP + * address. + * + * 2. Server responds with an ACK. The xid from this ACK is used to lookup the + * pending transaction from the mci_v4_pending_txn table. Once the object is + * found, it is removed from the pending table and inserted into the + * completed table (mci_v4_completed_txn, keyed by client ID) and the dynamic + * IP table (mci_v4_dyn_ip, keyed by IP address). + * + * 3. An outgoing packet that goes through the ip-nospoof path will be checked + * against the dynamic IP table. Packets that have the assigned DHCP address + * as the source IP address will pass the check and be admitted onto the + * network. 
+ * + * IPv4 notes: + * + * If the server never responds with an ACK, there is a timer that is set after + * the insertion of the transaction into the pending table. When the timer + * fires, it will check whether the transaction is old (by comparing current + * time and the txn's timestamp), if so the transaction will be freed. along + * with this, any transaction in the completed/dyn-ip tables matching the client + * ID of this stale transaction will also be freed. If the client fails to + * extend a lease, we want to stop the client from using any IP addresses that + * were granted previously. + * + * A RELEASE message from the client will not cause a transaction to be created. + * The client ID in the RELEASE message will be used for finding and removing + * transactions in the completed and dyn-ip tables. + * + * + * For IPv6, the process to acquire a DHCPv6-assigned address is as follows: + * + * 1. Client sends REQUEST. The DUID is extracted and stored into a dhcpv6_cid_t + * structure. A new transaction structure (dhcpv6_txn_t) is also created and + * it will point to the dhcpv6_cid_t. If an existing transaction with a + * matching xid is not found, this dhcpv6_txn_t will be inserted into the + * mci_v6_pending_txn table (keyed by xid). + * + * 2. Server responds with a REPLY. If a pending transaction is found, the + * addresses in the reply will be placed into the dhcpv6_cid_t pointed to by + * the transaction. The dhcpv6_cid_t will then be moved to the mci_v6_cid + * table (keyed by cid). The associated addresses will be added to the + * mci_v6_dyn_ip table (while still being pointed to by the dhcpv6_cid_t). + * + * 3. IPv6 ip-nospoof will now check mci_v6_dyn_ip for matching packets. + * Packets with a source address matching one of the DHCPv6-assigned + * addresses will be allowed through. + * + * IPv6 notes: + * + * The v6 code shares the same timer as v4 for scrubbing stale transactions. 
+ * Just like v4, as part of removing an expired transaction, a RELEASE will be + * be triggered on the cid associated with the expired transaction. + * + * The data structures used for v6 are slightly different because a v6 client + * may have multiple addresses associated with it. + */ + +/* + * These are just arbitrary limits meant for preventing abuse (e.g. a user + * flooding the network with bogus transactions). They are not meant to be + * user-modifiable so they are not exposed as linkprops. + */ +static ulong_t dhcp_max_pending_txn = 512; +static ulong_t dhcp_max_completed_txn = 512; +static time_t txn_cleanup_interval = 60; + +/* + * DHCPv4 transaction. It may be added to three different tables + * (keyed by different fields). + */ +typedef struct dhcpv4_txn { + uint32_t dt_xid; + time_t dt_timestamp; + uint8_t dt_cid[DHCP_MAX_OPT_SIZE]; + uint8_t dt_cid_len; + ipaddr_t dt_ipaddr; + avl_node_t dt_node; + avl_node_t dt_ipnode; + struct dhcpv4_txn *dt_next; +} dhcpv4_txn_t; + +/* + * DHCPv6 address. May be added to mci_v6_dyn_ip. + * It is always pointed to by its parent dhcpv6_cid_t structure. + */ +typedef struct dhcpv6_addr { + in6_addr_t da_addr; + avl_node_t da_node; + struct dhcpv6_addr *da_next; +} dhcpv6_addr_t; + +/* + * DHCPv6 client ID. May be added to mci_v6_cid. + * No dhcpv6_txn_t should be pointing to it after it is added to mci_v6_cid. + */ +typedef struct dhcpv6_cid { + uchar_t *dc_cid; + uint_t dc_cid_len; + dhcpv6_addr_t *dc_addr; + uint_t dc_addrcnt; + avl_node_t dc_node; +} dhcpv6_cid_t; + +/* + * DHCPv6 transaction. Unlike its v4 counterpart, this object gets freed up + * as soon as the transaction completes or expires. 
+ */ +typedef struct dhcpv6_txn { + uint32_t dt_xid; + time_t dt_timestamp; + dhcpv6_cid_t *dt_cid; + avl_node_t dt_node; + struct dhcpv6_txn *dt_next; +} dhcpv6_txn_t; + +static void start_txn_cleanup_timer(mac_client_impl_t *); + +#define BUMP_STAT(m, s) (m)->mci_misc_stat.mms_##s++ + +/* + * Comparison functions for the 3 AVL trees used: + * mci_v4_pending_txn, mci_v4_completed_txn, mci_v4_dyn_ip + */ +static int +compare_dhcpv4_xid(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_xid < txn2->dt_xid) + return (-1); + else if (txn1->dt_xid > txn2->dt_xid) + return (1); + else + return (0); +} + +static int +compare_dhcpv4_cid(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + int ret; + + if (txn1->dt_cid_len < txn2->dt_cid_len) + return (-1); + else if (txn1->dt_cid_len > txn2->dt_cid_len) + return (1); + + if (txn1->dt_cid_len == 0) + return (0); + + ret = memcmp(txn1->dt_cid, txn2->dt_cid, txn1->dt_cid_len); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +static int +compare_dhcpv4_ip(const void *arg1, const void *arg2) +{ + const dhcpv4_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_ipaddr < txn2->dt_ipaddr) + return (-1); + else if (txn1->dt_ipaddr > txn2->dt_ipaddr) + return (1); + else + return (0); +} + +/* + * Find the specified DHCPv4 option. + */ +static int +get_dhcpv4_option(struct dhcp *dh4, uchar_t *end, uint8_t type, + uchar_t **opt, uint8_t *opt_len) +{ + uchar_t *start = (uchar_t *)dh4->options; + uint8_t otype, olen; + + while (start < end) { + if (*start == CD_PAD) { + start++; + continue; + } + if (*start == CD_END) + break; + + otype = *start++; + olen = *start++; + if (otype == type && olen > 0) { + *opt = start; + *opt_len = olen; + return (0); + } + start += olen; + } + return (ENOENT); +} + +/* + * Locate the start of a DHCPv4 header. 
+ * The possible return values and associated meanings are: + * 0 - packet is DHCP and has a DHCP header. + * EINVAL - packet is not DHCP. the recommended action is to let it pass. + * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable. + * the recommended action is to drop it. + */ +static int +get_dhcpv4_info(ipha_t *ipha, uchar_t *end, struct dhcp **dh4) +{ + uint16_t offset_and_flags, client, server; + boolean_t first_frag = B_FALSE; + struct udphdr *udph; + uchar_t *dh; + + if (ipha->ipha_protocol != IPPROTO_UDP) + return (EINVAL); + + offset_and_flags = ntohs(ipha->ipha_fragment_offset_and_flags); + if ((offset_and_flags & (IPH_MF | IPH_OFFSET)) != 0) { + /* + * All non-initial fragments may pass because we cannot + * identify their type. It's safe to let them through + * because reassembly will fail if we decide to drop the + * initial fragment. + */ + if (((offset_and_flags << 3) & 0xffff) != 0) + return (EINVAL); + first_frag = B_TRUE; + } + /* drop packets without a udp header */ + udph = (struct udphdr *)((uchar_t *)ipha + IPH_HDR_LENGTH(ipha)); + if ((uchar_t *)&udph[1] > end) + return (ENOSPC); + + client = htons(IPPORT_BOOTPC); + server = htons(IPPORT_BOOTPS); + if (udph->uh_sport != client && udph->uh_sport != server && + udph->uh_dport != client && udph->uh_dport != server) + return (EINVAL); + + /* drop dhcp fragments */ + if (first_frag) + return (ENOSPC); + + dh = (uchar_t *)&udph[1]; + if (dh + BASE_PKT_SIZE > end) + return (EINVAL); + + *dh4 = (struct dhcp *)dh; + return (0); +} + +/* + * Wrappers for accesses to avl trees to improve readability. + * Their purposes are fairly self-explanatory. 
+ */ +static dhcpv4_txn_t * +find_dhcpv4_pending_txn(mac_client_impl_t *mcip, uint32_t xid) +{ + dhcpv4_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + tmp_txn.dt_xid = xid; + return (avl_find(&mcip->mci_v4_pending_txn, &tmp_txn, NULL)); +} + +static int +insert_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v4_pending_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v4_pending_txn) >= dhcp_max_pending_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v4_pending_txn, txn, where); + return (0); +} + +static void +remove_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v4_pending_txn, txn); +} + +static dhcpv4_txn_t * +find_dhcpv4_completed_txn(mac_client_impl_t *mcip, uint8_t *cid, + uint8_t cid_len) +{ + dhcpv4_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (cid_len > 0) + bcopy(cid, tmp_txn.dt_cid, cid_len); + tmp_txn.dt_cid_len = cid_len; + return (avl_find(&mcip->mci_v4_completed_txn, &tmp_txn, NULL)); +} + +/* + * After a pending txn is removed from the pending table, it is inserted + * into both the completed and dyn-ip tables. These two insertions are + * done together because a client ID must have 1:1 correspondence with + * an IP address and IP addresses must be unique in the dyn-ip table. 
+ */ +static int +insert_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v4_completed_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v4_completed_txn) >= + dhcp_max_completed_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + + avl_insert(&mcip->mci_v4_completed_txn, txn, where); + if (avl_find(&mcip->mci_v4_dyn_ip, txn, &where) != NULL) { + avl_remove(&mcip->mci_v4_completed_txn, txn); + return (EEXIST); + } + avl_insert(&mcip->mci_v4_dyn_ip, txn, where); + return (0); +} + +static void +remove_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn) +{ + dhcpv4_txn_t *ctxn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if ((ctxn = avl_find(&mcip->mci_v4_dyn_ip, txn, NULL)) != NULL && + ctxn == txn) + avl_remove(&mcip->mci_v4_dyn_ip, txn); + + avl_remove(&mcip->mci_v4_completed_txn, txn); +} + +/* + * Check whether an IP address is in the dyn-ip table. */ static boolean_t -ipnospoof_check_ips(mac_protect_t *protect, ipaddr_t ipaddr) +check_dhcpv4_dyn_ip(mac_client_impl_t *mcip, ipaddr_t ipaddr) +{ + dhcpv4_txn_t tmp_txn, *txn; + + mutex_enter(&mcip->mci_protect_lock); + tmp_txn.dt_ipaddr = ipaddr; + txn = avl_find(&mcip->mci_v4_dyn_ip, &tmp_txn, NULL); + mutex_exit(&mcip->mci_protect_lock); + return (txn != NULL); +} + +/* + * Create/destroy a DHCPv4 transaction. + */ +static dhcpv4_txn_t * +create_dhcpv4_txn(uint32_t xid, uint8_t *cid, uint8_t cid_len, ipaddr_t ipaddr) +{ + dhcpv4_txn_t *txn; + + if ((txn = kmem_zalloc(sizeof (*txn), KM_NOSLEEP)) == NULL) + return (NULL); + + txn->dt_xid = xid; + txn->dt_timestamp = ddi_get_time(); + if (cid_len > 0) + bcopy(cid, &txn->dt_cid, cid_len); + txn->dt_cid_len = cid_len; + txn->dt_ipaddr = ipaddr; + return (txn); +} + +static void +free_dhcpv4_txn(dhcpv4_txn_t *txn) +{ + kmem_free(txn, sizeof (*txn)); +} + +/* + * Clean up all v4 tables. 
+ */ +static void +flush_dhcpv4(mac_client_impl_t *mcip) +{ + void *cookie = NULL; + dhcpv4_txn_t *txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + while ((txn = avl_destroy_nodes(&mcip->mci_v4_dyn_ip, + &cookie)) != NULL) { + /* + * No freeing needed here because the same txn exists + * in the mci_v4_completed_txn table as well. + */ + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v4_completed_txn, + &cookie)) != NULL) { + free_dhcpv4_txn(txn); + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v4_pending_txn, + &cookie)) != NULL) { + free_dhcpv4_txn(txn); + } +} + +/* + * Cleanup stale DHCPv4 transactions. + */ +static void +txn_cleanup_v4(mac_client_impl_t *mcip) { - uint_t i; + dhcpv4_txn_t *txn, *ctxn, *next, *txn_list = NULL; /* - * unspecified addresses are harmless and are used by ARP,DHCP..etc. + * Find stale pending transactions and place them on a list + * to be removed. */ - if (ipaddr == INADDR_ANY) - return (B_TRUE); + for (txn = avl_first(&mcip->mci_v4_pending_txn); txn != NULL; + txn = avl_walk(&mcip->mci_v4_pending_txn, txn, AVL_AFTER)) { + if (ddi_get_time() - txn->dt_timestamp > + txn_cleanup_interval) { + DTRACE_PROBE2(found__expired__txn, + mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); - for (i = 0; i < protect->mp_ipaddrcnt; i++) { - if (protect->mp_ipaddrs[i] == ipaddr) - return (B_TRUE); + txn->dt_next = txn_list; + txn_list = txn; + } } - return (B_FALSE); + + /* + * Remove and free stale pending transactions and completed + * transactions with the same client IDs as the stale transactions. 
+ */ + for (txn = txn_list; txn != NULL; txn = next) { + avl_remove(&mcip->mci_v4_pending_txn, txn); + + ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, + txn->dt_cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(removing__completed__txn, + mac_client_impl_t *, mcip, + dhcpv4_txn_t *, ctxn); + + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + next = txn->dt_next; + txn->dt_next = NULL; + + DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + } +} + +/* + * Core logic for intercepting outbound DHCPv4 packets. + */ +static void +intercept_dhcpv4_outbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end) +{ + struct dhcp *dh4; + uchar_t *opt; + dhcpv4_txn_t *txn, *ctxn; + ipaddr_t ipaddr; + uint8_t opt_len, mtype, cid[DHCP_MAX_OPT_SIZE], cid_len; + + if (get_dhcpv4_info(ipha, end, &dh4) != 0) + return; + + if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 || + opt_len != 1) { + DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + return; + } + mtype = *opt; + if (mtype != REQUEST && mtype != RELEASE) { + DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip, + struct dhcp *, dh4, uint8_t, mtype); + return; + } + + /* client ID is optional for IPv4 */ + if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &opt, &opt_len) == 0 && + opt_len >= 2) { + bcopy(opt, cid, opt_len); + cid_len = opt_len; + } else { + bzero(cid, DHCP_MAX_OPT_SIZE); + cid_len = 0; + } + + mutex_enter(&mcip->mci_protect_lock); + if (mtype == RELEASE) { + DTRACE_PROBE2(release, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + + /* flush any completed txn with this cid */ + ctxn = find_dhcpv4_completed_txn(mcip, cid, cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(release__successful, mac_client_impl_t *, + mcip, struct dhcp *, dh4); + + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + goto done; + } + + /* + * If a pending txn already exists, we'll 
update its timestamp so + * it won't get flushed by the timer. We don't need to create new + * txns for retransmissions. + */ + if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) != NULL) { + DTRACE_PROBE2(update, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + txn->dt_timestamp = ddi_get_time(); + goto done; + } + + if (get_dhcpv4_option(dh4, end, CD_REQUESTED_IP_ADDR, + &opt, &opt_len) != 0 || opt_len != sizeof (ipaddr)) { + DTRACE_PROBE2(ipaddr__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + goto done; + } + bcopy(opt, &ipaddr, sizeof (ipaddr)); + if ((txn = create_dhcpv4_txn(dh4->xid, cid, cid_len, ipaddr)) == NULL) + goto done; + + if (insert_dhcpv4_pending_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + start_txn_cleanup_timer(mcip); + + DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + +done: + mutex_exit(&mcip->mci_protect_lock); } /* - * Enforce ip-nospoof protection. Only IPv4 is supported for now. + * Core logic for intercepting inbound DHCPv4 packets. 
+ */ +static void +intercept_dhcpv4_inbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end) +{ + uchar_t *opt; + struct dhcp *dh4; + dhcpv4_txn_t *txn, *ctxn; + uint8_t opt_len, mtype; + + if (get_dhcpv4_info(ipha, end, &dh4) != 0) + return; + + if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 || + opt_len != 1) { + DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + return; + } + mtype = *opt; + if (mtype != ACK && mtype != NAK) { + DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip, + struct dhcp *, dh4, uint8_t, mtype); + return; + } + + mutex_enter(&mcip->mci_protect_lock); + if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) == NULL) { + DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip, + struct dhcp *, dh4); + goto done; + } + remove_dhcpv4_pending_txn(mcip, txn); + + /* + * We're about to move a txn from the pending table to the completed/ + * dyn-ip tables. If there is an existing completed txn with the + * same cid as our txn, we need to remove and free it. + */ + ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, txn->dt_cid_len); + if (ctxn != NULL) { + DTRACE_PROBE2(replacing__old__txn, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, ctxn); + remove_dhcpv4_completed_txn(mcip, ctxn); + free_dhcpv4_txn(ctxn); + } + if (mtype == NAK) { + DTRACE_PROBE2(nak__received, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + if (insert_dhcpv4_completed_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + free_dhcpv4_txn(txn); + goto done; + } + DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip, + dhcpv4_txn_t *, txn); + +done: + mutex_exit(&mcip->mci_protect_lock); +} + + +/* + * Comparison functions for the DHCPv6 AVL trees. 
*/ static int -ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, - mblk_t *mp, mac_header_info_t *mhip) +compare_dhcpv6_xid(const void *arg1, const void *arg2) { - uint32_t sap = mhip->mhi_bindsap; - uchar_t *start = mp->b_rptr + mhip->mhi_hdrsize; - int err = EINVAL; + const dhcpv6_txn_t *txn1 = arg1, *txn2 = arg2; + + if (txn1->dt_xid < txn2->dt_xid) + return (-1); + else if (txn1->dt_xid > txn2->dt_xid) + return (1); + else + return (0); +} + +static int +compare_dhcpv6_ip(const void *arg1, const void *arg2) +{ + const dhcpv6_addr_t *ip1 = arg1, *ip2 = arg2; + int ret; + + ret = memcmp(&ip1->da_addr, &ip2->da_addr, sizeof (in6_addr_t)); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +static int +compare_dhcpv6_cid(const void *arg1, const void *arg2) +{ + const dhcpv6_cid_t *cid1 = arg1, *cid2 = arg2; + int ret; + + if (cid1->dc_cid_len < cid2->dc_cid_len) + return (-1); + else if (cid1->dc_cid_len > cid2->dc_cid_len) + return (1); + + if (cid1->dc_cid_len == 0) + return (0); + + ret = memcmp(cid1->dc_cid, cid2->dc_cid, cid1->dc_cid_len); + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +/* + * Locate the start of a DHCPv6 header. + * The possible return values and associated meanings are: + * 0 - packet is DHCP and has a DHCP header. + * EINVAL - packet is not DHCP. the recommended action is to let it pass. + * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable. + * the recommended action is to drop it. 
+ */ +static int +get_dhcpv6_info(ip6_t *ip6h, uchar_t *end, dhcpv6_message_t **dh6) +{ + uint16_t hdrlen, client, server; + boolean_t first_frag = B_FALSE; + ip6_frag_t *frag = NULL; + uint8_t proto; + struct udphdr *udph; + uchar_t *dh; + + if (!mac_ip_hdr_length_v6(ip6h, end, &hdrlen, &proto, &frag)) + return (ENOSPC); + + if (proto != IPPROTO_UDP) + return (EINVAL); + + if (frag != NULL) { + /* + * All non-initial fragments may pass because we cannot + * identify their type. It's safe to let them through + * because reassembly will fail if we decide to drop the + * initial fragment. + */ + if ((ntohs(frag->ip6f_offlg) & ~7) != 0) + return (EINVAL); + first_frag = B_TRUE; + } + /* drop packets without a udp header */ + udph = (struct udphdr *)((uchar_t *)ip6h + hdrlen); + if ((uchar_t *)&udph[1] > end) + return (ENOSPC); + + client = htons(IPPORT_DHCPV6C); + server = htons(IPPORT_DHCPV6S); + if (udph->uh_sport != client && udph->uh_sport != server && + udph->uh_dport != client && udph->uh_dport != server) + return (EINVAL); + + /* drop dhcp fragments */ + if (first_frag) + return (ENOSPC); + + dh = (uchar_t *)&udph[1]; + if (dh + sizeof (dhcpv6_message_t) > end) + return (EINVAL); + + *dh6 = (dhcpv6_message_t *)dh; + return (0); +} + +/* + * Find the specified DHCPv6 option. 
+ */ +static dhcpv6_option_t * +get_dhcpv6_option(void *buf, size_t buflen, dhcpv6_option_t *oldopt, + uint16_t codenum, uint_t *retlenp) +{ + uchar_t *bp; + dhcpv6_option_t d6o; + uint_t olen; + + codenum = htons(codenum); + bp = buf; + while (buflen >= sizeof (dhcpv6_option_t)) { + bcopy(bp, &d6o, sizeof (d6o)); + olen = ntohs(d6o.d6o_len) + sizeof (d6o); + if (olen > buflen) + break; + if (d6o.d6o_code != codenum || d6o.d6o_len == 0 || + (oldopt != NULL && bp <= (uchar_t *)oldopt)) { + bp += olen; + buflen -= olen; + continue; + } + if (retlenp != NULL) + *retlenp = olen; + /* LINTED : alignment */ + return ((dhcpv6_option_t *)bp); + } + return (NULL); +} + +/* + * Get the status code from a reply message. + */ +static int +get_dhcpv6_status(dhcpv6_message_t *dh6, uchar_t *end, uint16_t *status) +{ + dhcpv6_option_t *d6o; + uint_t olen; + uint16_t s; + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_STATUS_CODE, &olen); + + /* Success is implied if status code is missing */ + if (d6o == NULL) { + *status = DHCPV6_STAT_SUCCESS; + return (0); + } + if ((uchar_t *)d6o + olen > end) + return (EINVAL); + + olen -= sizeof (*d6o); + if (olen < sizeof (s)) + return (EINVAL); + + bcopy(&d6o[1], &s, sizeof (s)); + *status = ntohs(s); + return (0); +} + +/* + * Get the addresses from a reply message. 
+ */ +static int +get_dhcpv6_addrs(dhcpv6_message_t *dh6, uchar_t *end, dhcpv6_cid_t *cid) +{ + dhcpv6_option_t *d6o; + dhcpv6_addr_t *next; + uint_t olen; + + d6o = NULL; + while ((d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], + d6o, DHCPV6_OPT_IA_NA, &olen)) != NULL) { + dhcpv6_option_t *d6so; + dhcpv6_iaaddr_t d6ia; + dhcpv6_addr_t **addrp; + uchar_t *obase; + uint_t solen; + + if (olen < sizeof (dhcpv6_ia_na_t) || + (uchar_t *)d6o + olen > end) + goto fail; + + obase = (uchar_t *)d6o + sizeof (dhcpv6_ia_na_t); + olen -= sizeof (dhcpv6_ia_na_t); + d6so = NULL; + while ((d6so = get_dhcpv6_option(obase, olen, d6so, + DHCPV6_OPT_IAADDR, &solen)) != NULL) { + if (solen < sizeof (dhcpv6_iaaddr_t) || + (uchar_t *)d6so + solen > end) + goto fail; + + bcopy(d6so, &d6ia, sizeof (d6ia)); + for (addrp = &cid->dc_addr; *addrp != NULL; + addrp = &(*addrp)->da_next) { + if (bcmp(&(*addrp)->da_addr, &d6ia.d6ia_addr, + sizeof (in6_addr_t)) == 0) + goto fail; + } + if ((*addrp = kmem_zalloc(sizeof (dhcpv6_addr_t), + KM_NOSLEEP)) == NULL) + goto fail; + + bcopy(&d6ia.d6ia_addr, &(*addrp)->da_addr, + sizeof (in6_addr_t)); + cid->dc_addrcnt++; + } + } + if (cid->dc_addrcnt == 0) + return (ENOENT); + + return (0); + +fail: + for (; cid->dc_addr != NULL; cid->dc_addr = next) { + next = cid->dc_addr->da_next; + kmem_free(cid->dc_addr, sizeof (dhcpv6_addr_t)); + cid->dc_addrcnt--; + } + ASSERT(cid->dc_addrcnt == 0); + return (EINVAL); +} + +/* + * Free a cid. + * Before this gets called the caller must ensure that all the + * addresses are removed from the mci_v6_dyn_ip table. + */ +static void +free_dhcpv6_cid(dhcpv6_cid_t *cid) +{ + dhcpv6_addr_t *addr, *next; + uint_t cnt = 0; + + kmem_free(cid->dc_cid, cid->dc_cid_len); + for (addr = cid->dc_addr; addr != NULL; addr = next) { + next = addr->da_next; + kmem_free(addr, sizeof (*addr)); + cnt++; + } + ASSERT(cnt == cid->dc_addrcnt); + kmem_free(cid, sizeof (*cid)); +} + +/* + * Extract the DUID from a message. 
The associated addresses will be + * extracted later from the reply message. + */ +static dhcpv6_cid_t * +create_dhcpv6_cid(dhcpv6_message_t *dh6, uchar_t *end) +{ + dhcpv6_option_t *d6o; + dhcpv6_cid_t *cid; + uchar_t *rawcid; + uint_t olen, rawcidlen; + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_CLIENTID, &olen); + if (d6o == NULL || (uchar_t *)d6o + olen > end) + return (NULL); + + rawcidlen = olen - sizeof (*d6o); + if ((rawcid = kmem_zalloc(rawcidlen, KM_NOSLEEP)) == NULL) + return (NULL); + bcopy(d6o + 1, rawcid, rawcidlen); + + if ((cid = kmem_zalloc(sizeof (*cid), KM_NOSLEEP)) == NULL) { + kmem_free(rawcid, rawcidlen); + return (NULL); + } + cid->dc_cid = rawcid; + cid->dc_cid_len = rawcidlen; + return (cid); +} + +/* + * Remove a cid from mci_v6_cid. The addresses owned by the cid + * are also removed from mci_v6_dyn_ip. + */ +static void +remove_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + dhcpv6_addr_t *addr, *tmp_addr; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v6_cid, cid); + for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) { + tmp_addr = avl_find(&mcip->mci_v6_dyn_ip, addr, NULL); + if (tmp_addr == addr) + avl_remove(&mcip->mci_v6_dyn_ip, addr); + } +} + +/* + * Find and remove a matching cid and associated addresses from + * their respective tables. + */ +static void +release_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + dhcpv6_cid_t *oldcid; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if ((oldcid = avl_find(&mcip->mci_v6_cid, cid, NULL)) == NULL) + return; + + /* + * Since cid belongs to a pending txn, it can't possibly be in + * mci_v6_cid. Anything that's found must be an existing cid. + */ + ASSERT(oldcid != cid); + remove_dhcpv6_cid(mcip, oldcid); + free_dhcpv6_cid(oldcid); +} + +/* + * Insert cid into mci_v6_cid. 
+ */ +static int +insert_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid) +{ + avl_index_t where; + dhcpv6_addr_t *addr; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v6_cid, cid, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v6_cid) >= dhcp_max_completed_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v6_cid, cid, where); + for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) { + if (avl_find(&mcip->mci_v6_dyn_ip, addr, &where) != NULL) + goto fail; + + avl_insert(&mcip->mci_v6_dyn_ip, addr, where); + } + return (0); + +fail: + remove_dhcpv6_cid(mcip, cid); + return (EEXIST); +} + +/* + * Check whether an IP address is in the dyn-ip table. + */ +static boolean_t +check_dhcpv6_dyn_ip(mac_client_impl_t *mcip, in6_addr_t *addr) +{ + dhcpv6_addr_t tmp_addr, *a; + + mutex_enter(&mcip->mci_protect_lock); + bcopy(addr, &tmp_addr.da_addr, sizeof (in6_addr_t)); + a = avl_find(&mcip->mci_v6_dyn_ip, &tmp_addr, NULL); + mutex_exit(&mcip->mci_protect_lock); + return (a != NULL); +} + +static dhcpv6_txn_t * +find_dhcpv6_pending_txn(mac_client_impl_t *mcip, uint32_t xid) +{ + dhcpv6_txn_t tmp_txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + tmp_txn.dt_xid = xid; + return (avl_find(&mcip->mci_v6_pending_txn, &tmp_txn, NULL)); +} + +static void +remove_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + avl_remove(&mcip->mci_v6_pending_txn, txn); +} + +static dhcpv6_txn_t * +create_dhcpv6_txn(uint32_t xid, dhcpv6_cid_t *cid) +{ + dhcpv6_txn_t *txn; + + if ((txn = kmem_zalloc(sizeof (dhcpv6_txn_t), KM_NOSLEEP)) == NULL) + return (NULL); + + txn->dt_xid = xid; + txn->dt_cid = cid; + txn->dt_timestamp = ddi_get_time(); + return (txn); +} + +static void +free_dhcpv6_txn(dhcpv6_txn_t *txn) +{ + if (txn->dt_cid != NULL) + free_dhcpv6_cid(txn->dt_cid); + kmem_free(txn, sizeof (dhcpv6_txn_t)); +} + +static int 
+insert_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (avl_find(&mcip->mci_v6_pending_txn, txn, &where) != NULL) + return (EEXIST); + + if (avl_numnodes(&mcip->mci_v6_pending_txn) >= dhcp_max_pending_txn) { + BUMP_STAT(mcip, dhcpdropped); + return (EAGAIN); + } + avl_insert(&mcip->mci_v6_pending_txn, txn, where); + return (0); +} + +/* + * Clean up all v6 tables. + */ +static void +flush_dhcpv6(mac_client_impl_t *mcip) +{ + void *cookie = NULL; + dhcpv6_cid_t *cid; + dhcpv6_txn_t *txn; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + while (avl_destroy_nodes(&mcip->mci_v6_dyn_ip, &cookie) != NULL) { + } + cookie = NULL; + while ((cid = avl_destroy_nodes(&mcip->mci_v6_cid, &cookie)) != NULL) { + free_dhcpv6_cid(cid); + } + cookie = NULL; + while ((txn = avl_destroy_nodes(&mcip->mci_v6_pending_txn, + &cookie)) != NULL) { + free_dhcpv6_txn(txn); + } +} + +/* + * Cleanup stale DHCPv6 transactions. + */ +static void +txn_cleanup_v6(mac_client_impl_t *mcip) +{ + dhcpv6_txn_t *txn, *next, *txn_list = NULL; /* - * This handles the case where the mac header is not in - * the same mblk as the IP header. + * Find stale pending transactions and place them on a list + * to be removed. */ - if (start == mp->b_wptr) { - mp = mp->b_cont; + for (txn = avl_first(&mcip->mci_v6_pending_txn); txn != NULL; + txn = avl_walk(&mcip->mci_v6_pending_txn, txn, AVL_AFTER)) { + if (ddi_get_time() - txn->dt_timestamp > + txn_cleanup_interval) { + DTRACE_PROBE2(found__expired__txn, + mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + txn->dt_next = txn_list; + txn_list = txn; + } + } + + /* + * Remove and free stale pending transactions. + * Release any existing cids matching the stale transactions. 
+ */ + for (txn = txn_list; txn != NULL; txn = next) { + avl_remove(&mcip->mci_v6_pending_txn, txn); + release_dhcpv6_cid(mcip, txn->dt_cid); + next = txn->dt_next; + txn->dt_next = NULL; + + DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + free_dhcpv6_txn(txn); + } + +} + +/* + * Core logic for intercepting outbound DHCPv6 packets. + */ +static void +intercept_dhcpv6_outbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_txn_t *txn; + dhcpv6_cid_t *cid = NULL; + uint32_t xid; + uint8_t mtype; + + if (get_dhcpv6_info(ip6h, end, &dh6) != 0) + return; + + mtype = dh6->d6m_msg_type; + if (mtype != DHCPV6_MSG_REQUEST && mtype != DHCPV6_MSG_RENEW && + mtype != DHCPV6_MSG_REBIND && mtype != DHCPV6_MSG_RELEASE) + return; + + if ((cid = create_dhcpv6_cid(dh6, end)) == NULL) + return; + + mutex_enter(&mcip->mci_protect_lock); + if (mtype == DHCPV6_MSG_RELEASE) { + release_dhcpv6_cid(mcip, cid); + goto done; + } + xid = DHCPV6_GET_TRANSID(dh6); + if ((txn = find_dhcpv6_pending_txn(mcip, xid)) != NULL) { + DTRACE_PROBE2(update, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + txn->dt_timestamp = ddi_get_time(); + goto done; + } + if ((txn = create_dhcpv6_txn(xid, cid)) == NULL) + goto done; + + cid = NULL; + if (insert_dhcpv6_pending_txn(mcip, txn) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + free_dhcpv6_txn(txn); + goto done; + } + start_txn_cleanup_timer(mcip); + + DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + +done: + if (cid != NULL) + free_dhcpv6_cid(cid); + + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Core logic for intercepting inbound DHCPv6 packets. 
+ */ +static void +intercept_dhcpv6_inbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_txn_t *txn; + uint32_t xid; + uint8_t mtype; + uint16_t status; + + if (get_dhcpv6_info(ip6h, end, &dh6) != 0) + return; + + mtype = dh6->d6m_msg_type; + if (mtype != DHCPV6_MSG_REPLY) + return; + + mutex_enter(&mcip->mci_protect_lock); + xid = DHCPV6_GET_TRANSID(dh6); + if ((txn = find_dhcpv6_pending_txn(mcip, xid)) == NULL) { + DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip, + dhcpv6_message_t *, dh6); + goto done; + } + remove_dhcpv6_pending_txn(mcip, txn); + release_dhcpv6_cid(mcip, txn->dt_cid); + + if (get_dhcpv6_status(dh6, end, &status) != 0 || + status != DHCPV6_STAT_SUCCESS) { + DTRACE_PROBE2(error__status, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + if (get_dhcpv6_addrs(dh6, end, txn->dt_cid) != 0) { + DTRACE_PROBE2(no__addrs, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + if (insert_dhcpv6_cid(mcip, txn->dt_cid) != 0) { + DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + goto done; + } + DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip, + dhcpv6_txn_t *, txn); + + txn->dt_cid = NULL; + +done: + if (txn != NULL) + free_dhcpv6_txn(txn); + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Timer for cleaning up stale transactions. + */ +static void +txn_cleanup_timer(void *arg) +{ + mac_client_impl_t *mcip = arg; + + mutex_enter(&mcip->mci_protect_lock); + if (mcip->mci_txn_cleanup_tid == 0) { + /* do nothing if timer got cancelled */ + mutex_exit(&mcip->mci_protect_lock); + return; + } + mcip->mci_txn_cleanup_tid = 0; + + txn_cleanup_v4(mcip); + txn_cleanup_v6(mcip); + + /* + * Restart timer if pending transactions still exist. 
+ */ + if (!avl_is_empty(&mcip->mci_v4_pending_txn) || + !avl_is_empty(&mcip->mci_v6_pending_txn)) { + DTRACE_PROBE1(restarting__timer, mac_client_impl_t *, mcip); + + mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip, + drv_usectohz(txn_cleanup_interval * 1000000)); + } + mutex_exit(&mcip->mci_protect_lock); +} + +static void +start_txn_cleanup_timer(mac_client_impl_t *mcip) +{ + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + if (mcip->mci_txn_cleanup_tid == 0) { + mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip, + drv_usectohz(txn_cleanup_interval * 1000000)); + } +} + +static void +cancel_txn_cleanup_timer(mac_client_impl_t *mcip) +{ + timeout_id_t tid; + + ASSERT(MUTEX_HELD(&mcip->mci_protect_lock)); + + /* + * This needs to be a while loop because the timer could get + * rearmed during untimeout(). + */ + while ((tid = mcip->mci_txn_cleanup_tid) != 0) { + mcip->mci_txn_cleanup_tid = 0; + mutex_exit(&mcip->mci_protect_lock); + (void) untimeout(tid); + mutex_enter(&mcip->mci_protect_lock); + } +} + +/* + * Get the start/end pointers of an L3 packet and also do pullup if needed. + * pulled-up packet needs to be freed by the caller. + */ +static int +get_l3_info(mblk_t *mp, size_t hdrsize, uchar_t **start, uchar_t **end, + mblk_t **nmp) +{ + uchar_t *s, *e; + mblk_t *newmp = NULL; + + /* + * Pullup if necessary but reject packets that do not have + * a proper mac header. + */ + s = mp->b_rptr + hdrsize; + e = mp->b_wptr; + + if (s > mp->b_wptr) + return (EINVAL); + + if (!OK_32PTR(s) || mp->b_cont != NULL) { /* - * IP header missing. Let the packet through. + * Temporarily adjust mp->b_rptr to ensure proper + * alignment of IP header in newmp. 
*/ - if (mp == NULL) - return (0); + DTRACE_PROBE1(pullup__needed, mblk_t *, mp); + + mp->b_rptr += hdrsize; + newmp = msgpullup(mp, -1); + mp->b_rptr -= hdrsize; + + if (newmp == NULL) + return (ENOMEM); + + s = newmp->b_rptr; + e = newmp->b_wptr; + } + + *start = s; + *end = e; + *nmp = newmp; + return (0); +} + +void +mac_protect_intercept_dhcp_one(mac_client_impl_t *mcip, mblk_t *mp) +{ + mac_impl_t *mip = mcip->mci_mip; + uchar_t *start, *end; + mblk_t *nmp = NULL; + mac_header_info_t mhi; + int err; + + err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi); + if (err != 0) { + DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip, + mblk_t *, mp); + return; + } + + err = get_l3_info(mp, mhi.mhi_hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return; + } + + switch (mhi.mhi_bindsap) { + case ETHERTYPE_IP: { + ipha_t *ipha = (ipha_t *)start; + + if (start + sizeof (ipha_t) > end) + return; + + intercept_dhcpv4_inbound(mcip, ipha, end); + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + return; + + intercept_dhcpv6_inbound(mcip, ip6h, end); + break; + } + } + freemsg(nmp); +} + +void +mac_protect_intercept_dhcp(mac_client_impl_t *mcip, mblk_t *mp) +{ + /* + * Skip checks if we are part of an aggr. + */ + if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0) + return; + + for (; mp != NULL; mp = mp->b_next) + mac_protect_intercept_dhcp_one(mcip, mp); +} + +void +mac_protect_flush_dhcp(mac_client_impl_t *mcip) +{ + mutex_enter(&mcip->mci_protect_lock); + flush_dhcpv4(mcip); + flush_dhcpv6(mcip); + mutex_exit(&mcip->mci_protect_lock); +} + +void +mac_protect_cancel_timer(mac_client_impl_t *mcip) +{ + mutex_enter(&mcip->mci_protect_lock); + cancel_txn_cleanup_timer(mcip); + mutex_exit(&mcip->mci_protect_lock); +} + +/* + * Check if addr is in the 'allowed-ips' list. 
+ */ + +/* ARGSUSED */ +static boolean_t +ipnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *protect, + ipaddr_t *addr) +{ + uint_t i; + + /* + * The unspecified address is allowed. + */ + if (*addr == INADDR_ANY) + return (B_TRUE); + + for (i = 0; i < protect->mp_ipaddrcnt; i++) { + mac_ipaddr_t *v4addr = &protect->mp_ipaddrs[i]; + + if (v4addr->ip_version == IPV4_VERSION && + V4_PART_OF_V6(v4addr->ip_addr) == *addr) + return (B_TRUE); + } + return (check_dhcpv4_dyn_ip(mcip, *addr)); +} + +static boolean_t +ipnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *protect, + in6_addr_t *addr) +{ + uint_t i; + + /* + * The unspecified address and the v6 link local address are allowed. + */ + if (IN6_IS_ADDR_UNSPECIFIED(addr) || + ((mcip->mci_protect_flags & MPT_FLAG_V6_LOCAL_ADDR_SET) != 0 && + IN6_ARE_ADDR_EQUAL(&mcip->mci_v6_local_addr, addr))) + return (B_TRUE); + + + for (i = 0; i < protect->mp_ipaddrcnt; i++) { + mac_ipaddr_t *v6addr = &protect->mp_ipaddrs[i]; + + if (v6addr->ip_version == IPV6_VERSION && + IN6_ARE_ADDR_EQUAL(&v6addr->ip_addr, addr)) + return (B_TRUE); + } + return (check_dhcpv6_dyn_ip(mcip, addr)); +} + +/* + * Checks various fields within an IPv6 NDP packet. + */ +static boolean_t +ipnospoof_check_ndp(mac_client_impl_t *mcip, mac_protect_t *protect, + ip6_t *ip6h, uchar_t *end) +{ + icmp6_t *icmp_nd = (icmp6_t *)&ip6h[1]; + int hdrlen, optlen, opttype, len; + uint_t addrlen, maclen; + uint8_t type; + nd_opt_hdr_t *opt; + struct nd_opt_lla *lla = NULL; + + /* + * NDP packets do not have extension headers so the ICMPv6 header + * must immediately follow the IPv6 header. 
+ */ + if (ip6h->ip6_nxt != IPPROTO_ICMPV6) + return (B_TRUE); + + /* ICMPv6 header missing */ + if ((uchar_t *)&icmp_nd[1] > end) + return (B_FALSE); + + len = end - (uchar_t *)icmp_nd; + type = icmp_nd->icmp6_type; + + switch (type) { + case ND_ROUTER_SOLICIT: + hdrlen = sizeof (nd_router_solicit_t); + break; + case ND_ROUTER_ADVERT: + hdrlen = sizeof (nd_router_advert_t); + break; + case ND_NEIGHBOR_SOLICIT: + hdrlen = sizeof (nd_neighbor_solicit_t); + break; + case ND_NEIGHBOR_ADVERT: + hdrlen = sizeof (nd_neighbor_advert_t); + break; + case ND_REDIRECT: + hdrlen = sizeof (nd_redirect_t); + break; + default: + return (B_TRUE); + } + + if (len < hdrlen) + return (B_FALSE); + + /* SLLA option checking is needed for RS/RA/NS */ + opttype = ND_OPT_SOURCE_LINKADDR; + + switch (type) { + case ND_NEIGHBOR_ADVERT: { + nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp_nd; + + if (!ipnospoof_check_v6(mcip, protect, &na->nd_na_target)) { + DTRACE_PROBE2(ndp__na__fail, + mac_client_impl_t *, mcip, ip6_t *, ip6h); + return (B_FALSE); + } + + /* TLLA option for NA */ + opttype = ND_OPT_TARGET_LINKADDR; + break; + } + case ND_REDIRECT: { + /* option checking not needed for RD */ + return (B_TRUE); + } + default: + break; + } - start = mp->b_rptr; + if (len == hdrlen) { + /* no options, we're done */ + return (B_TRUE); } + opt = (nd_opt_hdr_t *)((uchar_t *)icmp_nd + hdrlen); + optlen = len - hdrlen; + + /* find the option header we need */ + while (optlen > sizeof (nd_opt_hdr_t)) { + if (opt->nd_opt_type == opttype) { + lla = (struct nd_opt_lla *)opt; + break; + } + optlen -= 8 * opt->nd_opt_len; + opt = (nd_opt_hdr_t *) + ((uchar_t *)opt + 8 * opt->nd_opt_len); + } + if (lla == NULL) + return (B_TRUE); + + addrlen = lla->nd_opt_lla_len * 8 - sizeof (nd_opt_hdr_t); + maclen = mcip->mci_mip->mi_info.mi_addr_length; + + if (addrlen != maclen || + bcmp(mcip->mci_unicast->ma_addr, + lla->nd_opt_lla_hdw_addr, maclen) != 0) { + DTRACE_PROBE2(ndp__lla__fail, + mac_client_impl_t 
*, mcip, ip6_t *, ip6h); + return (B_FALSE); + } + + DTRACE_PROBE2(ndp__lla__ok, mac_client_impl_t *, mcip, ip6_t *, ip6h); + return (B_TRUE); +} + +/* + * Enforce ip-nospoof protection. + */ +static int +ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, + mblk_t *mp, mac_header_info_t *mhip) +{ + size_t hdrsize = mhip->mhi_hdrsize; + uint32_t sap = mhip->mhi_bindsap; + uchar_t *start, *end; + mblk_t *nmp = NULL; + int err; + + err = get_l3_info(mp, hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return (err); + } + err = EINVAL; switch (sap) { case ETHERTYPE_IP: { ipha_t *ipha = (ipha_t *)start; - if (start + sizeof (ipha_t) > mp->b_wptr || !OK_32PTR(start)) + if (start + sizeof (ipha_t) > end) goto fail; - if (!ipnospoof_check_ips(protect, ipha->ipha_src)) + if (!ipnospoof_check_v4(mcip, protect, &ipha->ipha_src)) goto fail; + intercept_dhcpv4_outbound(mcip, ipha, end); break; } case ETHERTYPE_ARP: { @@ -103,7 +1703,7 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, ipaddr_t spaddr; uchar_t *shaddr; - if (start + sizeof (arh_t) > mp->b_wptr) + if (start + sizeof (arh_t) > end) goto fail; maclen = mcip->mci_mip->mi_info.mi_addr_length; @@ -114,7 +1714,7 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, goto fail; arplen = sizeof (arh_t) + 2 * hlen + 2 * plen; - if (start + arplen > mp->b_wptr) + if (start + arplen > end) goto fail; shaddr = start + sizeof (arh_t); @@ -123,20 +1723,230 @@ ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, goto fail; bcopy(shaddr + hlen, &spaddr, sizeof (spaddr)); - if (!ipnospoof_check_ips(protect, spaddr)) + if (!ipnospoof_check_v4(mcip, protect, &spaddr)) goto fail; break; } - default: + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + goto fail; + + if (!ipnospoof_check_v6(mcip, protect, &ip6h->ip6_src)) + goto fail; + + if 
(!ipnospoof_check_ndp(mcip, protect, ip6h, end)) + goto fail; + + intercept_dhcpv6_outbound(mcip, ip6h, end); break; } + } + freemsg(nmp); return (0); fail: - /* increment ipnospoof stat here */ + freemsg(nmp); return (err); } +static boolean_t +dhcpnospoof_check_cid(mac_protect_t *p, uchar_t *cid, uint_t cidlen) +{ + int i; + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *dcid = &p->mp_cids[i]; + + if (dcid->dc_len == cidlen && + bcmp(dcid->dc_id, cid, cidlen) == 0) + return (B_TRUE); + } + return (B_FALSE); +} + +static boolean_t +dhcpnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *p, + ipha_t *ipha, uchar_t *end) +{ + struct dhcp *dh4; + uchar_t *cid; + uint_t maclen, cidlen = 0; + uint8_t optlen; + int err; + + if ((err = get_dhcpv4_info(ipha, end, &dh4)) != 0) + return (err == EINVAL); + + maclen = mcip->mci_mip->mi_info.mi_addr_length; + if (dh4->hlen == maclen && + bcmp(mcip->mci_unicast->ma_addr, dh4->chaddr, maclen) != 0) { + return (B_FALSE); + } + if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &cid, &optlen) == 0) + cidlen = optlen; + + if (cidlen == 0) + return (B_TRUE); + + if (*cid == ARPHRD_ETHER && cidlen - 1 == maclen && + bcmp(mcip->mci_unicast->ma_addr, cid + 1, maclen) == 0) + return (B_TRUE); + + return (dhcpnospoof_check_cid(p, cid, cidlen)); +} + +static boolean_t +dhcpnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *p, + ip6_t *ip6h, uchar_t *end) +{ + dhcpv6_message_t *dh6; + dhcpv6_option_t *d6o; + uint8_t mtype; + uchar_t *cid, *lladdr = NULL; + uint_t cidlen, maclen, addrlen = 0; + uint16_t cidtype; + int err; + + if ((err = get_dhcpv6_info(ip6h, end, &dh6)) != 0) + return (err == EINVAL); + + /* + * We only check client-generated messages. 
+ */ + mtype = dh6->d6m_msg_type; + if (mtype == DHCPV6_MSG_ADVERTISE || mtype == DHCPV6_MSG_REPLY || + mtype == DHCPV6_MSG_RECONFIGURE) + return (B_TRUE); + + d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL, + DHCPV6_OPT_CLIENTID, &cidlen); + if (d6o == NULL || (uchar_t *)d6o + cidlen > end) + return (B_TRUE); + + cid = (uchar_t *)&d6o[1]; + cidlen -= sizeof (*d6o); + if (cidlen < sizeof (cidtype)) + return (B_TRUE); + + bcopy(cid, &cidtype, sizeof (cidtype)); + cidtype = ntohs(cidtype); + if (cidtype == DHCPV6_DUID_LLT && cidlen >= sizeof (duid_llt_t)) { + lladdr = cid + sizeof (duid_llt_t); + addrlen = cidlen - sizeof (duid_llt_t); + } + if (cidtype == DHCPV6_DUID_LL && cidlen >= sizeof (duid_ll_t)) { + lladdr = cid + sizeof (duid_ll_t); + addrlen = cidlen - sizeof (duid_ll_t); + } + maclen = mcip->mci_mip->mi_info.mi_addr_length; + if (lladdr != NULL && addrlen == maclen && + bcmp(mcip->mci_unicast->ma_addr, lladdr, maclen) == 0) { + return (B_TRUE); + } + return (dhcpnospoof_check_cid(p, cid, cidlen)); +} + +/* + * Enforce dhcp-nospoof protection. 
+ */ +static int +dhcpnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect, + mblk_t *mp, mac_header_info_t *mhip) +{ + size_t hdrsize = mhip->mhi_hdrsize; + uint32_t sap = mhip->mhi_bindsap; + uchar_t *start, *end; + mblk_t *nmp = NULL; + int err; + + err = get_l3_info(mp, hdrsize, &start, &end, &nmp); + if (err != 0) { + DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip, + mblk_t *, mp); + return (err); + } + err = EINVAL; + + switch (sap) { + case ETHERTYPE_IP: { + ipha_t *ipha = (ipha_t *)start; + + if (start + sizeof (ipha_t) > end) + goto fail; + + if (!dhcpnospoof_check_v4(mcip, protect, ipha, end)) + goto fail; + + break; + } + case ETHERTYPE_IPV6: { + ip6_t *ip6h = (ip6_t *)start; + + if (start + sizeof (ip6_t) > end) + goto fail; + + if (!dhcpnospoof_check_v6(mcip, protect, ip6h, end)) + goto fail; + + break; + } + } + freemsg(nmp); + return (0); + +fail: + /* increment dhcpnospoof stat here */ + freemsg(nmp); + return (err); +} + +/* + * This needs to be called whenever the mac client's mac address changes. + */ +void +mac_protect_update_v6_local_addr(mac_client_impl_t *mcip) +{ + uint8_t *p, *macaddr = mcip->mci_unicast->ma_addr; + uint_t i, media = mcip->mci_mip->mi_info.mi_media; + in6_addr_t token, *v6addr = &mcip->mci_v6_local_addr; + in6_addr_t ll_template = {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; + + + bzero(&token, sizeof (token)); + p = (uint8_t *)&token.s6_addr32[2]; + + switch (media) { + case DL_ETHER: + bcopy(macaddr, p, 3); + p[0] ^= 0x2; + p[3] = 0xff; + p[4] = 0xfe; + bcopy(macaddr + 3, p + 5, 3); + break; + case DL_IB: + ASSERT(mcip->mci_mip->mi_info.mi_addr_length == 20); + bcopy(macaddr + 12, p, 8); + p[0] |= 2; + break; + default: + /* + * We do not need to generate the local address for link types + * that do not support link protection. Wifi pretends to be + * ethernet so it is covered by the DL_ETHER case (note the + * use of mi_media instead of mi_nativemedia). 
+ */ + return; + } + + for (i = 0; i < 4; i++) { + v6addr->s6_addr32[i] = token.s6_addr32[i] | + ll_template.s6_addr32[i]; + } + mcip->mci_protect_flags |= MPT_FLAG_V6_LOCAL_ADDR_SET; +} + /* * Enforce link protection on one packet. */ @@ -159,7 +1969,6 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) mblk_t *, mp); return (err); } - protect = &mrp->mrp_protect; types = protect->mp_types; @@ -167,12 +1976,12 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) if (mhi.mhi_saddr != NULL && bcmp(mcip->mci_unicast->ma_addr, mhi.mhi_saddr, mip->mi_info.mi_addr_length) != 0) { + BUMP_STAT(mcip, macspoofed); DTRACE_PROBE2(mac__nospoof__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); } } - if ((types & MPT_RESTRICTED) != 0) { uint32_t vid = VLAN_ID(mhi.mhi_tci); uint32_t sap = mhi.mhi_bindsap; @@ -182,6 +1991,7 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) * the vid is not spoofed. */ if (vid != 0 && !mac_client_check_flow_vid(mcip, vid)) { + BUMP_STAT(mcip, restricted); DTRACE_PROBE2(restricted__vid__invalid, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); @@ -189,20 +1999,28 @@ mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp) if (sap != ETHERTYPE_IP && sap != ETHERTYPE_IPV6 && sap != ETHERTYPE_ARP) { + BUMP_STAT(mcip, restricted); DTRACE_PROBE2(restricted__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (EINVAL); } } - if ((types & MPT_IPNOSPOOF) != 0) { - if ((err = ipnospoof_check(mcip, protect, - mp, &mhi)) != 0) { + if ((err = ipnospoof_check(mcip, protect, mp, &mhi)) != 0) { + BUMP_STAT(mcip, ipspoofed); DTRACE_PROBE2(ip__nospoof__fail, mac_client_impl_t *, mcip, mblk_t *, mp); return (err); } } + if ((types & MPT_DHCPNOSPOOF) != 0) { + if ((err = dhcpnospoof_check(mcip, protect, mp, &mhi)) != 0) { + BUMP_STAT(mcip, dhcpspoofed); + DTRACE_PROBE2(dhcp__nospoof__fail, + mac_client_impl_t *, mcip, mblk_t *, mp); + return (err); + } + } return (0); } @@ -242,11 +2060,89 @@ 
mac_protect_check(mac_client_handle_t mch, mblk_t *mp) boolean_t mac_protect_enabled(mac_client_handle_t mch, uint32_t type) { - mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + return (MAC_PROTECT_ENABLED((mac_client_impl_t *)mch, type)); +} - ASSERT(mrp != NULL); - return ((mrp->mrp_protect.mp_types & type) != 0); +static int +validate_ips(mac_protect_t *p) +{ + uint_t i, j; + + if (p->mp_ipaddrcnt == MPT_RESET) + return (0); + + if (p->mp_ipaddrcnt > MPT_MAXIPADDR) + return (EINVAL); + + for (i = 0; i < p->mp_ipaddrcnt; i++) { + mac_ipaddr_t *addr = &p->mp_ipaddrs[i]; + + /* + * The unspecified address is implicitly allowed + * so there's no need to add it to the list. + */ + if (addr->ip_version == IPV4_VERSION) { + if (V4_PART_OF_V6(addr->ip_addr) == INADDR_ANY) + return (EINVAL); + } else if (addr->ip_version == IPV6_VERSION) { + if (IN6_IS_ADDR_UNSPECIFIED(&addr->ip_addr)) + return (EINVAL); + } else { + /* invalid ip version */ + return (EINVAL); + } + + for (j = 0; j < p->mp_ipaddrcnt; j++) { + mac_ipaddr_t *addr1 = &p->mp_ipaddrs[j]; + + if (i == j || addr->ip_version != addr1->ip_version) + continue; + + /* found a duplicate */ + if ((addr->ip_version == IPV4_VERSION && + V4_PART_OF_V6(addr->ip_addr) == + V4_PART_OF_V6(addr1->ip_addr)) || + IN6_ARE_ADDR_EQUAL(&addr->ip_addr, + &addr1->ip_addr)) + return (EINVAL); + } + } + return (0); +} + +/* ARGSUSED */ +static int +validate_cids(mac_protect_t *p) +{ + uint_t i, j; + + if (p->mp_cidcnt == MPT_RESET) + return (0); + + if (p->mp_cidcnt > MPT_MAXCID) + return (EINVAL); + + for (i = 0; i < p->mp_cidcnt; i++) { + mac_dhcpcid_t *cid = &p->mp_cids[i]; + + if (cid->dc_len > MPT_MAXCIDLEN || + (cid->dc_form != CIDFORM_TYPED && + cid->dc_form != CIDFORM_HEX && + cid->dc_form != CIDFORM_STR)) + return (EINVAL); + + for (j = 0; j < p->mp_cidcnt; j++) { + mac_dhcpcid_t *cid1 = &p->mp_cids[j]; + + if (i == j || cid->dc_len != cid1->dc_len) + continue; + + 
/* found a duplicate */ + if (bcmp(cid->dc_id, cid1->dc_id, cid->dc_len) == 0) + return (EINVAL); + } + } + return (0); } /* @@ -256,33 +2152,18 @@ int mac_protect_validate(mac_resource_props_t *mrp) { mac_protect_t *p = &mrp->mrp_protect; + int err; /* check for invalid types */ if (p->mp_types != MPT_RESET && (p->mp_types & ~MPT_ALL) != 0) return (EINVAL); - if (p->mp_ipaddrcnt != MPT_RESET) { - uint_t i, j; - - if (p->mp_ipaddrcnt > MPT_MAXIPADDR) - return (EINVAL); + if ((err = validate_ips(p)) != 0) + return (err); - for (i = 0; i < p->mp_ipaddrcnt; i++) { - /* - * The unspecified address is implicitly allowed - * so there's no need to add it to the list. - */ - if (p->mp_ipaddrs[i] == INADDR_ANY) - return (EINVAL); + if ((err = validate_cids(p)) != 0) + return (err); - for (j = 0; j < p->mp_ipaddrcnt; j++) { - /* found a duplicate */ - if (i != j && - p->mp_ipaddrs[i] == p->mp_ipaddrs[j]) - return (EINVAL); - } - } - } return (0); } @@ -326,9 +2207,8 @@ mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr) curr->mrp_mask |= MRP_PROTECT; } } - if (np->mp_ipaddrcnt != 0) { - if (np->mp_ipaddrcnt < MPT_MAXIPADDR) { + if (np->mp_ipaddrcnt <= MPT_MAXIPADDR) { bcopy(np->mp_ipaddrs, cp->mp_ipaddrs, sizeof (cp->mp_ipaddrs)); cp->mp_ipaddrcnt = np->mp_ipaddrcnt; @@ -337,4 +2217,47 @@ mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr) cp->mp_ipaddrcnt = 0; } } + if (np->mp_cidcnt != 0) { + if (np->mp_cidcnt <= MPT_MAXCID) { + bcopy(np->mp_cids, cp->mp_cids, sizeof (cp->mp_cids)); + cp->mp_cidcnt = np->mp_cidcnt; + } else if (np->mp_cidcnt == MPT_RESET) { + bzero(cp->mp_cids, sizeof (cp->mp_cids)); + cp->mp_cidcnt = 0; + } + } +} + +void +mac_protect_init(mac_client_impl_t *mcip) +{ + mutex_init(&mcip->mci_protect_lock, NULL, MUTEX_DRIVER, NULL); + mcip->mci_protect_flags = 0; + mcip->mci_txn_cleanup_tid = 0; + avl_create(&mcip->mci_v4_pending_txn, compare_dhcpv4_xid, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node)); + 
avl_create(&mcip->mci_v4_completed_txn, compare_dhcpv4_cid, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node)); + avl_create(&mcip->mci_v4_dyn_ip, compare_dhcpv4_ip, + sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_ipnode)); + avl_create(&mcip->mci_v6_pending_txn, compare_dhcpv6_xid, + sizeof (dhcpv6_txn_t), offsetof(dhcpv6_txn_t, dt_node)); + avl_create(&mcip->mci_v6_cid, compare_dhcpv6_cid, + sizeof (dhcpv6_cid_t), offsetof(dhcpv6_cid_t, dc_node)); + avl_create(&mcip->mci_v6_dyn_ip, compare_dhcpv6_ip, + sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node)); +} + +void +mac_protect_fini(mac_client_impl_t *mcip) +{ + avl_destroy(&mcip->mci_v6_dyn_ip); + avl_destroy(&mcip->mci_v6_cid); + avl_destroy(&mcip->mci_v6_pending_txn); + avl_destroy(&mcip->mci_v4_dyn_ip); + avl_destroy(&mcip->mci_v4_completed_txn); + avl_destroy(&mcip->mci_v4_pending_txn); + mcip->mci_txn_cleanup_tid = 0; + mcip->mci_protect_flags = 0; + mutex_destroy(&mcip->mci_protect_lock); } diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 43501e3505..f0fe17ae0a 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -40,6 +40,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_client_priv.h> #include <sys/mac_soft_ring.h> +#include <sys/mac_stat.h> #include <sys/dld.h> #include <sys/modctl.h> #include <sys/fs/dv_node.h> @@ -53,6 +54,8 @@ #include <sys/ddi_intr_impl.h> #include <sys/disp.h> #include <sys/sdt.h> +#include <sys/pattr.h> +#include <sys/strsun.h> /* * MAC Provider Interface. @@ -298,8 +301,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) /* * Register the private properties. 
*/ - mac_register_priv_prop(mip, mregp->m_priv_props, - mregp->m_priv_prop_count); + mac_register_priv_prop(mip, mregp->m_priv_props); /* * Stash the driver callbacks into the mac_impl_t, but first sanity @@ -334,6 +336,9 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) * Initialize the capabilities */ + bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t)); + bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t)); + if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL)) mip->mi_state_flags |= MIS_IS_VNIC; @@ -371,18 +376,6 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) } /* - * The driver must set mc_tx entry point to NULL when it advertises - * CAP_RINGS for tx rings. - */ - if (mip->mi_tx_groups != NULL) { - if (mregp->m_callbacks->mc_tx != NULL) - goto fail; - } else { - if (mregp->m_callbacks->mc_tx == NULL) - goto fail; - } - - /* * Initialize MAC addresses. Must be called after mac_init_rings(). */ mac_init_macaddr(mip); @@ -396,7 +389,7 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) /* * Initialize the kstats for this device. */ - mac_stat_create(mip); + mac_driver_stat_create(mip); /* Zero out any properties. 
*/ bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t)); @@ -466,7 +459,7 @@ fail: mip->mi_info.mi_unicst_addr = NULL; } - mac_stat_destroy(mip); + mac_driver_stat_delete(mip); if (mip->mi_type != NULL) { atomic_dec_32(&mip->mi_type->mt_ref); @@ -484,6 +477,7 @@ fail: mac_minor_rele(minor); } + mip->mi_state_flags = 0; mac_unregister_priv_prop(mip); /* @@ -532,7 +526,7 @@ mac_unregister(mac_handle_t mh) ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags & MIS_EXCLUSIVE)); - mac_stat_destroy(mip); + mac_driver_stat_delete(mip); (void) mod_hash_remove(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name, &val); @@ -772,11 +766,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) void mac_tx_update(mac_handle_t mh) { - /* - * Walk the list of MAC clients (mac_client_handle) - * and update - */ - i_mac_tx_srs_notify((mac_impl_t *)mh, NULL); + mac_tx_ring_update(mh, NULL); } /* @@ -959,6 +949,151 @@ mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max) return (0); } +static void +mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring) +{ + mac_client_impl_t *mcip; + flow_entry_t *flent; + mac_soft_ring_set_t *mac_rx_srs; + mac_cpus_t *srs_cpu; + int i; + + if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) && + (!ring->mr_info.mri_intr.mi_ddi_shared)) { + /* interrupt can be re-targeted */ + ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); + flent = mcip->mci_flent; + if (ring->mr_type == MAC_RING_TYPE_RX) { + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_rx_srs = flent->fe_rx_srs[i]; + if (mac_rx_srs->srs_ring != ring) + continue; + srs_cpu = &mac_rx_srs->srs_cpu; + mutex_enter(&cpu_lock); + mac_rx_srs_retarget_intr(mac_rx_srs, + srs_cpu->mc_rx_intr_cpu); + mutex_exit(&cpu_lock); + break; + } + } else { + if (flent->fe_tx_srs != NULL) { + mutex_enter(&cpu_lock); + mac_tx_srs_retarget_intr( + flent->fe_tx_srs); + mutex_exit(&cpu_lock); + } + } + } +} + +/* + * Clients like aggr create pseudo rings (mac_ring_t) and 
expose them to + * their clients. There is a 1-1 mapping between the pseudo ring and the hardware + * ring. ddi interrupt handles are exported from the hardware ring to + * the pseudo ring. Thus when the interrupt handle changes, clients of + * aggr that are using the handle need to use the new handle and + * re-target their interrupts. + */ +static void +mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring, + ddi_intr_handle_t ddh) +{ + mac_ring_t *pring; + mac_group_t *pgroup; + mac_impl_t *pmip; + char macname[MAXNAMELEN]; + mac_perim_handle_t p_mph; + uint64_t saved_gen_num; + +again: + pring = (mac_ring_t *)ring->mr_prh; + pgroup = (mac_group_t *)pring->mr_gh; + pmip = (mac_impl_t *)pgroup->mrg_mh; + saved_gen_num = ring->mr_gen_num; + (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN); + /* + * We need to enter aggr's perimeter. The locking hierarchy + * dictates that aggr's perimeter should be entered first + * and then the port's perimeter. So drop the port's + * perimeter, enter aggr's and then re-enter port's + * perimeter. + */ + i_mac_perim_exit(mip); + /* + * While we know pmip is the aggr's mip, there is a + * possibility that aggr could have unregistered by + * the time we exit port's perimeter (mip) and + * enter aggr's perimeter (pmip). To avoid that + * scenario, enter aggr's perimeter using its name. + */ + if (mac_perim_enter_by_macname(macname, &p_mph) != 0) + return; + i_mac_perim_enter(mip); + /* + * Check if the ring got assigned to another aggregation before + * we could enter aggr's and the port's perimeter. When a ring + * gets deleted from an aggregation, it calls mac_stop_ring() + * which increments the generation number. So checking + * generation number will be enough. 
+ */ + if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) { + i_mac_perim_exit(mip); + mac_perim_exit(p_mph); + i_mac_perim_enter(mip); + goto again; + } + + /* Check if pseudo ring is still present */ + if (ring->mr_prh != NULL) { + pring->mr_info.mri_intr.mi_ddi_handle = ddh; + pring->mr_info.mri_intr.mi_ddi_shared = + ring->mr_info.mri_intr.mi_ddi_shared; + if (ddh != NULL) + mac_ring_intr_retarget(pgroup, pring); + } + i_mac_perim_exit(mip); + mac_perim_exit(p_mph); +} +/* + * API called by driver to provide new interrupt handle for TX/RX rings. + * This usually happens when IRM (Interrupt Resource Management) + * framework either gives the driver more MSI-x interrupts or takes + * away MSI-x interrupts from the driver. + */ +void +mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh) +{ + mac_ring_t *ring = (mac_ring_t *)mrh; + mac_group_t *group = (mac_group_t *)ring->mr_gh; + mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; + + i_mac_perim_enter(mip); + ring->mr_info.mri_intr.mi_ddi_handle = ddh; + if (ddh == NULL) { + /* Interrupts being reset */ + ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE; + if (ring->mr_prh != NULL) { + mac_pseudo_ring_intr_retarget(mip, ring, ddh); + return; + } + } else { + /* New interrupt handle */ + mac_compare_ddi_handle(mip->mi_rx_groups, + mip->mi_rx_group_count, ring); + if (!ring->mr_info.mri_intr.mi_ddi_shared) { + mac_compare_ddi_handle(mip->mi_tx_groups, + mip->mi_tx_group_count, ring); + } + if (ring->mr_prh != NULL) { + mac_pseudo_ring_intr_retarget(mip, ring, ddh); + return; + } else { + mac_ring_intr_retarget(group, ring); + } + } + i_mac_perim_exit(mip); +} + /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */ /* @@ -1141,16 +1276,8 @@ mac_group_add_ring(mac_group_handle_t gh, int index) int ret; i_mac_perim_enter(mip); - - /* - * Only RX rings can be added or removed by drivers currently. 
- */ - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); - ret = i_mac_group_add_ring(group, NULL, index); - i_mac_perim_exit(mip); - return (ret); } @@ -1166,13 +1293,167 @@ mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh) mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; i_mac_perim_enter(mip); + i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); + i_mac_perim_exit(mip); +} - /* - * Only RX rings can be added or removed by drivers currently. - */ - ASSERT(group->mrg_type == MAC_RING_TYPE_RX); +/* + * mac_prop_info_*() callbacks called from the driver's prefix_propinfo() + * entry points. + */ - i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); +void +mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; - i_mac_perim_exit(mip); + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint8_t)); + + *(uint8_t *)(pr->pr_default) = val; + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint64_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (uint32_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str) +{ + mac_prop_info_state_t *pr = 
(mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + if (strlen(str) >= pr->pr_default_size) + pr->pr_default_status = ENOBUFS; + else + (void) strlcpy(pr->pr_default, str, pr->pr_default_size); + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph, + link_flowctrl_t val) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + /* nothing to do if the caller doesn't want the default value */ + if (pr->pr_default == NULL) + return; + + ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t)); + + bcopy(&val, pr->pr_default, sizeof (val)); + + pr->pr_flags |= MAC_PROP_INFO_DEFAULT; +} + +void +mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min, + uint32_t max) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + mac_propval_range_t *range = pr->pr_range; + + /* nothing to do if the caller doesn't want the range info */ + if (range == NULL) + return; + + range->mpr_count = 1; + range->mpr_type = MAC_PROPVAL_UINT32; + range->mpr_range_uint32[0].mpur_min = min; + range->mpr_range_uint32[0].mpur_max = max; + pr->pr_flags |= MAC_PROP_INFO_RANGE; +} + +void +mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm) +{ + mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; + + pr->pr_perm = perm; + pr->pr_flags |= MAC_PROP_INFO_PERM; +} + +void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff, + uint32_t *end, uint32_t *value, uint32_t *flags_ptr) +{ + uint32_t flags; + + ASSERT(DB_TYPE(mp) == M_DATA); + + flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS; + if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) { + if (value != NULL) + *value = (uint32_t)DB_CKSUM16(mp); + if ((flags & HCK_PARTIALCKSUM) != 0) { + if (start != NULL) + *start = (uint32_t)DB_CKSUMSTART(mp); + if (stuff != NULL) + *stuff = (uint32_t)DB_CKSUMSTUFF(mp); + if (end != NULL) + *end = (uint32_t)DB_CKSUMEND(mp); 
+ } + } + + if (flags_ptr != NULL) + *flags_ptr = flags; +} + +void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, + uint32_t end, uint32_t value, uint32_t flags) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + DB_CKSUMSTART(mp) = (intptr_t)start; + DB_CKSUMSTUFF(mp) = (intptr_t)stuff; + DB_CKSUMEND(mp) = (intptr_t)end; + DB_CKSUMFLAGS(mp) = (uint16_t)flags; + DB_CKSUM16(mp) = (uint16_t)value; +} + +void +mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + + if (flags != NULL) { + *flags = DB_CKSUMFLAGS(mp) & HW_LSO; + if ((*flags != 0) && (mss != NULL)) + *mss = (uint32_t)DB_LSOMSS(mp); + } } diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index 8b7f718497..9e1b2b0a55 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -50,6 +50,8 @@ static mac_tx_cookie_t mac_tx_fanout_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); static mac_tx_cookie_t mac_tx_bw_mode(mac_soft_ring_set_t *, mblk_t *, uintptr_t, uint16_t, mblk_t **); +static mac_tx_cookie_t mac_tx_aggr_mode(mac_soft_ring_set_t *, mblk_t *, + uintptr_t, uint16_t, mblk_t **); typedef struct mac_tx_mode_s { mac_tx_srs_mode_t mac_tx_mode; @@ -57,18 +59,34 @@ typedef struct mac_tx_mode_s { } mac_tx_mode_t; /* - * There are five modes of operation on the Tx side. These modes get set + * There are seven modes of operation on the Tx side. These modes get set * in mac_tx_srs_setup(). Except for the experimental TX_SERIALIZE mode, * none of the other modes are user configurable. They get selected by * the system depending upon whether the link (or flow) has multiple Tx - * rings or a bandwidth configured, etc. + * rings or a bandwidth configured, or if the link is an aggr, etc. 
+ * + * When the Tx SRS is operating in aggr mode (st_mode) or if there are + * multiple Tx rings owned by Tx SRS, then each Tx ring (pseudo or + * otherwise) will have a soft ring associated with it. These soft rings + * are stored in srs_tx_soft_rings[] array. + * + * Additionally in the case of aggr, there is the st_soft_rings[] array + * in the mac_srs_tx_t structure. This array is used to store the same + * set of soft rings that are present in srs_tx_soft_rings[] array but + * in a different manner. The soft ring associated with the pseudo Tx + * ring is saved at mr_index (of the pseudo ring) in st_soft_rings[] + * array. This helps in quickly getting the soft ring associated with the + * Tx ring when aggr_find_tx_ring() returns the pseudo Tx ring that is to + * be used for transmit. */ mac_tx_mode_t mac_tx_mode_list[] = { {SRS_TX_DEFAULT, mac_tx_single_ring_mode}, {SRS_TX_SERIALIZE, mac_tx_serializer_mode}, {SRS_TX_FANOUT, mac_tx_fanout_mode}, {SRS_TX_BW, mac_tx_bw_mode}, - {SRS_TX_BW_FANOUT, mac_tx_bw_mode} + {SRS_TX_BW_FANOUT, mac_tx_bw_mode}, + {SRS_TX_AGGR, mac_tx_aggr_mode}, + {SRS_TX_BW_AGGR, mac_tx_bw_mode} }; /* @@ -307,21 +325,16 @@ int mac_srs_worker_wakeup_ticks = 0; } \ } -#define TX_SINGLE_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_DEFAULT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_SERIALIZE || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW) - #define TX_BANDWIDTH_MODE(mac_srs) \ ((mac_srs)->srs_tx.st_mode == SRS_TX_BW || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT || \ + (mac_srs)->srs_tx.st_mode == SRS_TX_BW_AGGR) #define TX_SRS_TO_SOFT_RING(mac_srs, head, hint) { \ - uint_t hash, indx; \ - hash = HASH_HINT(hint); \ - indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count); \ - softring = mac_srs->srs_oth_soft_rings[indx]; \ - (void) (mac_tx_soft_ring_process(softring, head, 0, NULL)); \ + if (tx_mode == SRS_TX_BW_FANOUT) \ + (void) mac_tx_fanout_mode(mac_srs, head, hint, 0, 
NULL);\ + else \ + (void) mac_tx_aggr_mode(mac_srs, head, hint, 0, NULL); \ } /* @@ -341,7 +354,7 @@ int mac_srs_worker_wakeup_ticks = 0; } else { \ ASSERT(!((srs)->srs_state & SRS_TX_BLOCKED)); \ (srs)->srs_state |= SRS_TX_BLOCKED; \ - (srs)->srs_tx.st_blocked_cnt++; \ + (srs)->srs_tx.st_stat.mts_blockcnt++; \ } \ } @@ -364,7 +377,7 @@ int mac_srs_worker_wakeup_ticks = 0; (srs)->srs_tx.st_hiwat_cnt++; \ if ((srs)->srs_count > (srs)->srs_tx.st_max_q_cnt) { \ /* increment freed stats */ \ - (srs)->srs_tx.st_drop_count += cnt; \ + (srs)->srs_tx.st_stat.mts_sdrops += cnt; \ /* \ * b_prev may be set to the fanout hint \ * hence can't use freemsg directly \ @@ -391,7 +404,7 @@ int mac_srs_worker_wakeup_ticks = 0; #define MAC_TX_SRS_DROP_MESSAGE(srs, mp, cookie) { \ mac_pkt_drop(NULL, NULL, mp, B_FALSE); \ /* increment freed stats */ \ - mac_srs->srs_tx.st_drop_count++; \ + mac_srs->srs_tx.st_stat.mts_sdrops++; \ cookie = (mac_tx_cookie_t)srs; \ } @@ -415,7 +428,7 @@ mac_rx_drop_pkt(mac_soft_ring_set_t *srs, mblk_t *mp) MAC_UPDATE_SRS_SIZE_LOCKED(srs, msgdsize(mp)); mutex_exit(&srs->srs_lock); - srs_rx->sr_drop_count++; + srs_rx->sr_stat.mrs_sdrops++; freemsg(mp); } @@ -448,7 +461,7 @@ mac_srs_fire(void *arg) * 'hint' is fanout_hint (type of uint64_t) which is given by the TCP/IP stack, * and it is used on the TX path. */ -#define HASH_HINT(hint) \ +#define HASH_HINT(hint) \ ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) @@ -797,8 +810,8 @@ mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp, * packets or because mblk's need to be concatenated using * pullupmsg(). 
*/ - if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(mp, ip6h, - &hdr_len, &nexthdr, NULL, NULL)) { + if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h, + mp->b_wptr, &hdr_len, &nexthdr, NULL)) { goto src_based_fanout; } whereptr = (uint8_t *)ip6h + hdr_len; @@ -1302,13 +1315,8 @@ check_again: tail->b_next = NULL; smcip = mac_srs->srs_mcip; - if ((mac_srs->srs_type & SRST_FLOW) || - (smcip == NULL)) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, - rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, - ipackets, count); - } + SRS_RX_STAT_UPDATE(mac_srs, pollbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, pollcnt, count); /* * If there are any promiscuous mode callbacks @@ -1316,9 +1324,6 @@ check_again: * if appropriate and also update the counters. */ if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; - if (smcip->mci_mip->mi_promisc_list != NULL) { mutex_exit(lock); mac_promisc_dispatch(smcip->mci_mip, @@ -1331,15 +1336,14 @@ check_again: mac_srs->srs_bw->mac_bw_polled += sz; mutex_exit(&mac_srs->srs_bw->mac_bw_lock); } - srs_rx->sr_poll_count += count; MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, head, tail, count, sz); if (count <= 10) - srs_rx->sr_chain_cnt_undr10++; + srs_rx->sr_stat.mrs_chaincntundr10++; else if (count > 10 && count <= 50) - srs_rx->sr_chain_cnt_10to50++; + srs_rx->sr_stat.mrs_chaincnt10to50++; else - srs_rx->sr_chain_cnt_over50++; + srs_rx->sr_stat.mrs_chaincntover50++; } /* @@ -1637,10 +1641,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, 
MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -1886,7 +1897,7 @@ again: /* zero bandwidth: drop all and return to interrupt mode */ mutex_enter(&mac_srs->srs_bw->mac_bw_lock); if (mac_srs->srs_bw->mac_bw_limit == 0) { - srs_rx->sr_drop_count += cnt; + srs_rx->sr_stat.mrs_sdrops += cnt; ASSERT(mac_srs->srs_bw->mac_bw_sz >= sz); mac_srs->srs_bw->mac_bw_sz -= sz; mac_srs->srs_bw->mac_bw_drop_bytes += sz; @@ -1908,10 +1919,17 @@ again: * callbacks for broadcast and multicast packets are delivered from * mac_rx() and we don't need to worry about that case in this path */ - if (mcip != NULL && mcip->mci_promisc_list != NULL) { - mutex_exit(&mac_srs->srs_lock); - mac_promisc_client_dispatch(mcip, head); - mutex_enter(&mac_srs->srs_lock); + if (mcip != NULL) { + if (mcip->mci_promisc_list != NULL) { + mutex_exit(&mac_srs->srs_lock); + mac_promisc_client_dispatch(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } + if (MAC_PROTECT_ENABLED(mcip, MPT_IPNOSPOOF)) { + mutex_exit(&mac_srs->srs_lock); + mac_protect_intercept_dhcp(mcip, head); + mutex_enter(&mac_srs->srs_lock); + } } /* @@ -2285,7 +2303,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, size_t sz = 0; size_t chain_sz, sz1; mac_bw_ctl_t *mac_bw; - mac_client_impl_t *smcip; mac_srs_rx_t *srs_rx = &mac_srs->srs_rx; /* @@ -2302,15 +2319,14 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } mutex_enter(&mac_srs->srs_lock); - smcip = mac_srs->srs_mcip; - if (mac_srs->srs_type & SRST_FLOW || smcip == NULL) { - FLOW_STAT_UPDATE(mac_srs->srs_flent, rbytes, sz); - FLOW_STAT_UPDATE(mac_srs->srs_flent, ipackets, count); - } - if (smcip != NULL) { - smcip->mci_stat_ibytes += sz; - smcip->mci_stat_ipackets += count; + if (loopback) { + SRS_RX_STAT_UPDATE(mac_srs, lclbytes, sz); + SRS_RX_STAT_UPDATE(mac_srs, lclcnt, count); + + } else { + SRS_RX_STAT_UPDATE(mac_srs, intrbytes, 
sz); + SRS_RX_STAT_UPDATE(mac_srs, intrcnt, count); } /* @@ -2323,12 +2339,10 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, mac_bw = mac_srs->srs_bw; ASSERT(mac_bw != NULL); mutex_enter(&mac_bw->mac_bw_lock); - /* Count the packets and bytes via interrupt */ - srs_rx->sr_intr_count += count; mac_bw->mac_bw_intr += sz; if (mac_bw->mac_bw_limit == 0) { /* zero bandwidth: drop all */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); mutex_exit(&mac_srs->srs_lock); @@ -2370,7 +2384,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } if (head != NULL) { /* Drop any packet over the threshold */ - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2392,7 +2406,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, if (!(mac_srs->srs_type & SRST_BW_CONTROL) && (srs_rx->sr_poll_pkt_cnt > srs_rx->sr_hiwat)) { mac_bw = mac_srs->srs_bw; - srs_rx->sr_drop_count += count; + srs_rx->sr_stat.mrs_sdrops += count; mutex_enter(&mac_bw->mac_bw_lock); mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); @@ -2402,8 +2416,6 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, } MAC_RX_SRS_ENQUEUE_CHAIN(mac_srs, mp_chain, tail, count, sz); - /* Count the packets entering via interrupt path */ - srs_rx->sr_intr_count += count; if (!(mac_srs->srs_state & SRS_PROC)) { /* @@ -2510,7 +2522,7 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, /* * Ignore fanout hint if we don't have multiple tx rings. 
*/ - if (!TX_MULTI_RING_MODE(mac_srs)) + if (!MAC_TX_SOFT_RINGS(mac_srs)) fanout_hint = 0; if (mac_srs->srs_first != NULL) @@ -2550,25 +2562,30 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } /* - * There are five tx modes: + * There are seven tx modes: * * 1) Default mode (SRS_TX_DEFAULT) * 2) Serialization mode (SRS_TX_SERIALIZE) * 3) Fanout mode (SRS_TX_FANOUT) * 4) Bandwdith mode (SRS_TX_BW) * 5) Fanout and Bandwidth mode (SRS_TX_BW_FANOUT) + * 6) aggr Tx mode (SRS_TX_AGGR) + * 7) aggr Tx bw mode (SRS_TX_BW_AGGR) * * The tx mode in which an SRS operates is decided in mac_tx_srs_setup() * based on the number of Tx rings requested for an SRS and whether * bandwidth control is requested or not. * - * In the default mode (i.e., no fanout/no bandwidth), the SRS acts as a - * pass-thru. Packets will go directly to mac_tx_send(). When the underlying - * Tx ring runs out of Tx descs, it starts queueing up packets in SRS. - * When flow-control is relieved, the srs_worker drains the queued - * packets and informs blocked clients to restart sending packets. + * The default mode (i.e., no fanout/no bandwidth) is used when the + * underlying NIC does not have Tx rings or just one Tx ring. In this mode, + * the SRS acts as a pass-thru. Packets will go directly to mac_tx_send(). + * When the underlying Tx ring runs out of Tx descs, it starts queueing up + * packets in SRS. When flow-control is relieved, the srs_worker drains + * the queued packets and informs blocked clients to restart sending + * packets. * - * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. + * In the SRS_TX_SERIALIZE mode, all calls to mac_tx() are serialized. This + * mode is used when the link has no Tx rings or only one Tx ring. * * In the SRS_TX_FANOUT mode, packets will be fanned out to multiple * Tx rings. Each Tx ring will have a soft ring associated with it. @@ -2581,6 +2598,19 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, * SRS. 
If fanout to multiple Tx rings is configured, the packets will * be fanned out among the soft rings associated with the Tx rings. * + * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine + * invokes an aggr function, aggr_find_tx_ring(), to find a pseudo Tx ring + * belonging to a port on which the packet has to be sent. Aggr will + * always have a pseudo Tx ring associated with it even when it is an + * aggregation over a single NIC that has no Tx rings. Even in such a + * case, the single pseudo Tx ring will have a soft ring associated with + * it and the soft ring will hang off the SRS. + * + * If a bandwidth is specified for an aggr, SRS_TX_BW_AGGR mode is used. + * In this mode, the bandwidth is first applied on the outgoing packets + * and later mac_tx_aggr_mode() function is called to send the packet out + * of one of the pseudo Tx rings. + * * Four flags are used in srs_state for indicating flow control * conditions : SRS_TX_BLOCKED, SRS_TX_HIWAT, SRS_TX_WAKEUP_CLIENT. * SRS_TX_BLOCKED indicates out of Tx descs. SRS expects a wakeup from the @@ -2625,7 +2655,6 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; @@ -2656,10 +2685,8 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); /* * Multiple threads could be here sending packets. 
@@ -2676,9 +2703,7 @@ mac_tx_single_ring_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } @@ -2696,7 +2721,6 @@ static mac_tx_cookie_t mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) { - boolean_t is_subflow; mac_tx_stats_t stats; mac_tx_cookie_t cookie = NULL; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; @@ -2726,10 +2750,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mac_srs->srs_state |= SRS_PROC; mutex_exit(&mac_srs->srs_lock); - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); mutex_enter(&mac_srs->srs_lock); mac_srs->srs_state &= ~SRS_PROC; @@ -2747,8 +2769,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, } mutex_exit(&mac_srs->srs_lock); - if (is_subflow && cookie == NULL) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + if (cookie == NULL) + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (cookie); } @@ -2766,8 +2788,8 @@ mac_tx_serializer_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, */ #define MAC_TX_SOFT_RING_PROCESS(chain) { \ - index = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count), \ - softring = mac_srs->srs_oth_soft_rings[index]; \ + index = COMPUTE_INDEX(hash, mac_srs->srs_tx_ring_count), \ + softring = mac_srs->srs_tx_soft_rings[index]; \ cookie = mac_tx_soft_ring_process(softring, chain, flag, ret_mp); \ DTRACE_PROBE2(tx__fanout, uint64_t, hash, uint_t, index); \ } @@ -2781,7 +2803,8 @@ mac_tx_fanout_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, uint_t index; mac_tx_cookie_t cookie = NULL; - ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT); + ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || + 
mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); if (fanout_hint != 0) { /* * The hint is specified by the caller, simply pass the @@ -2926,18 +2949,18 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, hash = HASH_HINT(fanout_hint); indx = COMPUTE_INDEX(hash, - mac_srs->srs_oth_ring_count); - softring = mac_srs->srs_oth_soft_rings[indx]; + mac_srs->srs_tx_ring_count); + softring = mac_srs->srs_tx_soft_rings[indx]; return (mac_tx_soft_ring_process(softring, mp_chain, flag, ret_mp)); + } else if (srs_tx->st_mode == SRS_TX_BW_AGGR) { + return (mac_tx_aggr_mode(mac_srs, mp_chain, + fanout_hint, flag, ret_mp)); } else { - boolean_t is_subflow; mac_tx_stats_t stats; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); - mp_chain = mac_tx_send(srs_tx->st_arg1, srs_tx->st_arg2, - mp_chain, (is_subflow ? &stats : NULL)); + mp_chain, &stats); if (mp_chain != NULL) { mutex_enter(&mac_srs->srs_lock); @@ -2951,13 +2974,68 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, mutex_exit(&mac_srs->srs_lock); return (cookie); } - if (is_subflow) - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); + SRS_TX_STATS_UPDATE(mac_srs, &stats); return (NULL); } } +/* + * mac_tx_aggr_mode + * + * This routine invokes an aggr function, aggr_find_tx_ring(), to find + * a (pseudo) Tx ring belonging to a port on which the packet has to + * be sent. aggr_find_tx_ring() first finds the outgoing port based on + * L2/L3/L4 policy and then uses the fanout_hint passed to it to pick + * a Tx ring from the selected port. + * + * Note that a port can be deleted from the aggregation. In such a case, + * the aggregation layer first separates the port from the rest of the + * ports making sure that port (and thus any Tx rings associated with + * it) won't get selected in the call to aggr_find_tx_ring() function. 
+ * Later calls are made to mac_group_rem_ring() passing pseudo Tx ring + * handles one by one which in turn will quiesce the Tx SRS and remove + * the soft ring associated with the pseudo Tx ring. Unlike Rx side + * where a cookie is used to protect against mac_rx_ring() calls on + * rings that have been removed, no such cookie is needed on the Tx + * side as the pseudo Tx ring won't be available anymore to + * aggr_find_tx_ring() once the port has been removed. + */ +static mac_tx_cookie_t +mac_tx_aggr_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, + uintptr_t fanout_hint, uint16_t flag, mblk_t **ret_mp) +{ + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_tx_ring_fn_t find_tx_ring_fn; + mac_ring_handle_t ring = NULL; + void *arg; + mac_soft_ring_t *sringp; + + find_tx_ring_fn = srs_tx->st_capab_aggr.mca_find_tx_ring_fn; + arg = srs_tx->st_capab_aggr.mca_arg; + if (find_tx_ring_fn(arg, mp_chain, fanout_hint, &ring) == NULL) + return (NULL); + sringp = srs_tx->st_soft_rings[((mac_ring_t *)ring)->mr_index]; + return (mac_tx_soft_ring_process(sringp, mp_chain, flag, ret_mp)); +} + +void +mac_tx_invoke_callbacks(mac_client_impl_t *mcip, mac_tx_cookie_t cookie) +{ + mac_cb_t *mcb; + mac_tx_notify_cb_t *mtnfp; + + /* Wakeup callback registered clients */ + MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); + for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; + mcb = mcb->mcb_nextp) { + mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; + mtnfp->mtnf_fn(mtnfp->mtnf_arg, cookie); + } + MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, + &mcip->mci_tx_notify_cb_list); +} + /* ARGSUSED */ void mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) @@ -2966,7 +3044,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) size_t sz; uint32_t tx_mode; uint_t saved_pkt_count; - boolean_t is_subflow; mac_tx_stats_t stats; mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; clock_t now; @@ -2977,7 +3054,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, 
uint_t proc_type) mac_srs->srs_state |= SRS_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); tx_mode = srs_tx->st_mode; if (tx_mode == SRS_TX_DEFAULT || tx_mode == SRS_TX_SERIALIZE) { if (mac_srs->srs_first != NULL) { @@ -3000,16 +3076,13 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } } else if (tx_mode == SRS_TX_BW) { @@ -3065,10 +3138,10 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) tail->b_next = mac_srs->srs_first; mac_srs->srs_first = head; mac_srs->srs_count += - (saved_pkt_count - stats.ts_opackets); + (saved_pkt_count - stats.mts_opackets); if (mac_srs->srs_last == NULL) mac_srs->srs_last = tail; - size_sent = sz - stats.ts_obytes; + size_sent = sz - stats.mts_obytes; mac_srs->srs_size += size_sent; mac_srs->srs_bw->mac_bw_sz += size_sent; if (mac_srs->srs_bw->mac_bw_used > size_sent) { @@ -3080,15 +3153,11 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) MAC_TX_SRS_BLOCK(mac_srs, head); } else { srs_tx->st_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); } } - } else if (tx_mode == SRS_TX_BW_FANOUT) { + } else if (tx_mode == SRS_TX_BW_FANOUT || tx_mode == SRS_TX_BW_AGGR) { mblk_t *prev; - mac_soft_ring_t *softring; uint64_t hint; /* @@ -3155,8 +3224,6 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) */ if (mac_srs->srs_count == 0 && (mac_srs->srs_state & (SRS_TX_HIWAT | SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED))) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; 
mac_client_impl_t *mcip = mac_srs->srs_mcip; boolean_t wakeup_required = B_FALSE; @@ -3168,16 +3235,7 @@ mac_tx_srs_drain(mac_soft_ring_set_t *mac_srs, uint_t proc_type) SRS_TX_WAKEUP_CLIENT | SRS_ENQUEUED); mutex_exit(&mac_srs->srs_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)mac_srs); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)mac_srs); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper @@ -3276,11 +3334,10 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } /* - * Fastpath: if there's only one client, and there's no - * multicast listeners, we simply send the packet down to the - * underlying NIC. + * Fastpath: if there's only one client, we simply send + * the packet down to the underlying NIC. */ - if (mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL) { + if (mip->mi_nactiveclients == 1) { DTRACE_PROBE2(fastpath, mac_client_impl_t *, src_mcip, mblk_t *, mp_chain); @@ -3293,9 +3350,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, msgdsize(mp)); CHECK_VID_AND_ADD_TAG(mp); - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); /* * If the driver is out of descriptors and does a @@ -3336,12 +3391,6 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, CHECK_VID_AND_ADD_TAG(mp); /* - * Check if there are promiscuous mode callbacks defined. - */ - if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp, src_mcip); - - /* * Find the destination. 
*/ dst_flow_ent = mac_tx_classify(mip, mp); @@ -3395,16 +3444,31 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, B_TRUE); } else { /* - * loopback the packet to a - * local MAC client. We force a context - * switch if both source and destination - * MAC clients are used by IP, i.e. bypass - * is set. + * loopback the packet to a local MAC + * client. We force a context switch + * if both source and destination MAC + * clients are used by IP, i.e. + * bypass is set. */ boolean_t do_switch; mac_client_impl_t *dst_mcip = dst_flow_ent->fe_mcip; + /* + * Check if there are promiscuous mode + * callbacks defined. This check is + * done here in the 'else' case and + * not in other cases because this + * path is for local loopback + * communication which does not go + * through MAC_TX(). For paths that go + * through MAC_TX(), the promisc_list + * check is done inside the MAC_TX() + * macro. + */ + if (mip->mi_promisc_list != NULL) + mac_promisc_dispatch(mip, mp, src_mcip); + do_switch = ((src_mcip->mci_state_flags & dst_mcip->mci_state_flags & MCIS_CLIENT_POLL_CAPABLE) != 0); @@ -3422,9 +3486,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * Unknown destination, send via the underlying * NIC. 
*/ - MAC_TX(mip, ring, mp, - ((src_mcip->mci_state_flags & MCIS_SHARE_BOUND) != - 0)); + MAC_TX(mip, ring, mp, src_mcip); if (mp != NULL) { /* * Adjust for the last packet that @@ -3440,15 +3502,9 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, } done: - src_mcip->mci_stat_obytes += obytes; - src_mcip->mci_stat_opackets += opackets; - src_mcip->mci_stat_oerrors += oerrors; - - if (stats != NULL) { - stats->ts_opackets = opackets; - stats->ts_obytes = obytes; - stats->ts_oerrors = oerrors; - } + stats->mts_obytes = obytes; + stats->mts_opackets = opackets; + stats->mts_oerrors = oerrors; return (mp); } @@ -3466,8 +3522,8 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) if (srs->srs_tx.st_arg2 == tx_ring) return (B_TRUE); - for (i = 0; i < srs->srs_oth_ring_count; i++) { - soft_ring = srs->srs_oth_soft_rings[i]; + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; if (soft_ring->s_ring_tx_arg2 == tx_ring) return (B_TRUE); } @@ -3476,6 +3532,29 @@ mac_tx_srs_ring_present(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) } /* + * mac_tx_srs_get_soft_ring + * + * Returns the TX soft ring associated with the given ring, if present. + */ +mac_soft_ring_t * +mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *srs, mac_ring_t *tx_ring) +{ + int i; + mac_soft_ring_t *soft_ring; + + if (srs->srs_tx.st_arg2 == tx_ring) + return (NULL); + + for (i = 0; i < srs->srs_tx_ring_count; i++) { + soft_ring = srs->srs_tx_soft_rings[i]; + if (soft_ring->s_ring_tx_arg2 == tx_ring) + return (soft_ring); + } + + return (NULL); +} + +/* * mac_tx_srs_wakeup * * Called when Tx desc become available. Wakeup the appropriate worker @@ -3490,11 +3569,16 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; mutex_enter(&mac_srs->srs_lock); - if (TX_SINGLE_RING_MODE(mac_srs)) { + /* + * srs_tx_ring_count == 0 is the single ring mode case. 
In + * this mode, there will not be Tx soft rings associated + * with the SRS. + */ + if (!MAC_TX_SOFT_RINGS(mac_srs)) { if (srs_tx->st_arg2 == ring && mac_srs->srs_state & SRS_TX_BLOCKED) { mac_srs->srs_state &= ~SRS_TX_BLOCKED; - srs_tx->st_unblocked_cnt++; + srs_tx->st_stat.mts_unblockcnt++; cv_signal(&mac_srs->srs_async); } /* @@ -3507,15 +3591,17 @@ mac_tx_srs_wakeup(mac_soft_ring_set_t *mac_srs, mac_ring_handle_t ring) return; } - /* If you are here, it is for FANOUT or BW_FANOUT case */ - ASSERT(TX_MULTI_RING_MODE(mac_srs)); - for (i = 0; i < mac_srs->srs_oth_ring_count; i++) { - sringp = mac_srs->srs_oth_soft_rings[i]; + /* + * If you are here, it is for FANOUT, BW_FANOUT, + * AGGR_MODE or AGGR_BW_MODE case + */ + for (i = 0; i < mac_srs->srs_tx_ring_count; i++) { + sringp = mac_srs->srs_tx_soft_rings[i]; mutex_enter(&sringp->s_ring_lock); if (sringp->s_ring_tx_arg2 == ring) { if (sringp->s_ring_state & S_RING_BLOCK) { sringp->s_ring_state &= ~S_RING_BLOCK; - sringp->s_ring_unblocked_cnt++; + sringp->s_st_stat.mts_unblockcnt++; cv_signal(&sringp->s_ring_async); } sringp->s_ring_tx_woken_up = B_TRUE; @@ -3619,6 +3705,7 @@ mac_rx_soft_ring_process(mac_client_impl_t *mcip, mac_soft_ring_t *ringp, mutex_enter(&ringp->s_ring_lock); ringp->s_ring_total_inpkt += cnt; + ringp->s_ring_total_rbytes += sz; if ((mac_srs->srs_rx.sr_poll_pkt_cnt <= 1) && !(ringp->s_ring_type & ST_RING_WORKER_ONLY)) { /* If on processor or blanking on, then enqueue and return */ @@ -3831,11 +3918,14 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, ASSERT(mp_chain != NULL); ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock)); /* - * Only two modes can come here; either it can be - * SRS_TX_BW_FANOUT or SRS_TX_FANOUT + * The following modes can come here: SRS_TX_BW_FANOUT, + * SRS_TX_FANOUT, SRS_TX_AGGR, SRS_TX_BW_AGGR. 
*/ + ASSERT(MAC_TX_SOFT_RINGS(mac_srs)); ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_FANOUT || - mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT); + mac_srs->srs_tx.st_mode == SRS_TX_BW_FANOUT || + mac_srs->srs_tx.st_mode == SRS_TX_AGGR || + mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); if (ringp->s_ring_type & ST_RING_WORKER_ONLY) { /* Serialization mode */ @@ -3871,7 +3961,6 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, * tx_srs_drain() completely drains out the * messages. */ - boolean_t is_subflow; mac_tx_stats_t stats; if (ringp->s_ring_state & S_RING_ENQUEUED) { @@ -3890,11 +3979,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, */ mutex_exit(&ringp->s_ring_lock); } - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); mp_chain = mac_tx_send(ringp->s_ring_tx_arg1, - ringp->s_ring_tx_arg2, mp_chain, - (is_subflow ? &stats : NULL)); + ringp->s_ring_tx_arg2, mp_chain, &stats); /* * Multiple threads could be here sending packets. @@ -3912,9 +3999,9 @@ mac_tx_soft_ring_process(mac_soft_ring_t *ringp, mblk_t *mp_chain, mutex_exit(&ringp->s_ring_lock); return (cookie); } - if (is_subflow) { - FLOW_TX_STATS_UPDATE(mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); + return (NULL); } } diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c index 25cc66ed52..151c99893b 100644 --- a/usr/src/uts/common/io/mac/mac_soft_ring.c +++ b/usr/src/uts/common/io/mac/mac_soft_ring.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,6 +88,7 @@ #include <sys/mac_client_impl.h> #include <sys/mac_soft_ring.h> #include <sys/mac_flow_impl.h> +#include <sys/mac_stat.h> static void mac_rx_soft_ring_drain(mac_soft_ring_t *); static void mac_soft_ring_fire(void *); @@ -145,7 +146,7 @@ mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp) * thread to the assigned CPU. */ mac_soft_ring_t * -mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, +mac_soft_ring_create(int id, clock_t wait, uint16_t type, pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs, processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2) @@ -162,9 +163,13 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, } else if (type & ST_RING_UDP) { (void) snprintf(name, sizeof (name), "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs); - } else { + } else if (type & ST_RING_OTH) { (void) snprintf(name, sizeof (name), "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs); + } else { + ASSERT(type & ST_RING_TX); + (void) snprintf(name, sizeof (name), + "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs); } bzero(ringp, sizeof (mac_soft_ring_t)); @@ -177,7 +182,6 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, ringp->s_ring_wait = MSEC_TO_TICK(wait); ringp->s_ring_mcip = mcip; ringp->s_ring_set = mac_srs; - ringp->s_ring_flent = flent; /* * Protect against access from DR callbacks (mac_walk_srs_bind/unbind) @@ -202,6 +206,14 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, ringp->s_ring_tx_hiwat = (mac_tx_soft_ring_hiwat > mac_tx_soft_ring_max_q_cnt) ? 
mac_tx_soft_ring_max_q_cnt : mac_tx_soft_ring_hiwat; + if (mcip->mci_state_flags & MCIS_IS_AGGR) { + mac_srs_tx_t *tx = &mac_srs->srs_tx; + + ASSERT(tx->st_soft_rings[ + ((mac_ring_t *)x_arg2)->mr_index] == NULL); + tx->st_soft_rings[((mac_ring_t *)x_arg2)->mr_index] = + ringp; + } } else { ringp->s_ring_drain_func = mac_rx_soft_ring_drain; ringp->s_ring_rx_func = rx_func; @@ -213,6 +225,8 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, if (cpuid != -1) (void) mac_soft_ring_bind(ringp, cpuid); + mac_soft_ring_stat_create(ringp); + return (ringp); } @@ -222,18 +236,14 @@ mac_soft_ring_create(int id, clock_t wait, void *flent, uint16_t type, * Free the soft ring once we are done with it. */ void -mac_soft_ring_free(mac_soft_ring_t *softring, boolean_t release_tx_ring) +mac_soft_ring_free(mac_soft_ring_t *softring) { ASSERT((softring->s_ring_state & (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) == (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE)); mac_pkt_drop(NULL, NULL, softring->s_ring_first, B_FALSE); - if (release_tx_ring && softring->s_ring_tx_arg2 != NULL) { - ASSERT(softring->s_ring_type & ST_RING_TX); - mac_release_tx_ring(softring->s_ring_tx_arg2); - } - if (softring->s_ring_ksp) - kstat_delete(softring->s_ring_ksp); + softring->s_ring_tx_arg2 = NULL; + mac_soft_ring_stat_delete(softring); mac_callback_free(softring->s_ring_notify_cb_list); kmem_cache_free(mac_soft_ring_cache, softring); } @@ -642,7 +652,6 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) void *arg2; mblk_t *tail; uint_t saved_pkt_count, saved_size; - boolean_t is_subflow; mac_tx_stats_t stats; mac_soft_ring_set_t *mac_srs = ringp->s_ring_set; @@ -652,7 +661,6 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ASSERT(!(ringp->s_ring_state & S_RING_PROC)); ringp->s_ring_state |= S_RING_PROC; - is_subflow = ((mac_srs->srs_type & SRST_FLOW) != 0); arg1 = ringp->s_ring_tx_arg1; arg2 = ringp->s_ring_tx_arg2; @@ -675,8 +683,8 @@ mac_tx_soft_ring_drain(mac_soft_ring_t 
*ringp) tail->b_next = ringp->s_ring_first; ringp->s_ring_first = mp; ringp->s_ring_count += - (saved_pkt_count - stats.ts_opackets); - ringp->s_ring_size += (saved_size - stats.ts_obytes); + (saved_pkt_count - stats.mts_opackets); + ringp->s_ring_size += (saved_size - stats.mts_obytes); if (ringp->s_ring_last == NULL) ringp->s_ring_last = tail; @@ -684,7 +692,7 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ringp->s_ring_tx_woken_up = B_FALSE; } else { ringp->s_ring_state |= S_RING_BLOCK; - ringp->s_ring_blocked_cnt++; + ringp->s_st_stat.mts_blockcnt++; } ringp->s_ring_state &= ~S_RING_PROC; @@ -692,17 +700,13 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) return; } else { ringp->s_ring_tx_woken_up = B_FALSE; - if (is_subflow) { - FLOW_TX_STATS_UPDATE( - mac_srs->srs_flent, &stats); - } + SRS_TX_STATS_UPDATE(mac_srs, &stats); + SOFTRING_TX_STATS_UPDATE(ringp, &stats); } } if (ringp->s_ring_count == 0 && ringp->s_ring_state & (S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) { - mac_tx_notify_cb_t *mtnfp; - mac_cb_t *mcb; mac_client_impl_t *mcip = ringp->s_ring_mcip; boolean_t wakeup_required = B_FALSE; @@ -714,16 +718,7 @@ mac_tx_soft_ring_drain(mac_soft_ring_t *ringp) ~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED); mutex_exit(&ringp->s_ring_lock); if (wakeup_required) { - /* Wakeup callback registered clients */ - MAC_CALLBACK_WALKER_INC(&mcip->mci_tx_notify_cb_info); - for (mcb = mcip->mci_tx_notify_cb_list; mcb != NULL; - mcb = mcb->mcb_nextp) { - mtnfp = (mac_tx_notify_cb_t *)mcb->mcb_objp; - mtnfp->mtnf_fn(mtnfp->mtnf_arg, - (mac_tx_cookie_t)ringp); - } - MAC_CALLBACK_WALKER_DCR(&mcip->mci_tx_notify_cb_info, - &mcip->mci_tx_notify_cb_list); + mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp); /* * If the client is not the primary MAC client, then we * need to send the notification to the clients upper diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c index 87f2f914ff..31972f94d8 100644 
--- a/usr/src/uts/common/io/mac/mac_stat.c +++ b/usr/src/uts/common/io/mac/mac_stat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,10 +33,40 @@ #include <sys/kstat.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_client_impl.h> +#include <sys/mac_stat.h> +#include <sys/mac_soft_ring.h> +#include <sys/vlan.h> #define MAC_KSTAT_NAME "mac" #define MAC_KSTAT_CLASS "net" +enum mac_stat { + MAC_STAT_LCL, + MAC_STAT_LCLBYTES, + MAC_STAT_INTRS, + MAC_STAT_INTRBYTES, + MAC_STAT_POLLS, + MAC_STAT_POLLBYTES, + MAC_STAT_RXSDROPS, + MAC_STAT_CHU10, + MAC_STAT_CH10T50, + MAC_STAT_CHO50, + MAC_STAT_BLOCK, + MAC_STAT_UNBLOCK, + MAC_STAT_TXSDROPS, + MAC_STAT_TX_ERRORS, + MAC_STAT_MACSPOOFED, + MAC_STAT_IPSPOOFED, + MAC_STAT_DHCPSPOOFED, + MAC_STAT_RESTRICTED, + MAC_STAT_DHCPDROPPED, + MAC_STAT_MULTIRCVBYTES, + MAC_STAT_BRDCSTRCVBYTES, + MAC_STAT_MULTIXMTBYTES, + MAC_STAT_BRDCSTXMTBYTES +}; + static mac_stat_info_t i_mac_si[] = { { MAC_STAT_IFSPEED, "ifspeed", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT32, 0 }, @@ -60,7 +90,6 @@ static mac_stat_info_t i_mac_si[] = { { MAC_STAT_OBYTES, "obytes64", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 } }; - #define MAC_NKSTAT \ (sizeof (i_mac_si) / sizeof (mac_stat_info_t)) @@ -70,7 +99,6 @@ static mac_stat_info_t i_mac_mod_si[] = { { MAC_STAT_LINK_UP, "link_up", KSTAT_DATA_UINT32, 0 }, { MAC_STAT_PROMISC, "promisc", KSTAT_DATA_UINT32, 0 } }; - #define MAC_MOD_NKSTAT \ (sizeof (i_mac_mod_si) / sizeof (mac_stat_info_t)) @@ -79,11 +107,195 @@ static mac_stat_info_t i_mac_mod_si[] = { #define MAC_TYPE_KSTAT_OFFSET MAC_KSTAT_OFFSET + MAC_NKSTAT /* + * Definitions for per rx ring statistics + */ +static mac_stat_info_t i_mac_rx_ring_si[] = { + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { 
MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_HDROPS, "hdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_RING_NKSTAT \ + (sizeof (i_mac_rx_ring_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per tx ring statistics + */ +static mac_stat_info_t i_mac_tx_ring_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_RING_NKSTAT \ + (sizeof (i_mac_tx_ring_si) / sizeof (mac_stat_info_t)) + + +/* + * Definitions for per software lane tx statistics + */ +static mac_stat_info_t i_mac_tx_swlane_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_SWLANE_NKSTAT \ + (sizeof (i_mac_tx_swlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per software lane rx statistics + */ +static mac_stat_info_t i_mac_rx_swlane_si[] = { + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCL, "local", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCLBYTES, "localbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_SWLANE_NKSTAT \ + (sizeof (i_mac_rx_swlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per hardware lane rx statistics + */ +static mac_stat_info_t i_mac_rx_hwlane_si[] = { + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { 
MAC_STAT_POLLS, "polls", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLBYTES, "pollbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHU10, "chainunder10", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CH10T50, "chain10to50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHO50, "chainover50", KSTAT_DATA_UINT64, 0} +}; +#define MAC_RX_HWLANE_NKSTAT \ + (sizeof (i_mac_rx_hwlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for misc statistics + */ +static mac_stat_info_t i_mac_misc_si[] = { + { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTRCV, "brdcstrcv", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIXMT, "multixmt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTXMT, "brdcstxmt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIRCVBYTES, "multircvbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTRCVBYTES, "brdcstrcvbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MULTIXMTBYTES, "multixmtbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BRDCSTXMTBYTES, "brdcstxmtbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TX_ERRORS, "txerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_MACSPOOFED, "macspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPSPOOFED, "ipspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_DHCPSPOOFED, "dhcpspoofed", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RESTRICTED, "restricted", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_DHCPDROPPED, "dhcpdropped", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCL, "local", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_LCLBYTES, "localbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRS, "intrs", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_INTRBYTES, "intrbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLS, "polls", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_POLLBYTES, "pollbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_RXSDROPS, "rxsdrops", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHU10, "chainunder10", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CH10T50, 
"chain10to50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_CHO50, "chainover50", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_SUMMARY_NKSTAT \ + (sizeof (i_mac_misc_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per hardware lane tx statistics + */ +static mac_stat_info_t i_mac_tx_hwlane_si[] = { + { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_BLOCK, "blockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_UNBLOCK, "unblockcnt", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_TXSDROPS, "txsdrops", KSTAT_DATA_UINT64, 0} +}; +#define MAC_TX_HWLANE_NKSTAT \ + (sizeof (i_mac_tx_hwlane_si) / sizeof (mac_stat_info_t)) + +/* + * Definitions for per fanout rx statistics + */ +static mac_stat_info_t i_mac_rx_fanout_si[] = { + { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT64, 0}, + { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT64, 0}, +}; +#define MAC_RX_FANOUT_NKSTAT \ + (sizeof (i_mac_rx_fanout_si) / sizeof (mac_stat_info_t)) + +/* * Private functions. 
*/ +typedef struct { + uint_t si_offset; +} stat_info_t; + +#define RX_SRS_STAT_OFF(f) (offsetof(mac_rx_stats_t, f)) +static stat_info_t rx_srs_stats_list[] = { + {RX_SRS_STAT_OFF(mrs_lclbytes)}, + {RX_SRS_STAT_OFF(mrs_lclcnt)}, + {RX_SRS_STAT_OFF(mrs_pollcnt)}, + {RX_SRS_STAT_OFF(mrs_pollbytes)}, + {RX_SRS_STAT_OFF(mrs_intrcnt)}, + {RX_SRS_STAT_OFF(mrs_intrbytes)}, + {RX_SRS_STAT_OFF(mrs_sdrops)}, + {RX_SRS_STAT_OFF(mrs_chaincntundr10)}, + {RX_SRS_STAT_OFF(mrs_chaincnt10to50)}, + {RX_SRS_STAT_OFF(mrs_chaincntover50)}, + {RX_SRS_STAT_OFF(mrs_ierrors)} +}; +#define RX_SRS_STAT_SIZE \ + (sizeof (rx_srs_stats_list) / sizeof (stat_info_t)) + +#define TX_SOFTRING_STAT_OFF(f) (offsetof(mac_tx_stats_t, f)) +static stat_info_t tx_softring_stats_list[] = { + {TX_SOFTRING_STAT_OFF(mts_obytes)}, + {TX_SOFTRING_STAT_OFF(mts_opackets)}, + {TX_SOFTRING_STAT_OFF(mts_oerrors)}, + {TX_SOFTRING_STAT_OFF(mts_blockcnt)}, + {TX_SOFTRING_STAT_OFF(mts_unblockcnt)}, + {TX_SOFTRING_STAT_OFF(mts_sdrops)}, +}; +#define TX_SOFTRING_STAT_SIZE \ + (sizeof (tx_softring_stats_list) / sizeof (stat_info_t)) + +static void +i_mac_add_stats(void *sum, void *op1, void *op2, + stat_info_t stats_list[], uint_t size) +{ + int i; + + for (i = 0; i < size; i++) { + uint64_t *op1_val = (uint64_t *) + ((uchar_t *)op1 + stats_list[i].si_offset); + uint64_t *op2_val = (uint64_t *) + ((uchar_t *)op2 + stats_list[i].si_offset); + uint64_t *sum_val = (uint64_t *) + ((uchar_t *)sum + stats_list[i].si_offset); + + *sum_val = *op1_val + *op2_val; + } +} + static int -i_mac_stat_update(kstat_t *ksp, int rw) +i_mac_driver_stat_update(kstat_t *ksp, int rw) { mac_impl_t *mip = ksp->ks_private; kstat_named_t *knp = ksp->ks_data; @@ -136,6 +348,587 @@ i_mac_kstat_init(kstat_named_t *knp, mac_stat_info_t *si, uint_t count) } } +static int +i_mac_stat_update(kstat_t *ksp, int rw, uint64_t (*fn)(void *, uint_t), + mac_stat_info_t *msi, uint_t count) +{ + kstat_named_t *knp = ksp->ks_data; + uint_t i; + uint64_t val; + + if 
(rw != KSTAT_READ) + return (EACCES); + + for (i = 0; i < count; i++) { + val = fn(ksp->ks_private, msi[i].msi_stat); + + switch (msi[i].msi_type) { + case KSTAT_DATA_UINT64: + knp->value.ui64 = val; + break; + case KSTAT_DATA_UINT32: + knp->value.ui32 = (uint32_t)val; + break; + default: + ASSERT(B_FALSE); + break; + } + knp++; + } + return (0); +} + +/* + * Create kstat with given name - statname, update function - fn + * and initialize it with given names - init_stat_info + */ +static kstat_t * +i_mac_stat_create(void *handle, const char *modname, const char *statname, + int (*fn) (kstat_t *, int), + mac_stat_info_t *init_stat_info, uint_t count) +{ + kstat_t *ksp; + kstat_named_t *knp; + + ksp = kstat_create(modname, 0, statname, "net", + KSTAT_TYPE_NAMED, count, 0); + + if (ksp == NULL) + return (NULL); + + ksp->ks_update = fn; + ksp->ks_private = handle; + + knp = (kstat_named_t *)ksp->ks_data; + i_mac_kstat_init(knp, init_stat_info, count); + kstat_install(ksp); + + return (ksp); +} + +/* + * Per rx ring statistics + */ +uint64_t +mac_rx_ring_stat_get(void *handle, uint_t stat) +{ + mac_ring_t *ring = (mac_ring_t *)handle; + uint64_t val = 0; + + /* + * XXX Every ring-capable driver must implement an entry point to + * query per ring statistics. CR 6893122 tracks this work item. + * Once this bug is fixed, the framework should fail registration + * for a driver that does not implement this entry point and + * assert ring->mr_stat != NULL here. 
+ */ + if (ring->mr_stat != NULL) + ring->mr_stat(ring->mr_driver, stat, &val); + + return (val); +} + +static int +i_mac_rx_ring_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, mac_rx_ring_stat_get, + i_mac_rx_ring_si, MAC_RX_RING_NKSTAT)); +} + +static void +i_mac_rx_ring_stat_create(mac_ring_t *ring, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ring, modname, statname, + i_mac_rx_ring_stat_update, i_mac_rx_ring_si, MAC_RX_RING_NKSTAT); + + ring->mr_ksp = ksp; +} + +/* + * Per tx ring statistics + */ +uint64_t +mac_tx_ring_stat_get(void *handle, uint_t stat) +{ + mac_ring_t *ring = (mac_ring_t *)handle; + uint64_t val = 0; + + /* + * XXX Every ring-capable driver must implement an entry point to + * query per ring statistics. CR 6893122 tracks this work item. + * Once this bug is fixed, the framework should fail registration + * for a driver that does not implement this entry point and + * assert ring->mr_stat != NULL here. 
+ */ + if (ring->mr_stat != NULL) + ring->mr_stat(ring->mr_driver, stat, &val); + + return (val); +} + +static int +i_mac_tx_ring_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, mac_tx_ring_stat_get, + i_mac_tx_ring_si, MAC_TX_RING_NKSTAT)); +} + +static void +i_mac_tx_ring_stat_create(mac_ring_t *ring, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ring, modname, statname, + i_mac_tx_ring_stat_update, i_mac_tx_ring_si, MAC_TX_RING_NKSTAT); + + ring->mr_ksp = ksp; +} + +/* + * Per software lane tx statistics + */ +static uint64_t +i_mac_tx_swlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_tx_stats_t *mac_tx_stat = &mac_srs->srs_tx.st_stat; + + switch (stat) { + case MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_tx_swlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_tx_swlane_stat_get, + i_mac_tx_swlane_si, MAC_TX_SWLANE_NKSTAT)); +} + +static void +i_mac_tx_swlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_tx_swlane_stat_update, i_mac_tx_swlane_si, + MAC_TX_SWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + +/* + * Per software lane rx statistics + */ +static uint64_t +i_mac_rx_swlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + switch (stat) { + case MAC_STAT_IPACKETS: + 
return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_LCL: + return (mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_LCLBYTES: + return (mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + default: + return (0); + } +} + +static int +i_mac_rx_swlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_swlane_stat_get, + i_mac_rx_swlane_si, MAC_RX_SWLANE_NKSTAT)); +} + +static void +i_mac_rx_swlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_rx_swlane_stat_update, i_mac_rx_swlane_si, + MAC_RX_SWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + + +/* + * Per hardware lane rx statistics + */ +static uint64_t +i_mac_rx_hwlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_set_t *mac_srs = (mac_soft_ring_set_t *)handle; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + switch (stat) { + case MAC_STAT_IPACKETS: + return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_POLLS: + return (mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_POLLBYTES: + return (mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + case MAC_STAT_CHU10: + return (mac_rx_stat->mrs_chaincntundr10); + + case MAC_STAT_CH10T50: + return (mac_rx_stat->mrs_chaincnt10to50); + + case MAC_STAT_CHO50: + return (mac_rx_stat->mrs_chaincntover50); + + default: + return 
(0); + } +} + +static int +i_mac_rx_hwlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_hwlane_stat_get, + i_mac_rx_hwlane_si, MAC_RX_HWLANE_NKSTAT)); +} + +static void +i_mac_rx_hwlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(mac_srs, modname, statname, + i_mac_rx_hwlane_stat_update, i_mac_rx_hwlane_si, + MAC_RX_HWLANE_NKSTAT); + + mac_srs->srs_ksp = ksp; +} + + +/* + * Misc statistics + * + * Counts for + * - Multicast/broadcast Rx/Tx counts + * - Tx errors + */ +static uint64_t +i_mac_misc_stat_get(void *handle, uint_t stat) +{ + flow_entry_t *flent = handle; + mac_client_impl_t *mcip = flent->fe_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_rx_stats_t *mac_rx_stat; + mac_tx_stats_t *mac_tx_stat; + + mac_rx_stat = &mac_misc_stat->mms_defunctrxlanestats; + mac_tx_stat = &mac_misc_stat->mms_defuncttxlanestats; + + switch (stat) { + case MAC_STAT_MULTIRCV: + return (mac_misc_stat->mms_multircv); + + case MAC_STAT_BRDCSTRCV: + return (mac_misc_stat->mms_brdcstrcv); + + case MAC_STAT_MULTIXMT: + return (mac_misc_stat->mms_multixmt); + + case MAC_STAT_BRDCSTXMT: + return (mac_misc_stat->mms_brdcstxmt); + + case MAC_STAT_MULTIRCVBYTES: + return (mac_misc_stat->mms_multircvbytes); + + case MAC_STAT_BRDCSTRCVBYTES: + return (mac_misc_stat->mms_brdcstrcvbytes); + + case MAC_STAT_MULTIXMTBYTES: + return (mac_misc_stat->mms_multixmtbytes); + + case MAC_STAT_BRDCSTXMTBYTES: + return (mac_misc_stat->mms_brdcstxmtbytes); + + case MAC_STAT_TX_ERRORS: + return (mac_misc_stat->mms_txerrors); + + case MAC_STAT_MACSPOOFED: + return (mac_misc_stat->mms_macspoofed); + + case MAC_STAT_IPSPOOFED: + return (mac_misc_stat->mms_ipspoofed); + + case MAC_STAT_DHCPSPOOFED: + return (mac_misc_stat->mms_dhcpspoofed); + + case MAC_STAT_RESTRICTED: + return (mac_misc_stat->mms_restricted); + + case MAC_STAT_DHCPDROPPED: + return 
(mac_misc_stat->mms_dhcpdropped); + + case MAC_STAT_IPACKETS: + return (mac_rx_stat->mrs_intrcnt + + mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_RBYTES: + return (mac_rx_stat->mrs_intrbytes + + mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_LCL: + return (mac_rx_stat->mrs_lclcnt); + + case MAC_STAT_LCLBYTES: + return (mac_rx_stat->mrs_lclbytes); + + case MAC_STAT_INTRS: + return (mac_rx_stat->mrs_intrcnt); + + case MAC_STAT_INTRBYTES: + return (mac_rx_stat->mrs_intrbytes); + + case MAC_STAT_POLLS: + return (mac_rx_stat->mrs_pollcnt); + + case MAC_STAT_POLLBYTES: + return (mac_rx_stat->mrs_pollbytes); + + case MAC_STAT_RXSDROPS: + return (mac_rx_stat->mrs_sdrops); + + case MAC_STAT_CHU10: + return (mac_rx_stat->mrs_chaincntundr10); + + case MAC_STAT_CH10T50: + return (mac_rx_stat->mrs_chaincnt10to50); + + case MAC_STAT_CHO50: + return (mac_rx_stat->mrs_chaincntover50); + + case MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_misc_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_misc_stat_get, + i_mac_misc_si, MAC_SUMMARY_NKSTAT)); +} + +static void +i_mac_misc_stat_create(flow_entry_t *flent, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(flent, modname, statname, + i_mac_misc_stat_update, i_mac_misc_si, + MAC_SUMMARY_NKSTAT); + + flent->fe_misc_stat_ksp = ksp; +} + +/* + * Per hardware lane tx statistics + */ +static uint64_t +i_mac_tx_hwlane_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_t *ringp = (mac_soft_ring_t *)handle; + mac_tx_stats_t *mac_tx_stat = &ringp->s_st_stat; + + switch (stat) { + case 
MAC_STAT_OBYTES: + return (mac_tx_stat->mts_obytes); + + case MAC_STAT_OPACKETS: + return (mac_tx_stat->mts_opackets); + + case MAC_STAT_OERRORS: + return (mac_tx_stat->mts_oerrors); + + case MAC_STAT_BLOCK: + return (mac_tx_stat->mts_blockcnt); + + case MAC_STAT_UNBLOCK: + return (mac_tx_stat->mts_unblockcnt); + + case MAC_STAT_TXSDROPS: + return (mac_tx_stat->mts_sdrops); + + default: + return (0); + } +} + +static int +i_mac_tx_hwlane_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_tx_hwlane_stat_get, + i_mac_tx_hwlane_si, MAC_TX_HWLANE_NKSTAT)); +} + +static void +i_mac_tx_hwlane_stat_create(mac_soft_ring_t *ringp, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ringp, modname, statname, + i_mac_tx_hwlane_stat_update, i_mac_tx_hwlane_si, + MAC_TX_HWLANE_NKSTAT); + + ringp->s_ring_ksp = ksp; +} + +/* + * Per fanout rx statistics + * + * The handle is a TCP soft ring. The UDP and OTH soft rings stored at the + * same index of the owning SRS are looked up under srs_lock and their + * counters are summed with the TCP ring's, so a single value covers all + * three. Only MAC_STAT_RBYTES and MAC_STAT_IPACKETS are supported; any + * other stat id reads as 0. + */ +static uint64_t +i_mac_rx_fanout_stat_get(void *handle, uint_t stat) +{ + mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle; + mac_soft_ring_t *udp_ringp = NULL, *oth_ringp = NULL; + mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set; + int index; + uint64_t val; + + mutex_enter(&mac_srs->srs_lock); + /* Extract corresponding udp and oth ring pointers */ + for (index = 0; mac_srs->srs_tcp_soft_rings[index] != NULL; index++) { + if (mac_srs->srs_tcp_soft_rings[index] == tcp_ringp) { + udp_ringp = mac_srs->srs_udp_soft_rings[index]; + oth_ringp = mac_srs->srs_oth_soft_rings[index]; + break; + } + } + + ASSERT((udp_ringp != NULL) && (oth_ringp != NULL)); + + /* + * ASSERT() is compiled out of non-DEBUG kernels. If the lookup + * failed there, report zero instead of dereferencing a NULL ring + * pointer while holding srs_lock. + */ + if (udp_ringp == NULL || oth_ringp == NULL) { + mutex_exit(&mac_srs->srs_lock); + return (0); + } + + switch (stat) { + case MAC_STAT_RBYTES: + val = (tcp_ringp->s_ring_total_rbytes) + + (udp_ringp->s_ring_total_rbytes) + + (oth_ringp->s_ring_total_rbytes); + break; + + case MAC_STAT_IPACKETS: + val = (tcp_ringp->s_ring_total_inpkt) + + (udp_ringp->s_ring_total_inpkt) + + (oth_ringp->s_ring_total_inpkt); + break; + + default: + val = 0; + break; + } + mutex_exit(&mac_srs->srs_lock); + return (val); 
+} + +static int +i_mac_rx_fanout_stat_update(kstat_t *ksp, int rw) +{ + return (i_mac_stat_update(ksp, rw, i_mac_rx_fanout_stat_get, + i_mac_rx_fanout_si, MAC_RX_FANOUT_NKSTAT)); +} + +static void +i_mac_rx_fanout_stat_create(mac_soft_ring_t *ringp, const char *modname, + const char *statname) +{ + kstat_t *ksp; + + ksp = i_mac_stat_create(ringp, modname, statname, + i_mac_rx_fanout_stat_update, i_mac_rx_fanout_si, + MAC_RX_FANOUT_NKSTAT); + + ringp->s_ring_ksp = ksp; +} + /* * Exported functions. */ @@ -147,7 +940,7 @@ i_mac_kstat_init(kstat_named_t *knp, mac_stat_info_t *si, uint_t count) * also maintained by the driver. */ void -mac_stat_create(mac_impl_t *mip) +mac_driver_stat_create(mac_impl_t *mip) { kstat_t *ksp; kstat_named_t *knp; @@ -161,7 +954,7 @@ mac_stat_create(mac_impl_t *mip) if (ksp == NULL) return; - ksp->ks_update = i_mac_stat_update; + ksp->ks_update = i_mac_driver_stat_update; ksp->ks_private = mip; mip->mi_ksp = ksp; mip->mi_kstat_count = count; @@ -181,7 +974,7 @@ mac_stat_create(mac_impl_t *mip) /*ARGSUSED*/ void -mac_stat_destroy(mac_impl_t *mip) +mac_driver_stat_delete(mac_impl_t *mip) { if (mip->mi_ksp != NULL) { kstat_delete(mip->mi_ksp); @@ -191,15 +984,311 @@ mac_stat_destroy(mac_impl_t *mip) } uint64_t -mac_stat_default(mac_impl_t *mip, uint_t stat) +mac_driver_stat_default(mac_impl_t *mip, uint_t stat) { uint_t stat_index; if (IS_MAC_STAT(stat)) { stat_index = stat - MAC_STAT_MIN; + ASSERT(stat_index < MAC_NKSTAT); return (i_mac_si[stat_index].msi_default); } ASSERT(IS_MACTYPE_STAT(stat)); stat_index = stat - MACTYPE_STAT_MIN; + ASSERT(stat_index < mip->mi_type->mt_statcount); return (mip->mi_type->mt_stats[stat_index].msi_default); } + +void +mac_ring_stat_create(mac_ring_t *ring) +{ + mac_impl_t *mip = ring->mr_mip; + char statname[MAXNAMELEN]; + char modname[MAXNAMELEN]; + + if (mip->mi_state_flags & MIS_IS_AGGR) { + (void) strlcpy(modname, mip->mi_clients_list->mci_name, + MAXNAMELEN); + } else + (void) strlcpy(modname, 
mip->mi_name, MAXNAMELEN); + + switch (ring->mr_type) { + case MAC_RING_TYPE_RX: + (void) snprintf(statname, sizeof (statname), "mac_rx_ring%d", + ring->mr_index); + i_mac_rx_ring_stat_create(ring, modname, statname); + break; + + case MAC_RING_TYPE_TX: + (void) snprintf(statname, sizeof (statname), "mac_tx_ring%d", + ring->mr_index); + i_mac_tx_ring_stat_create(ring, modname, statname); + break; + + default: + ASSERT(B_FALSE); + break; + } +} + +void +mac_srs_stat_create(mac_soft_ring_set_t *mac_srs) +{ + flow_entry_t *flent = mac_srs->srs_flent; + char statname[MAXNAMELEN]; + boolean_t is_tx_srs; + + /* No hardware/software lanes for user defined flows */ + if ((flent->fe_type & FLOW_USER) != 0) + return; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + + if (is_tx_srs) { + mac_srs_tx_t *srs_tx = &mac_srs->srs_tx; + mac_ring_t *ring = srs_tx->st_arg2; + + if (ring != NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_tx_hwlane%d", ring->mr_index); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_tx_swlane0"); + } + i_mac_tx_swlane_stat_create(mac_srs, flent->fe_flow_name, + statname); + } else { + mac_ring_t *ring = mac_srs->srs_ring; + + if (ring == NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_rx_swlane0"); + i_mac_rx_swlane_stat_create(mac_srs, + flent->fe_flow_name, statname); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_rx_hwlane%d", ring->mr_index); + i_mac_rx_hwlane_stat_create(mac_srs, + flent->fe_flow_name, statname); + } + } +} + +void +mac_misc_stat_create(flow_entry_t *flent) +{ + char statname[MAXNAMELEN]; + + /* No misc stats for user defined or mcast/bcast flows */ + if (((flent->fe_type & FLOW_USER) != 0) || + ((flent->fe_type & FLOW_MCAST) != 0)) + return; + + (void) snprintf(statname, sizeof (statname), "mac_misc_stat"); + i_mac_misc_stat_create(flent, flent->fe_flow_name, statname); +} + +void +mac_soft_ring_stat_create(mac_soft_ring_t *ringp) +{ + mac_soft_ring_set_t 
*mac_srs = ringp->s_ring_set; + flow_entry_t *flent = ringp->s_ring_mcip->mci_flent; + mac_ring_t *ring = (mac_ring_t *)ringp->s_ring_tx_arg2; + boolean_t is_tx_srs; + char statname[MAXNAMELEN]; + + /* No hardware/software lanes for user defined flows */ + if ((flent->fe_type & FLOW_USER) != 0) + return; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (is_tx_srs) { /* tx side hardware lane */ + ASSERT(ring != NULL); + (void) snprintf(statname, sizeof (statname), "mac_tx_hwlane%d", + ring->mr_index); + i_mac_tx_hwlane_stat_create(ringp, flent->fe_flow_name, + statname); + } else { /* rx side fanout */ + /* Maintain single stat for (tcp, udp, oth) */ + if (ringp->s_ring_type & ST_RING_TCP) { + int index; + mac_soft_ring_t *softring; + + for (index = 0, softring = mac_srs->srs_soft_ring_head; + softring != NULL; + index++, softring = softring->s_ring_next) { + if (softring == ringp) + break; + } + + if (mac_srs->srs_ring == NULL) { + (void) snprintf(statname, sizeof (statname), + "mac_rx_swlane0_fanout%d", index/3); + } else { + (void) snprintf(statname, sizeof (statname), + "mac_rx_hwlane%d_fanout%d", + mac_srs->srs_ring->mr_index, index/3); + } + i_mac_rx_fanout_stat_create(ringp, flent->fe_flow_name, + statname); + } + } +} + +void +mac_ring_stat_delete(mac_ring_t *ring) +{ + if (ring->mr_ksp != NULL) { + kstat_delete(ring->mr_ksp); + ring->mr_ksp = NULL; + } +} + +void +mac_srs_stat_delete(mac_soft_ring_set_t *mac_srs) +{ + boolean_t is_tx_srs; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (!is_tx_srs) { + /* + * Rx ring has been taken away. Before destroying corresponding + * SRS, save the stats recorded by that SRS. 
+ */ + mac_client_impl_t *mcip = mac_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_rx_stats_t *mac_rx_stat = &mac_srs->srs_rx.sr_stat; + + i_mac_add_stats(&mac_misc_stat->mms_defunctrxlanestats, + mac_rx_stat, &mac_misc_stat->mms_defunctrxlanestats, + rx_srs_stats_list, RX_SRS_STAT_SIZE); + } + + if (mac_srs->srs_ksp != NULL) { + kstat_delete(mac_srs->srs_ksp); + mac_srs->srs_ksp = NULL; + } +} + +/* + * Delete the misc kstat of a flow entry, if one was created. + */ +void +mac_misc_stat_delete(flow_entry_t *flent) +{ + if (flent->fe_misc_stat_ksp != NULL) { + kstat_delete(flent->fe_misc_stat_ksp); + flent->fe_misc_stat_ksp = NULL; + } +} + +/* + * Delete the kstat of a soft ring, if one was created. For a soft ring + * that belongs to a Tx SRS, the ring's Tx counters are first added to the + * client's mms_defuncttxlanestats so they are not lost. + */ +void +mac_soft_ring_stat_delete(mac_soft_ring_t *ringp) +{ + mac_soft_ring_set_t *mac_srs = ringp->s_ring_set; + boolean_t is_tx_srs; + + is_tx_srs = ((mac_srs->srs_type & SRST_TX) != 0); + if (is_tx_srs) { + /* + * Tx ring has been taken away. Before destroying corresponding + * soft ring, save the stats recorded by that soft ring. + */ + mac_client_impl_t *mcip = mac_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_tx_stats_t *mac_tx_stat = &ringp->s_st_stat; + + i_mac_add_stats(&mac_misc_stat->mms_defuncttxlanestats, + mac_tx_stat, &mac_misc_stat->mms_defuncttxlanestats, + tx_softring_stats_list, TX_SOFTRING_STAT_SIZE); + } + + /* Explicit NULL test, matching the other *_stat_delete routines */ + if (ringp->s_ring_ksp != NULL) { + kstat_delete(ringp->s_ring_ksp); + ringp->s_ring_ksp = NULL; + } +} + +/* + * Delete and recreate the kstat of every ring in every rx and tx group + * of the given MAC. + */ +void +mac_pseudo_ring_stat_rename(mac_impl_t *mip) +{ + mac_group_t *group; + mac_ring_t *ring; + + /* Recreate pseudo rx ring kstats */ + for (group = mip->mi_rx_groups; group != NULL; + group = group->mrg_next) { + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + mac_ring_stat_delete(ring); + mac_ring_stat_create(ring); + } + } + + /* Recreate pseudo tx ring kstats */ + for (group = mip->mi_tx_groups; group != NULL; + group = group->mrg_next) { + for (ring = group->mrg_rings; ring != NULL; + ring = ring->mr_next) { + mac_ring_stat_delete(ring); + mac_ring_stat_create(ring); + } + } +} + 
+void +mac_stat_rename(mac_client_impl_t *mcip) +{ + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *mac_srs; + mac_soft_ring_t *ringp; + int i, j; + + ASSERT(flent != NULL); + + /* + * Recreate rx SRSes kstats. + * NOTE(review): mac_srs_stat_delete() also adds an rx SRS's + * counters to the client's defunct rx lane totals, and nothing + * here clears the SRS counters afterwards -- confirm that this + * is intended when the SRS itself is kept. + */ + for (i = 0; i < flent->fe_rx_srs_cnt; i++) { + mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; + mac_srs_stat_delete(mac_srs); + mac_srs_stat_create(mac_srs); + + /* Recreate rx fanout kstats (one per TCP soft ring) */ + for (j = 0; j < mac_srs->srs_tcp_ring_count; j++) { + ringp = mac_srs->srs_tcp_soft_rings[j]; + mac_soft_ring_stat_delete(ringp); + mac_soft_ring_stat_create(ringp); + } + } + + /* Recreate tx SRS kstats */ + mac_srs = (mac_soft_ring_set_t *)flent->fe_tx_srs; + mac_srs_stat_delete(mac_srs); + mac_srs_stat_create(mac_srs); + + /* Recreate tx soft ring kstats */ + for (ringp = mac_srs->srs_soft_ring_head; ringp; + ringp = ringp->s_ring_next) { + mac_soft_ring_stat_delete(ringp); + mac_soft_ring_stat_create(ringp); + } + + /* Recreate misc kstats */ + mac_misc_stat_delete(flent); + mac_misc_stat_create(flent); +} + +void +mac_tx_srs_stat_recreate(mac_soft_ring_set_t *tx_srs, boolean_t add_stats) +{ + mac_client_impl_t *mcip = tx_srs->srs_mcip; + mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat; + mac_tx_stats_t *mac_tx_stat = &tx_srs->srs_tx.st_stat; + + if (add_stats) { + /* + * Add the Tx SRS stats to the client's cumulative + * (defunct tx lane) stats before they are zeroed below. + */ + i_mac_add_stats(&mac_misc_stat->mms_defuncttxlanestats, + mac_tx_stat, &mac_misc_stat->mms_defuncttxlanestats, + tx_softring_stats_list, TX_SOFTRING_STAT_SIZE); + } + + bzero(mac_tx_stat, sizeof (mac_tx_stats_t)); + mac_srs_stat_delete(tx_srs); + mac_srs_stat_create(tx_srs); +} diff --git a/usr/src/uts/common/io/mac/mac_util.c b/usr/src/uts/common/io/mac/mac_util.c index 371145e68c..3d9d2f9b39 100644 --- a/usr/src/uts/common/io/mac/mac_util.c +++ b/usr/src/uts/common/io/mac/mac_util.c @@ -244,14 +244,23 @@ mac_fix_cksum(mblk_t *mp_chain) offset, cksum); *(up) = (uint16_t)(cksum ? 
cksum : ~cksum); + /* + * Flag the packet so that it appears + * that the checksum has already been + * verified by the hardware. + */ + flags &= ~HCK_FULLCKSUM; flags |= HCK_FULLCKSUM_OK; - value = 0xffff; + value = 0; } if (flags & HCK_IPV4_HDRCKSUM) { ASSERT(ipha != NULL); ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); + flags &= ~HCK_IPV4_HDRCKSUM; + flags |= HCK_IPV4_HDRCKSUM_OK; + } } @@ -292,8 +301,8 @@ mac_fix_cksum(mblk_t *mp_chain) * been verified by the hardware. */ flags &= ~HCK_PARTIALCKSUM; - flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); - value = 0xffff; + flags |= HCK_FULLCKSUM_OK; + value = 0; } (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, @@ -470,27 +479,25 @@ mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, * returns B_TRUE. */ boolean_t -mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, - uint8_t *next_hdr, boolean_t *ip_fragmented, uint32_t *ip_frag_ident) +mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length, + uint8_t *next_hdr, ip6_frag_t **fragp) { uint16_t length; uint_t ehdrlen; uint8_t *whereptr; - uint8_t *endptr; uint8_t *nexthdrp; ip6_dest_t *desthdr; ip6_rthdr_t *rthdr; ip6_frag_t *fraghdr; - endptr = mp->b_wptr; if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) return (B_FALSE); ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); length = IPV6_HDR_LEN; whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ - if (ip_fragmented != NULL) - *ip_fragmented = B_FALSE; + if (fragp != NULL) + *fragp = NULL; nexthdrp = &ip6h->ip6_nxt; while (whereptr < endptr) { @@ -521,10 +528,8 @@ mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, if ((uchar_t *)&fraghdr[1] > endptr) return (B_FALSE); nexthdrp = &fraghdr->ip6f_nxt; - if (ip_fragmented != NULL) - *ip_fragmented = B_TRUE; - if (ip_frag_ident != NULL) - *ip_frag_ident = fraghdr->ip6f_ident; + if (fragp != NULL) + *fragp = fraghdr; break; case IPPROTO_NONE: /* No next header means we're finished */ @@ -561,6 
+566,13 @@ mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, } } +/* + * The following set of routines are there to take care of interrupt + * re-targeting for legacy (fixed) interrupts. Some older versions + * of the popular NICs like e1000g do not support MSI-X interrupts + * and they reserve fixed interrupts for RX/TX rings. To re-target + * these interrupts, PCITOOL ioctls need to be used. + */ typedef struct mac_dladm_intr { int ino; int cpu_id; @@ -807,13 +819,20 @@ mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_resource_props_t *mrp; mac_perim_handle_t mph; + flow_entry_t *flent = mcip->mci_flent; + mac_soft_ring_set_t *rx_srs; + mac_cpus_t *srs_cpu; - if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid)) - return; - + if (!mac_check_interrupt_binding(mdip, cpuid)) + cpuid = -1; mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); mrp = MCIP_RESOURCE_PROPS(mcip); - mrp->mrp_intr_cpu = cpuid; + mrp->mrp_rx_intr_cpu = cpuid; + if (flent != NULL && flent->fe_rx_srs_cnt == 2) { + rx_srs = flent->fe_rx_srs[1]; + srs_cpu = &rx_srs->srs_cpu; + srs_cpu->mc_rx_intr_cpu = cpuid; + } mac_perim_exit(mph); } @@ -825,18 +844,29 @@ mac_client_intr_cpu(mac_client_handle_t mch) mac_soft_ring_set_t *rx_srs; flow_entry_t *flent = mcip->mci_flent; mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); + mac_ring_t *ring; + mac_intr_t *mintr; /* * Check if we need to retarget the interrupt. We do this only * for the primary MAC client. We do this if we have the only - * exclusive ring in the group. + * exclusive ring in the group. */ if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { rx_srs = flent->fe_rx_srs[1]; srs_cpu = &rx_srs->srs_cpu; - if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid) + ring = rx_srs->srs_ring; + mintr = &ring->mr_info.mri_intr; + /* + * If ddi_handle is present or the poll CPU is + * already bound to the interrupt CPU, return -1. 
+ */ + if (mintr->mi_ddi_handle != NULL || + ((mrp->mrp_ncpus != 0) && + (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) { return (-1); - return (srs_cpu->mc_pollid); + } + return (srs_cpu->mc_rx_pollid); } return (-1); } @@ -970,8 +1000,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) } case ETHERTYPE_IPV6: { ip6_t *ip6hp; + ip6_frag_t *frag = NULL; uint16_t hdr_length; - uint32_t ip_frag_ident; /* * If the header is not aligned or the header doesn't fit @@ -984,8 +1014,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) !OK_32PTR((char *)ip6hp)) goto done; - if (!mac_ip_hdr_length_v6(mp, ip6hp, &hdr_length, &proto, - &ip_fragmented, &ip_frag_ident)) + if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length, + &proto, &frag)) goto done; skip_len += hdr_length; @@ -994,7 +1024,7 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) * the frag_id to generate the hash inorder to get * better distribution. 
*/ - if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { + if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) { uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); @@ -1003,8 +1033,8 @@ mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) policy &= ~MAC_PKT_HASH_L3; } - if (ip_fragmented) { - uint8_t *identp = (uint8_t *)&ip_frag_ident; + if (frag != NULL) { + uint8_t *identp = (uint8_t *)&frag->ip6f_ident; hash ^= PKT_HASH_4BYTES(identp); goto done; } diff --git a/usr/src/uts/common/io/mii/mii.c b/usr/src/uts/common/io/mii/mii.c index 2187553b40..bfff2a52e8 100644 --- a/usr/src/uts/common/io/mii/mii.c +++ b/usr/src/uts/common/io/mii/mii.c @@ -650,12 +650,10 @@ mii_m_loop_ioctl(mii_handle_t mh, queue_t *wq, mblk_t *mp) int mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, - uint_t flags, uint_t sz, void *val, uint_t *permp) + uint_t sz, void *val) { phy_handle_t *ph; int err = 0; - uint_t perm; - boolean_t dfl = flags & MAC_PROP_DEFAULT; _NOTE(ARGUNUSED(name)); @@ -665,54 +663,36 @@ mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, mutex_enter(&mh->m_lock); ph = mh->m_phy; - perm = MAC_PROP_PERM_RW; #define CASE_PROP_ABILITY(PROP, VAR) \ case MAC_PROP_ADV_##PROP: \ - perm = MAC_PROP_PERM_READ; \ - *(uint8_t *)val = \ - dfl ? ph->phy_cap_##VAR : ph->phy_adv_##VAR; \ + *(uint8_t *)val = ph->phy_adv_##VAR; \ break; \ \ case MAC_PROP_EN_##PROP: \ - if (!ph->phy_cap_##VAR) \ - perm = MAC_PROP_PERM_READ; \ - *(uint8_t *)val = \ - dfl ? 
ph->phy_cap_##VAR : ph->phy_en_##VAR; \ + *(uint8_t *)val = ph->phy_en_##VAR; \ break; switch (num) { case MAC_PROP_DUPLEX: - perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&ph->phy_duplex, val, sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&ph->phy_duplex, val, sizeof (link_duplex_t)); break; - case MAC_PROP_SPEED: - perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - uint64_t speed = ph->phy_speed * 1000000ull; - bcopy(&speed, val, sizeof (speed)); - } else { - err = EINVAL; - } + case MAC_PROP_SPEED: { + uint64_t speed = ph->phy_speed * 1000000ull; + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&speed, val, sizeof (speed)); break; + } case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? ph->phy_cap_aneg : ph->phy_adv_aneg; + *(uint8_t *)val = ph->phy_adv_aneg; break; case MAC_PROP_FLOWCTRL: - if (sz >= sizeof (link_flowctrl_t)) { - bcopy(&ph->phy_flowctrl, val, - sizeof (link_flowctrl_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_flowctrl_t)); + bcopy(&ph->phy_flowctrl, val, sizeof (link_flowctrl_t)); break; CASE_PROP_ABILITY(1000FDX_CAP, 1000_fdx) @@ -728,15 +708,57 @@ mii_m_getprop(mii_handle_t mh, const char *name, mac_prop_id_t num, break; } - if (err == 0) { - *permp = perm; - } - mutex_exit(&mh->m_lock); return (err); } +void +mii_m_propinfo(mii_handle_t mh, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + phy_handle_t *ph; + + _NOTE(ARGUNUSED(name)); + + mutex_enter(&mh->m_lock); + + ph = mh->m_phy; + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, ph->phy_cap_aneg); + break; + +#define CASE_PROP_PERM(PROP, VAR) \ + case MAC_PROP_ADV_##PROP: \ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); \ + mac_prop_info_set_default_uint8(prh, ph->phy_cap_##VAR); \ + break; \ + \ + case 
MAC_PROP_EN_##PROP: \ + if (!ph->phy_cap_##VAR) \ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); \ + mac_prop_info_set_default_uint8(prh, ph->phy_cap_##VAR); \ + break; + + CASE_PROP_PERM(1000FDX_CAP, 1000_fdx) + CASE_PROP_PERM(1000HDX_CAP, 1000_hdx) + CASE_PROP_PERM(100T4_CAP, 100_t4) + CASE_PROP_PERM(100FDX_CAP, 100_fdx) + CASE_PROP_PERM(100HDX_CAP, 100_hdx) + CASE_PROP_PERM(10FDX_CAP, 10_fdx) + CASE_PROP_PERM(10HDX_CAP, 10_hdx) + } + + mutex_exit(&mh->m_lock); +} + int mii_m_setprop(mii_handle_t mh, const char *name, mac_prop_id_t num, uint_t sz, const void *valp) @@ -813,65 +835,62 @@ mii_m_setprop(mii_handle_t mh, const char *name, mac_prop_id_t num, advp = &ph->phy_en_aneg; macpp = &mh->m_en_aneg; break; - case MAC_PROP_FLOWCTRL: - if (sz < sizeof (link_flowctrl_t)) { - rv = EINVAL; - } else { - link_flowctrl_t fc; - boolean_t chg; + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fc; + boolean_t chg; - bcopy(valp, &fc, sizeof (fc)); + ASSERT(sz >= sizeof (link_flowctrl_t)); + bcopy(valp, &fc, sizeof (fc)); - chg = fc == ph->phy_en_flowctrl ? B_FALSE : B_TRUE; - switch (fc) { - case LINK_FLOWCTRL_NONE: - ph->phy_en_pause = B_FALSE; - ph->phy_en_asmpause = B_FALSE; + chg = fc == ph->phy_en_flowctrl ? B_FALSE : B_TRUE; + switch (fc) { + case LINK_FLOWCTRL_NONE: + ph->phy_en_pause = B_FALSE; + ph->phy_en_asmpause = B_FALSE; + ph->phy_en_flowctrl = fc; + break; + /* + * Note that while we don't have a way to advertise + * that we can RX pause (we just won't send pause + * frames), we advertise full support. The MAC driver + * will learn of the configuration via the saved value + * of the tunable. + */ + case LINK_FLOWCTRL_BI: + case LINK_FLOWCTRL_RX: + if (ph->phy_cap_pause) { + ph->phy_en_pause = B_TRUE; + ph->phy_en_asmpause = B_TRUE; ph->phy_en_flowctrl = fc; - break; - /* - * Note that while we don't have a way to - * advertise that we can RX pause (we just - * won't send pause frames), we advertise full - * support. 
The MAC driver will learn of the - * configuration via the saved value of the - * tunable. - */ - case LINK_FLOWCTRL_BI: - case LINK_FLOWCTRL_RX: - if (ph->phy_cap_pause) { - ph->phy_en_pause = B_TRUE; - ph->phy_en_asmpause = B_TRUE; - ph->phy_en_flowctrl = fc; - } else { - rv = EINVAL; - } - break; - - /* - * Tell the other side that we can assert - * pause, but we cannot resend. - */ - case LINK_FLOWCTRL_TX: - if (ph->phy_cap_asmpause) { - ph->phy_en_pause = B_FALSE; - ph->phy_en_flowctrl = fc; - ph->phy_en_asmpause = B_TRUE; - } else { - rv = EINVAL; - } - break; - default: + } else { rv = EINVAL; - break; } - if ((rv == 0) && chg) { - mh->m_en_flowctrl = fc; - mh->m_tstate = MII_STATE_RESET; - cv_broadcast(&mh->m_cv); + break; + + /* + * Tell the other side that we can assert pause, but + * we cannot resend. + */ + case LINK_FLOWCTRL_TX: + if (ph->phy_cap_asmpause) { + ph->phy_en_pause = B_FALSE; + ph->phy_en_flowctrl = fc; + ph->phy_en_asmpause = B_TRUE; + } else { + rv = EINVAL; } + break; + default: + rv = EINVAL; + break; + } + if ((rv == 0) && chg) { + mh->m_en_flowctrl = fc; + mh->m_tstate = MII_STATE_RESET; + cv_broadcast(&mh->m_cv); } break; + } default: rv = ENOTSUP; diff --git a/usr/src/uts/common/io/mwl/mwl.c b/usr/src/uts/common/io/mwl/mwl.c index ce99b07504..98d0892326 100644 --- a/usr/src/uts/common/io/mwl/mwl.c +++ b/usr/src/uts/common/io/mwl/mwl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -86,11 +86,13 @@ static int mwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int mwl_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void mwl_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t mwl_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, mwl_m_stat, mwl_m_start, mwl_m_stop, @@ -98,12 +100,14 @@ static mac_callbacks_t mwl_m_callbacks = { mwl_m_multicst, mwl_m_unicst, mwl_m_tx, + NULL, mwl_m_ioctl, NULL, NULL, NULL, mwl_m_setprop, - mwl_m_getprop + mwl_m_getprop, + mwl_m_propinfo }; #define MWL_DBG_ATTACH (1 << 0) @@ -3746,17 +3750,26 @@ mwl_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int mwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct mwl_softc *sc = (struct mwl_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +mwl_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct mwl_softc *sc = (struct mwl_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int mwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/mxfe/mxfe.c b/usr/src/uts/common/io/mxfe/mxfe.c index d48164a80f..790c936fd5 100644 --- a/usr/src/uts/common/io/mxfe/mxfe.c +++ b/usr/src/uts/common/io/mxfe/mxfe.c @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -114,9 +114,11 @@ static int mxfe_m_stat(void *, uint_t, uint64_t *); static int mxfe_m_start(void *); static void mxfe_m_stop(void *); static int mxfe_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); + void *); static int mxfe_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); +static void mxfe_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static unsigned mxfe_intr(caddr_t); static void mxfe_startmac(mxfe_t *); static void mxfe_stopmac(mxfe_t *); @@ -170,7 +172,7 @@ static void mxfe_dprintf(mxfe_t *, const char *, int, char *, ...); #define KIOIP KSTAT_INTR_PTR(mxfep->mxfe_intrstat) static mac_callbacks_t mxfe_m_callbacks = { - MC_SETPROP | MC_GETPROP, + MC_SETPROP | MC_GETPROP | MC_PROPINFO, mxfe_m_stat, mxfe_m_start, mxfe_m_stop, @@ -178,12 +180,14 @@ static mac_callbacks_t mxfe_m_callbacks = { mxfe_m_multicst, mxfe_m_unicst, mxfe_m_tx, + NULL, NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ mxfe_m_setprop, - mxfe_m_getprop + mxfe_m_getprop, + mxfe_m_propinfo }; /* @@ -2877,90 +2881,50 @@ mxfe_m_stat(void *arg, uint_t stat, uint64_t *val) /*ARGSUSED*/ int -mxfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) +mxfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, + void *val) { mxfe_t *mxfep = arg; int err = 0; - boolean_t dfl = flags & MAC_PROP_DEFAULT; - if (sz == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; switch (num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (link_duplex_t)) { - bcopy(&mxfep->mxfe_duplex, val, - sizeof (link_duplex_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (link_duplex_t)); + bcopy(&mxfep->mxfe_duplex, val, sizeof 
(link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (sz >= sizeof (uint64_t)) { - bcopy(&mxfep->mxfe_ifspeed, val, sizeof (uint64_t)); - } else { - err = EINVAL; - } + ASSERT(sz >= sizeof (uint64_t)); + bcopy(&mxfep->mxfe_ifspeed, val, sizeof (uint64_t)); break; case MAC_PROP_AUTONEG: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_aneg : mxfep->mxfe_adv_aneg; + *(uint8_t *)val = mxfep->mxfe_adv_aneg; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100fdx : mxfep->mxfe_adv_100fdx; - break; case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100fdx : mxfep->mxfe_adv_100fdx; + *(uint8_t *)val = mxfep->mxfe_adv_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100hdx : mxfep->mxfe_adv_100hdx; - break; case MAC_PROP_EN_100HDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100hdx : mxfep->mxfe_adv_100hdx; + *(uint8_t *)val = mxfep->mxfe_adv_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10fdx : mxfep->mxfe_adv_10fdx; - break; case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10fdx : mxfep->mxfe_adv_10fdx; + *(uint8_t *)val = mxfep->mxfe_adv_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10hdx : mxfep->mxfe_adv_10hdx; - break; case MAC_PROP_EN_10HDX_CAP: - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_10hdx : mxfep->mxfe_adv_10hdx; + *(uint8_t *)val = mxfep->mxfe_adv_10hdx; break; case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)val = - dfl ? mxfep->mxfe_cap_100T4 : mxfep->mxfe_adv_100T4; - break; case MAC_PROP_EN_100T4_CAP: - *(uint8_t *)val = - dfl ? 
mxfep->mxfe_cap_100T4 : mxfep->mxfe_adv_100T4; + *(uint8_t *)val = mxfep->mxfe_adv_100T4; break; default: @@ -3041,6 +3005,51 @@ mxfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, return (0); } +static void +mxfe_m_propinfo(void *arg, const char *name, mac_prop_id_t num, + mac_prop_info_handle_t mph) +{ + mxfe_t *mxfep = arg; + + _NOTE(ARGUNUSED(name)); + + switch (num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + mac_prop_info_set_perm(mph, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_aneg); + break; + + case MAC_PROP_EN_100FDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100fdx); + break; + + case MAC_PROP_EN_100HDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100hdx); + break; + + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_10fdx); + break; + + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_10hdx); + break; + + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_default_uint8(mph, mxfep->mxfe_cap_100T4); + break; + } +} + /* * Debugging and error reporting. 
*/ diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge.c b/usr/src/uts/common/io/myri10ge/drv/myri10ge.c index d2bda2311b..7cdbad3249 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge.c +++ b/usr/src/uts/common/io/myri10ge/drv/myri10ge.c @@ -2380,8 +2380,7 @@ myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) return; } - (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, - csum, HCK_PARTIALCKSUM, 0); + mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); } static mblk_t * @@ -2889,7 +2888,7 @@ static inline void myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) { uint32_t lso_flag; - lso_info_get(mp, mss, &lso_flag); + mac_lso_get(mp, mss, &lso_flag); (*flags) |= lso_flag; } @@ -2902,8 +2901,7 @@ myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) int ok; mss = 0; - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, NULL, - &tx_offload_flags); + mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); ok = pullupmsg(mp, -1); @@ -2912,8 +2910,7 @@ myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) return (DDI_FAILURE); } MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); - (void) hcksum_assoc(mp, NULL, NULL, start, stuff, NULL, - NULL, tx_offload_flags, 0); + mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags); if (tx_offload_flags & HW_LSO) DB_LSOMSS(mp) = (uint16_t)mss; lso_info_set(mp, mss, tx_offload_flags); @@ -3347,8 +3344,7 @@ myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, again: /* Setup checksum offloading, if needed */ - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, NULL, - &tx_offload_flags); + mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); if (tx_offload_flags & HW_LSO) { max_segs = MYRI10GE_MAX_SEND_DESC_TSO; @@ -3796,6 +3792,58 @@ myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) return (0); } 
+/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + struct myri10ge_slice_state *ss; + + ss = (struct myri10ge_slice_state *)rh; + switch (stat) { + case MAC_STAT_RBYTES: + *val = ss->rx_stats.ibytes; + break; + + case MAC_STAT_IPACKETS: + *val = ss->rx_stats.ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + struct myri10ge_slice_state *ss; + + ss = (struct myri10ge_slice_state *)rh; + switch (stat) { + case MAC_STAT_OBYTES: + *val = ss->tx.stats.obytes; + break; + + case MAC_STAT_OPACKETS: + *val = ss->tx.stats.opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + static int myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) { @@ -3843,6 +3891,7 @@ myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = myri10ge_ring_start; infop->mri_stop = NULL; infop->mri_poll = myri10ge_poll_rx; + infop->mri_stat = myri10ge_rx_ring_stat; mintr->mi_handle = (mac_intr_handle_t)ss; mintr->mi_enable = myri10ge_rx_ring_intr_enable; mintr->mi_disable = myri10ge_rx_ring_intr_disable; @@ -3853,6 +3902,7 @@ myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = NULL; infop->mri_stop = NULL; infop->mri_tx = myri10ge_send_wrapper; + infop->mri_stat = myri10ge_tx_ring_stat; break; default: break; @@ -5329,6 +5379,7 @@ static mac_callbacks_t myri10ge_m_callbacks = { myri10ge_m_multicst, NULL, NULL, + NULL, myri10ge_m_ioctl, myri10ge_m_getcapab }; diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c b/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c index 2d03fceac6..ba2177e0fe 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c +++ 
b/usr/src/uts/common/io/myri10ge/drv/myri10ge_lro.c @@ -118,8 +118,8 @@ myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro, tcp->th_sum = 0xffff ^ tcp_csum; } - (void) hcksum_assoc(lro->m_head, NULL, NULL, 0, 0, 0, - 0, HCK_IPV4_HDRCKSUM | HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(lro->m_head, 0, 0, 0, + 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK); mbl->cnt += lro->append_cnt; myri10ge_mbl_append(ss, mbl, lro->m_head); diff --git a/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h b/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h index 6840795e94..24889e48a6 100644 --- a/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h +++ b/usr/src/uts/common/io/myri10ge/drv/myri10ge_var.h @@ -57,9 +57,7 @@ extern "C" { #include <sys/sunddi.h> #include <sys/strsubr.h> /* for hw cksum stuff */ #include <sys/pattr.h> /* for hw cksum stuff */ -#ifdef MYRICOM_PRIV #include <netinet/in.h> /* for hw cksum stuff */ -#endif #include <netinet/ip.h> /* for hw cksum stuff */ #include <netinet/ip6.h> /* for hw cksum stuff */ #include <netinet/tcp.h> /* for hw cksum stuff */ diff --git a/usr/src/uts/common/io/net80211/net80211_ioctl.c b/usr/src/uts/common/io/net80211/net80211_ioctl.c index 93212719e3..25ef1e4fde 100644 --- a/usr/src/uts/common/io/net80211/net80211_ioctl.c +++ b/usr/src/uts/common/io/net80211/net80211_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -2457,22 +2457,14 @@ ieee80211_setprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ int ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; struct ieee80211com *ic = ic_arg; - if (wldp_length == 0) { - err = EINVAL; - return (err); - } - bzero(wldp_buf, wldp_length); - ASSERT(ic != NULL); IEEE80211_LOCK(ic); - *perm = MAC_PROP_PERM_RW; - switch (wldp_pr_num) { /* mac_prop_id */ case MAC_PROP_WL_ESSID: @@ -2497,34 +2489,27 @@ ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, wl_get_desrates(ic, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: - *perm = MAC_PROP_PERM_READ; wl_get_linkstatus(ic, wldp_buf); break; case MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; wl_get_esslist(ic, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; wl_get_suprates(ic, wldp_buf); break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; wl_get_rssi(ic, wldp_buf); break; case MAC_PROP_WL_CAPABILITY: - *perm = MAC_PROP_PERM_READ; wl_get_capability(ic, wldp_buf); break; case MAC_PROP_WL_WPA: wl_get_wpa(ic, wldp_buf); break; case MAC_PROP_WL_SCANRESULTS: - *perm = MAC_PROP_PERM_READ; wl_get_scanresults(ic, wldp_buf); break; case MAC_PROP_WL_CREATE_IBSS: - *perm = MAC_PROP_PERM_READ; wl_get_createibss(ic, wldp_buf); break; case MAC_PROP_WL_KEY_TAB: @@ -2545,3 +2530,25 @@ ieee80211_getprop(void *ic_arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +void ieee80211_propinfo(void *ic_arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(pr_name, ic_arg)); + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. 
+ */ + + switch (wldp_pr_num) { + case MAC_PROP_WL_LINKSTATUS: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + case MAC_PROP_WL_CAPABILITY: + case MAC_PROP_WL_SCANRESULTS: + case MAC_PROP_WL_CREATE_IBSS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } +} diff --git a/usr/src/uts/common/io/nge/nge_main.c b/usr/src/uts/common/io/nge/nge_main.c index 583e9bd61e..1aad680aa7 100644 --- a/usr/src/uts/common/io/nge/nge_main.c +++ b/usr/src/uts/common/io/nge/nge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -177,14 +177,17 @@ static boolean_t nge_m_getcapab(void *, mac_capab_t, void *); static int nge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int nge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void nge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int nge_set_priv_prop(nge_t *, const char *, uint_t, const void *); static int nge_get_priv_prop(nge_t *, const char *, uint_t, - uint_t, void *); + void *); #define NGE_M_CALLBACK_FLAGS\ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | \ + MC_PROPINFO) static mac_callbacks_t nge_m_callbacks = { NGE_M_CALLBACK_FLAGS, @@ -195,27 +198,27 @@ static mac_callbacks_t nge_m_callbacks = { nge_m_multicst, nge_m_unicst, nge_m_tx, + NULL, nge_m_ioctl, nge_m_getcapab, NULL, NULL, nge_m_setprop, - nge_m_getprop + nge_m_getprop, + nge_m_propinfo }; -mac_priv_prop_t nge_priv_props[] = { - {"_tx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_rx_bcopy_threshold", MAC_PROP_PERM_RW}, - {"_recv_max_packet", MAC_PROP_PERM_RW}, - {"_poll_quiet_time", MAC_PROP_PERM_RW}, - {"_poll_busy_time", MAC_PROP_PERM_RW}, - {"_rx_intr_hwater", MAC_PROP_PERM_RW}, - 
{"_rx_intr_lwater", MAC_PROP_PERM_RW}, +char *nge_priv_props[] = { + "_tx_bcopy_threshold", + "_rx_bcopy_threshold", + "_recv_max_packet", + "_poll_quiet_time", + "_poll_busy_time", + "_rx_intr_hwater", + "_rx_intr_lwater", + NULL }; -#define NGE_MAX_PRIV_PROPS \ - (sizeof (nge_priv_props)/sizeof (mac_priv_prop_t)) - static int nge_add_intrs(nge_t *, int); static void nge_rem_intrs(nge_t *); static int nge_register_intrs_and_init_locks(nge_t *); @@ -1750,193 +1753,167 @@ reprogram: static int nge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { nge_t *ngep = barg; int err = 0; link_flowctrl_t fl; uint64_t speed; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); - - if (pr_valsize == 0) - return (EINVAL); - - *perm = MAC_PROP_PERM_RW; - - bzero(pr_val, pr_valsize); switch (pr_num) { case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (link_duplex_t)) { - bcopy(&ngep->param_link_duplex, pr_val, - sizeof (link_duplex_t)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_duplex_t)); + bcopy(&ngep->param_link_duplex, pr_val, + sizeof (link_duplex_t)); break; case MAC_PROP_SPEED: - *perm = MAC_PROP_PERM_READ; - if (pr_valsize >= sizeof (uint64_t)) { - speed = ngep->param_link_speed * 1000000ull; - bcopy(&speed, pr_val, sizeof (speed)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (uint64_t)); + speed = ngep->param_link_speed * 1000000ull; + bcopy(&speed, pr_val, sizeof (speed)); break; case MAC_PROP_AUTONEG: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_autoneg; - } + *(uint8_t *)pr_val = ngep->param_adv_autoneg; break; case MAC_PROP_FLOWCTRL: - if (pr_valsize >= sizeof (link_flowctrl_t)) { - if (pr_flags & MAC_PROP_DEFAULT) { - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - break; - } - if (ngep->param_link_rx_pause && - 
!ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_RX; - - if (!ngep->param_link_rx_pause && - !ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_NONE; - - if (!ngep->param_link_rx_pause && - ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_TX; - - if (ngep->param_link_rx_pause && - ngep->param_link_tx_pause) - fl = LINK_FLOWCTRL_BI; - bcopy(&fl, pr_val, sizeof (fl)); - } else - err = EINVAL; + ASSERT(pr_valsize >= sizeof (link_flowctrl_t)); + if (ngep->param_link_rx_pause && + !ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_RX; + + if (!ngep->param_link_rx_pause && + !ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_NONE; + + if (!ngep->param_link_rx_pause && + ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_TX; + + if (ngep->param_link_rx_pause && + ngep->param_link_tx_pause) + fl = LINK_FLOWCTRL_BI; + bcopy(&fl, pr_val, sizeof (fl)); break; case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_1000fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_1000fdx; break; case MAC_PROP_EN_1000FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_1000fdx; - } + *(uint8_t *)pr_val = ngep->param_en_1000fdx; break; case MAC_PROP_ADV_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 0; - } else { - *(uint8_t *)pr_val = ngep->param_adv_1000hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_1000hdx; break; case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 0; - } else { - *(uint8_t *)pr_val = ngep->param_en_1000hdx; - } + *(uint8_t *)pr_val = ngep->param_en_1000hdx; break; case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_100fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_100fdx; break; case MAC_PROP_EN_100FDX_CAP: - if (is_default) { - *(uint8_t 
*)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_100fdx; - } + *(uint8_t *)pr_val = ngep->param_en_100fdx; break; case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_100hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_100hdx; break; case MAC_PROP_EN_100HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_100hdx; - } + *(uint8_t *)pr_val = ngep->param_en_100hdx; break; case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_10fdx; - } + *(uint8_t *)pr_val = ngep->param_adv_10fdx; break; case MAC_PROP_EN_10FDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_10fdx; - } + *(uint8_t *)pr_val = ngep->param_en_10fdx; break; case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_adv_10hdx; - } + *(uint8_t *)pr_val = ngep->param_adv_10hdx; break; case MAC_PROP_EN_10HDX_CAP: - if (is_default) { - *(uint8_t *)pr_val = 1; - } else { - *(uint8_t *)pr_val = ngep->param_en_10hdx; - } + *(uint8_t *)pr_val = ngep->param_en_10hdx; break; case MAC_PROP_ADV_100T4_CAP: case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; *(uint8_t *)pr_val = 0; break; case MAC_PROP_PRIVATE: - err = nge_get_priv_prop(ngep, pr_name, pr_flags, + err = nge_get_priv_prop(ngep, pr_name, pr_valsize, pr_val); break; - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = ETHERMTU; - if (ngep->dev_spec_param.jumbo) - range.range_uint32[0].mpur_max 
= NGE_MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } default: err = ENOTSUP; } return (err); } +static void +nge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + nge_t *ngep = barg; + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_EN_100T4_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_BI); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, ETHERMTU, + ngep->dev_spec_param.jumbo ? 
NGE_MAX_MTU : ETHERMTU); + break; + + case MAC_PROP_PRIVATE: { + char valstr[64]; + int value; + + bzero(valstr, sizeof (valstr)); + if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { + value = NGE_TX_COPY_SIZE; + } else if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { + value = NGE_RX_COPY_SIZE; + } else if (strcmp(pr_name, "_recv_max_packet") == 0) { + value = 128; + } else if (strcmp(pr_name, "_poll_quiet_time") == 0) { + value = NGE_POLL_QUIET_TIME; + } else if (strcmp(pr_name, "_poll_busy_time") == 0) { + value = NGE_POLL_BUSY_TIME; + } else if (strcmp(pr_name, "_rx_intr_hwater") == 0) { + value = 1; + } else if (strcmp(pr_name, "_rx_intr_lwater") == 0) { + value = 8; + } else { + return; + } + + (void) snprintf(valstr, sizeof (valstr), "%d", value); + } + } + +} + /* ARGSUSED */ static int nge_set_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_valsize, @@ -2056,49 +2033,44 @@ reprogram: } static int -nge_get_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val) +nge_get_priv_prop(nge_t *ngep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { int err = ENOTSUP; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); int value; if (strcmp(pr_name, "_tx_bcopy_threshold") == 0) { - value = (is_default ? NGE_TX_COPY_SIZE : - ngep->param_txbcopy_threshold); + value = ngep->param_txbcopy_threshold; err = 0; goto done; } if (strcmp(pr_name, "_rx_bcopy_threshold") == 0) { - value = (is_default ? NGE_RX_COPY_SIZE : - ngep->param_rxbcopy_threshold); + value = ngep->param_rxbcopy_threshold; err = 0; goto done; } if (strcmp(pr_name, "_recv_max_packet") == 0) { - value = (is_default ? 128 : ngep->param_recv_max_packet); + value = ngep->param_recv_max_packet; err = 0; goto done; } if (strcmp(pr_name, "_poll_quiet_time") == 0) { - value = (is_default ? 
NGE_POLL_QUIET_TIME : - ngep->param_poll_quiet_time); + value = ngep->param_poll_quiet_time; err = 0; goto done; } if (strcmp(pr_name, "_poll_busy_time") == 0) { - value = (is_default ? NGE_POLL_BUSY_TIME : - ngep->param_poll_busy_time); + value = ngep->param_poll_busy_time; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_hwater") == 0) { - value = (is_default ? 1 : ngep->param_rx_intr_hwater); + value = ngep->param_rx_intr_hwater; err = 0; goto done; } if (strcmp(pr_name, "_rx_intr_lwater") == 0) { - value = (is_default ? 8 : ngep->param_rx_intr_lwater); + value = ngep->param_rx_intr_lwater; err = 0; goto done; } @@ -2561,7 +2533,6 @@ nge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_max_sdu = ngep->default_mtu; macp->m_margin = VTAG_SIZE; macp->m_priv_props = nge_priv_props; - macp->m_priv_prop_count = NGE_MAX_PRIV_PROPS; /* * Finally, we're ready to register ourselves with the mac * interface; if this succeeds, we're all ready to start() diff --git a/usr/src/uts/common/io/nge/nge_rx.c b/usr/src/uts/common/io/nge/nge_rx.c index 86484445d3..c362117fd2 100644 --- a/usr/src/uts/common/io/nge/nge_rx.c +++ b/usr/src/uts/common/io/nge/nge_rx.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -271,19 +271,18 @@ nge_rxsta_handle(nge_t *ngep, uint32_t stflag, uint32_t *pflags) case RXD_CK8G_TCP_SUM: case RXD_CK8G_UDP_SUM: - *pflags |= HCK_FULLCKSUM; - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; *pflags |= HCK_FULLCKSUM_OK; break; case RXD_CK8G_TCP_SUM_ERR: case RXD_CK8G_UDP_SUM_ERR: sw_stp->tcp_hwsum_err++; - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; break; case RXD_CK8G_IP_HSUM: - *pflags |= HCK_IPV4_HDRCKSUM; + *pflags |= HCK_IPV4_HDRCKSUM_OK; break; case RXD_CK8G_NO_HSUM: @@ -379,8 +378,7 @@ nge_recv_ring(nge_t *ngep) } if (mp != NULL) { if (!(flag_err & (RX_SUM_NO | RX_SUM_ERR))) { - (void) hcksum_assoc(mp, NULL, NULL, - 0, 0, 0, 0, sum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, sum_flags); } *tail = mp; tail = &mp->b_next; diff --git a/usr/src/uts/common/io/nge/nge_tx.c b/usr/src/uts/common/io/nge/nge_tx.c index c16368bd5f..6ece5b5730 100644 --- a/usr/src/uts/common/io/nge/nge_tx.c +++ b/usr/src/uts/common/io/nge/nge_tx.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -362,8 +362,7 @@ nge_send_copy(nge_t *ngep, mblk_t *mp, send_ring_t *srp) sw_tx_sbd_t *ssbdp; boolean_t tfint; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, NULL, &flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &flags); bds = 0x1; if ((uint32_t)-1 == (start_index = nge_tx_alloc(ngep, bds))) @@ -476,7 +475,7 @@ nge_send_mapped(nge_t *ngep, mblk_t *mp, size_t fragno) slot = 0; dmah = dmah_list.head; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &flags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &flags); for (bp = mp; bp != NULL; bp = bp->b_cont) { diff --git a/usr/src/uts/common/io/ntxn/unm_nic_main.c b/usr/src/uts/common/io/ntxn/unm_nic_main.c index 4165589454..be99c52ff3 100644 --- a/usr/src/uts/common/io/ntxn/unm_nic_main.c +++ b/usr/src/uts/common/io/ntxn/unm_nic_main.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/types.h> @@ -649,8 +649,7 @@ unm_tx_csum(cmdDescType0_t *desc, mblk_t *mp, pktinfo_t *pktinfo) if (pktinfo->etype == htons(ETHERTYPE_IP)) { uint32_t start, flags; - hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, - &flags); + mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags); if ((flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) == 0) return; @@ -1306,11 +1305,11 @@ unm_process_rcv(unm_adapter *adapter, statusDesc_t *desc) if (desc->u1.s1.status == STATUS_CKSUM_OK) { adapter->stats.csummed++; cksum_flags = - HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM | HCK_FULLCKSUM; + HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM_OK; } else { cksum_flags = 0; } - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, cksum_flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags); adapter->stats.no_rcv++; adapter->stats.rxbytes += pkt_length; @@ -2533,9 +2532,7 @@ static mac_callbacks_t ntxn_m_callbacks = { ntxn_m_multicst, ntxn_m_unicst, ntxn_m_tx, -#ifndef SOLARIS11 - NULL, /* mc_resources */ -#endif + NULL, /* mc_reserved */ ntxn_m_ioctl, ntxn_m_getcapab, NULL, /* mc_open */ diff --git a/usr/src/uts/common/io/nxge/nxge_fflp.c b/usr/src/uts/common/io/nxge/nxge_fflp.c index 39e107486e..ac1528275a 100644 --- a/usr/src/uts/common/io/nxge/nxge_fflp.c +++ b/usr/src/uts/common/io/nxge/nxge_fflp.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -448,6 +449,7 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) npi_status_t rs = NPI_SUCCESS; hostinfo_t mac_rdc; npi_handle_t handle; + int i; handle = nxgep->npi_reg_handle; mac_rdc.value = 0; @@ -456,6 +458,12 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) switch (nxgep->function_num) { case 0: case 1: + /* + * Tests indicate that it is OK not to re-initialize the + * hostinfo registers for the XMAC's alternate MAC + * addresses. 
But that is necessary for BMAC (case 2 + * and case 3 below) + */ rs = npi_mac_hostinfo_entry(handle, OP_SET, nxgep->function_num, XMAC_UNIQUE_HOST_INFO_ENTRY, &mac_rdc); break; @@ -463,6 +471,9 @@ nxge_main_mac_assign_rdc_table(p_nxge_t nxgep) case 3: rs = npi_mac_hostinfo_entry(handle, OP_SET, nxgep->function_num, BMAC_UNIQUE_HOST_INFO_ENTRY, &mac_rdc); + for (i = 1; i <= BMAC_MAX_ALT_ADDR_ENTRY; i++) + rs |= npi_mac_hostinfo_entry(handle, OP_SET, + nxgep->function_num, i, &mac_rdc); break; default: NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, @@ -488,7 +499,6 @@ nxge_alt_mcast_mac_assign_rdc_table(p_nxge_t nxgep) npi_status_t rs = NPI_SUCCESS; hostinfo_t mac_rdc; npi_handle_t handle; - int i; handle = nxgep->npi_reg_handle; mac_rdc.value = 0; @@ -497,25 +507,13 @@ nxge_alt_mcast_mac_assign_rdc_table(p_nxge_t nxgep) switch (nxgep->function_num) { case 0: case 1: - /* - * Tests indicate that it is OK not to re-initialize the - * hostinfo registers for the XMAC's alternate MAC - * addresses. But that is necessary for BMAC (case 2 - * and case 3 below) - */ rs = npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, - XMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); + nxgep->function_num, XMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); break; case 2: case 3: - for (i = 1; i <= BMAC_MAX_ALT_ADDR_ENTRY; i++) - rs |= npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, i, &mac_rdc); - - rs |= npi_mac_hostinfo_entry(handle, OP_SET, - nxgep->function_num, - BMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); + rs = npi_mac_hostinfo_entry(handle, OP_SET, + nxgep->function_num, BMAC_MULTI_HOST_INFO_ENTRY, &mac_rdc); break; default: NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, diff --git a/usr/src/uts/common/io/nxge/nxge_hio.c b/usr/src/uts/common/io/nxge/nxge_hio.c index 1130955670..2eaadd7b7c 100644 --- a/usr/src/uts/common/io/nxge/nxge_hio.c +++ b/usr/src/uts/common/io/nxge/nxge_hio.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,6 +50,7 @@ extern npi_status_t npi_rxdma_dump_rdc_table(npi_handle_t, uint8_t); extern int nxge_m_mmac_remove(void *arg, int slot); extern int nxge_m_mmac_add_g(void *arg, const uint8_t *maddr, int rdctbl, boolean_t usetbl); +extern int nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); /* The following function may be found in nxge_[t|r]xdma.c */ extern npi_status_t nxge_txdma_channel_disable(nxge_t *, int); @@ -428,6 +429,7 @@ nxge_grp_dc_add( nxge_hio_dc_t *dc; nxge_grp_set_t *set; nxge_status_t status = NXGE_OK; + int error = 0; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_grp_dc_add")); @@ -501,8 +503,13 @@ nxge_grp_dc_add( dc->group = group; - if (isLDOMguest(nxge)) - (void) nxge_hio_ldsv_add(nxge, dc); + if (isLDOMguest(nxge)) { + error = nxge_hio_ldsv_add(nxge, dc); + if (error != 0) { + MUTEX_EXIT(&nhd->lock); + return (NXGE_ERROR); + } + } NXGE_DC_SET(set->owned.map, channel); set->owned.count++; @@ -1778,6 +1785,10 @@ nxge_hio_share_bind(mac_share_handle_t shandle, uint64_t cookie, uint64_t rmap, tmap, hv_rmap, hv_tmap; int rv; + ASSERT(shp != NULL); + ASSERT(shp->nxgep != NULL); + ASSERT(shp->vrp != NULL); + nxge = shp->nxgep; vr = (nxge_hio_vr_t *)shp->vrp; @@ -1956,16 +1967,17 @@ nxge_hio_unshare( int nxge_hio_addres(nxge_hio_vr_t *vr, mac_ring_type_t type, uint64_t *map) { - nxge_t *nxge = (nxge_t *)vr->nxge; + nxge_t *nxge; nxge_grp_t *group; int groupid; int i, rv = 0; int max_dcs; - NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_addres")); + ASSERT(vr != NULL); + ASSERT(vr->nxge != NULL); + nxge = (nxge_t *)vr->nxge; - if (!nxge) - return (EINVAL); + NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_addres")); /* * For each ring associated with the group, add the resources @@ -1984,6 +1996,8 @@ nxge_hio_addres(nxge_hio_vr_t *vr, mac_ring_type_t type, uint64_t *map) group = nxge->rx_set.group[groupid]; } + ASSERT(group != NULL); + if 
(group->map == 0) { NXGE_DEBUG_MSG((nxge, HIO_CTL, "There is no rings associated " "with this VR")); @@ -2424,6 +2438,7 @@ nxge_hio_rdc_unshare( nxge_grp_set_t *set = &nxge->rx_set; nxge_grp_t *group; int grpid; + int i; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_rdc_unshare")); @@ -2484,6 +2499,14 @@ nxge_hio_rdc_unshare( } NXGE_DEBUG_MSG((nxge, HIO_CTL, "<== nxge_hio_rdc_unshare")); + + for (i = 0; i < NXGE_MAX_RDCS; i++) { + if (nxge->rx_ring_handles[i].channel == channel) { + nxge_rx_ring_start( + (mac_ring_driver_t)&nxge->rx_ring_handles[i], + nxge->rx_ring_handles[i].ring_gen_num); + } + } } /* diff --git a/usr/src/uts/common/io/nxge/nxge_hio_guest.c b/usr/src/uts/common/io/nxge/nxge_hio_guest.c index 3c552f2058..176c6a4e09 100644 --- a/usr/src/uts/common/io/nxge/nxge_hio_guest.c +++ b/usr/src/uts/common/io/nxge/nxge_hio_guest.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -169,7 +169,6 @@ static void nxge_check_guest_state(nxge_hio_vr_t *); * Context: * Guest domain */ -/* ARGSUSED */ int nxge_hio_vr_add(nxge_t *nxge) { @@ -411,6 +410,20 @@ nxge_guest_dc_alloc( return (0); } +int +nxge_hio_get_dc_htable_idx(nxge_t *nxge, vpc_type_t type, uint32_t channel) +{ + nxge_hio_dc_t *dc; + + ASSERT(isLDOMguest(nxge)); + + dc = nxge_grp_dc_find(nxge, type, channel); + if (dc == NULL) + return (-1); + + return (dc->ldg.vector); +} + /* * res_map_parse * diff --git a/usr/src/uts/common/io/nxge/nxge_intr.c b/usr/src/uts/common/io/nxge/nxge_intr.c index 0e6f85a0b6..2e73677ca5 100644 --- a/usr/src/uts/common/io/nxge/nxge_intr.c +++ b/usr/src/uts/common/io/nxge/nxge_intr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -903,26 +903,23 @@ nxge_hio_rdsv_add( * Context: * Guest domain */ -hv_rv_t -nxge_hio_ldsv_add( - nxge_t *nxge, - nxge_hio_dc_t *dc) +int +nxge_hio_ldsv_add(nxge_t *nxge, nxge_hio_dc_t *dc) { nxge_ldgv_t *control; nxge_ldg_t *group; nxge_ldv_t *device; - hv_rv_t hv_rv; if (dc->type == VP_BOUND_TX) { NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_ldsv_add(TDC %d)", dc->channel)); - if ((hv_rv = nxge_hio_tdsv_add(nxge, dc)) != 0) - return (hv_rv); + if (nxge_hio_tdsv_add(nxge, dc) != 0) + return (EIO); } else { NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_ldsv_add(RDC %d)", dc->channel)); - if ((hv_rv = nxge_hio_rdsv_add(nxge, dc)) != 0) - return (hv_rv); + if (nxge_hio_rdsv_add(nxge, dc) != 0) + return (EIO); } dc->ldg.map |= (1 << dc->ldg.ldsv); diff --git a/usr/src/uts/common/io/nxge/nxge_kstats.c b/usr/src/uts/common/io/nxge/nxge_kstats.c index c9fa73c35f..34cfafc58d 100644 --- a/usr/src/uts/common/io/nxge/nxge_kstats.c +++ b/usr/src/uts/common/io/nxge/nxge_kstats.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/nxge/nxge_impl.h> #include <sys/nxge/nxge_hio.h> @@ -2192,6 +2190,86 @@ nxge_m_tx_stat( return (val); } +/* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +nxge_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; + p_nxge_t nxgep = rhp->nxgep; + int r_index; + p_nxge_stats_t statsp; + + ASSERT(nxgep != NULL); + statsp = (p_nxge_stats_t)nxgep->statsp; + ASSERT(statsp != NULL); + r_index = rhp->index + nxgep->pt_config.hw_config.start_rdc; + + if (statsp->rdc_ksp[r_index] == NULL) + return (0); + + switch (stat) { + case MAC_STAT_IERRORS: + *val = statsp->rdc_stats[r_index].ierrors; + break; + + case MAC_STAT_RBYTES: + *val = statsp->rdc_stats[r_index].ibytes; + break; + + case MAC_STAT_IPACKETS: + *val = statsp->rdc_stats[r_index].ipackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +nxge_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; + p_nxge_t nxgep = rhp->nxgep; + int r_index; + p_nxge_stats_t statsp; + + ASSERT(nxgep != NULL); + statsp = (p_nxge_stats_t)nxgep->statsp; + ASSERT(statsp != NULL); + r_index = nxgep->pt_config.hw_config.tdc.start + rhp->index; + + if (statsp->tdc_ksp[r_index] == NULL) + return (0); + + switch (stat) { + case MAC_STAT_OERRORS: + *val = statsp->tdc_stats[r_index].oerrors; + break; + + case MAC_STAT_OBYTES: + *val = statsp->tdc_stats[r_index].obytes; + break; + + case MAC_STAT_OPACKETS: + *val = statsp->tdc_stats[r_index].opackets; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + /* ARGSUSED */ int nxge_m_stat(void *arg, uint_t stat, uint64_t *value) diff --git a/usr/src/uts/common/io/nxge/nxge_mac.c b/usr/src/uts/common/io/nxge/nxge_mac.c 
index dd8387652a..38aa5cc722 100644 --- a/usr/src/uts/common/io/nxge/nxge_mac.c +++ b/usr/src/uts/common/io/nxge/nxge_mac.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -3340,16 +3340,46 @@ fail: return (NXGE_ERROR | rs); } +static npi_status_t +nxge_rx_mac_mcast_hash_table(p_nxge_t nxgep) +{ + uint32_t i; + uint16_t hashtab_e; + p_hash_filter_t hash_filter; + uint8_t portn; + npi_handle_t handle; + npi_status_t rs = NPI_SUCCESS; + + portn = NXGE_GET_PORT_NUM(nxgep->function_num); + handle = nxgep->npi_handle; + + /* + * Load the multicast hash filter bits. + */ + hash_filter = nxgep->hash_filter; + for (i = 0; i < MAC_MAX_HASH_ENTRY; i++) { + if (hash_filter != NULL) { + hashtab_e = (uint16_t)hash_filter->hash_filter_regs[ + (NMCFILTER_REGS - 1) - i]; + } else { + hashtab_e = 0; + } + + if ((rs = npi_mac_hashtab_entry(handle, OP_SET, portn, i, + (uint16_t *)&hashtab_e)) != NPI_SUCCESS) + return (rs); + } -/* Initialize the RxMAC sub-block */ + return (NPI_SUCCESS); +} +/* + * Initialize the RxMAC sub-block + */ nxge_status_t nxge_rx_mac_init(p_nxge_t nxgep) { npi_attr_t ap; - uint32_t i; - uint16_t hashtab_e; - p_hash_filter_t hash_filter; nxge_port_t portt; uint8_t portn; npi_handle_t handle; @@ -3370,9 +3400,8 @@ nxge_rx_mac_init(p_nxge_t nxgep) addr0 = ntohs(addr16p[2]); addr1 = ntohs(addr16p[1]); addr2 = ntohs(addr16p[0]); - SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR, addr0, addr1, addr2, - rs); - + SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR, + addr0, addr1, addr2, rs); if (rs != NPI_SUCCESS) goto fail; SET_MAC_ATTR3(handle, ap, portn, MAC_PORT_ADDR_FILTER, 0, 0, 0, rs); @@ -3382,22 +3411,9 @@ nxge_rx_mac_init(p_nxge_t nxgep) if (rs != NPI_SUCCESS) goto fail; - /* - * Load the multicast hash filter bits. 
- */ - hash_filter = nxgep->hash_filter; - for (i = 0; i < MAC_MAX_HASH_ENTRY; i++) { - if (hash_filter != NULL) { - hashtab_e = (uint16_t)hash_filter->hash_filter_regs[ - (NMCFILTER_REGS - 1) - i]; - } else { - hashtab_e = 0; - } - - if ((rs = npi_mac_hashtab_entry(handle, OP_SET, portn, i, - (uint16_t *)&hashtab_e)) != NPI_SUCCESS) - goto fail; - } + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; if (portt == PORT_TYPE_XMAC) { if ((rs = npi_xmac_rx_iconfig(handle, INIT, portn, @@ -3413,48 +3429,51 @@ nxge_rx_mac_init(p_nxge_t nxgep) if (nxgep->filter.all_phys_cnt != 0) xconfig |= CFG_XMAC_RX_PROMISCUOUS; - if (nxgep->filter.all_multicast_cnt != 0) xconfig |= CFG_XMAC_RX_PROMISCUOUSGROUP; xconfig |= CFG_XMAC_RX_HASH_FILTER; - if ((rs = npi_xmac_rx_config(handle, INIT, portn, - xconfig)) != NPI_SUCCESS) + if ((rs = npi_xmac_rx_config(handle, INIT, + portn, xconfig)) != NPI_SUCCESS) goto fail; nxgep->mac.rx_config = xconfig; - /* Comparison of mac unique address is always enabled on XMAC */ - + /* + * Comparison of mac unique address is always + * enabled on XMAC + */ if ((rs = npi_xmac_zap_rx_counters(handle, portn)) != NPI_SUCCESS) goto fail; } else { - (void) nxge_fflp_init_hostinfo(nxgep); - if (npi_bmac_rx_iconfig(nxgep->npi_handle, INIT, portn, 0) != NPI_SUCCESS) goto fail; + nxgep->mac.rx_iconfig = NXGE_BMAC_RX_INTRS; + (void) nxge_fflp_init_hostinfo(nxgep); + bconfig = CFG_BMAC_RX_DISCARD_ON_ERR | CFG_BMAC_RX & ~CFG_BMAC_RX_STRIP_CRC; if (nxgep->filter.all_phys_cnt != 0) bconfig |= CFG_BMAC_RX_PROMISCUOUS; - if (nxgep->filter.all_multicast_cnt != 0) bconfig |= CFG_BMAC_RX_PROMISCUOUSGROUP; bconfig |= CFG_BMAC_RX_HASH_FILTER; - if ((rs = npi_bmac_rx_config(handle, INIT, portn, - bconfig)) != NPI_SUCCESS) + if ((rs = npi_bmac_rx_config(handle, INIT, + portn, bconfig)) != NPI_SUCCESS) goto fail; nxgep->mac.rx_config = bconfig; - /* Always enable comparison of mac unique address */ - if ((rs = npi_mac_altaddr_enable(handle, 
portn, 0)) - != NPI_SUCCESS) + /* + * Always enable comparison of mac unique address + */ + if ((rs = npi_mac_altaddr_enable(handle, + portn, 0)) != NPI_SUCCESS) goto fail; } @@ -4919,9 +4938,9 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) uint32_t mchash; p_hash_filter_t hash_filter; uint16_t hash_bit; - boolean_t rx_init = B_FALSE; uint_t j; nxge_status_t status = NXGE_OK; + npi_status_t rs; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_add_mcast_addr")); @@ -4933,6 +4952,7 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) nxgep->hash_filter = KMEM_ZALLOC(sizeof (hash_filter_t), KM_SLEEP); } + hash_filter = nxgep->hash_filter; j = mchash / HASH_REG_WIDTH; hash_bit = (1 << (mchash % HASH_REG_WIDTH)); @@ -4940,19 +4960,14 @@ nxge_add_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) hash_filter->hash_bit_ref_cnt[mchash]++; if (hash_filter->hash_bit_ref_cnt[mchash] == 1) { hash_filter->hash_ref_cnt++; - rx_init = B_TRUE; - } - if (rx_init) { - if ((status = nxge_rx_mac_disable(nxgep)) != NXGE_OK) - goto fail; - if ((status = nxge_rx_mac_enable(nxgep)) != NXGE_OK) - goto fail; } - RW_EXIT(&nxgep->filter_lock); + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; + RW_EXIT(&nxgep->filter_lock); NXGE_DEBUG_MSG((nxgep, MAC_CTL, "<== nxge_add_mcast_addr")); - return (NXGE_OK); fail: RW_EXIT(&nxgep->filter_lock); @@ -4969,9 +4984,9 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) uint32_t mchash; p_hash_filter_t hash_filter; uint16_t hash_bit; - boolean_t rx_init = B_FALSE; uint_t j; nxge_status_t status = NXGE_OK; + npi_status_t rs; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_del_mcast_addr")); RW_ENTER_WRITER(&nxgep->filter_lock); @@ -4990,8 +5005,8 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) hash_bit = (1 << (mchash % HASH_REG_WIDTH)); hash_filter->hash_filter_regs[j] &= ~hash_bit; hash_filter->hash_ref_cnt--; - rx_init = B_TRUE; } + if (hash_filter->hash_ref_cnt == 0) { 
NXGE_DEBUG_MSG((NULL, STR_CTL, "De-allocating hash filter storage.")); @@ -4999,12 +5014,10 @@ nxge_del_mcast_addr(p_nxge_t nxgep, struct ether_addr *addrp) nxgep->hash_filter = NULL; } - if (rx_init) { - if ((status = nxge_rx_mac_disable(nxgep)) != NXGE_OK) - goto fail; - if ((status = nxge_rx_mac_enable(nxgep)) != NXGE_OK) - goto fail; - } + rs = nxge_rx_mac_mcast_hash_table(nxgep); + if (rs != NPI_SUCCESS) + goto fail; + RW_EXIT(&nxgep->filter_lock); NXGE_DEBUG_MSG((nxgep, MAC_CTL, "<== nxge_del_mcast_addr")); diff --git a/usr/src/uts/common/io/nxge/nxge_main.c b/usr/src/uts/common/io/nxge/nxge_main.c index c8df562520..885f521ed3 100644 --- a/usr/src/uts/common/io/nxge/nxge_main.c +++ b/usr/src/uts/common/io/nxge/nxge_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -296,12 +296,13 @@ static boolean_t nxge_m_getcapab(void *, mac_capab_t, void *); static int nxge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int nxge_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void nxge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); +static void nxge_priv_propinfo(const char *, mac_prop_info_handle_t); static int nxge_set_priv_prop(nxge_t *, const char *, uint_t, const void *); -static int nxge_get_priv_prop(nxge_t *, const char *, uint_t, uint_t, - void *, uint_t *); -static int nxge_get_def_val(nxge_t *, mac_prop_id_t, uint_t, void *); +static int nxge_get_priv_prop(nxge_t *, const char *, uint_t, void *); static void nxge_fill_ring(void *, mac_ring_type_t, const int, const int, mac_ring_info_t *, mac_ring_handle_t); static void nxge_group_add_ring(mac_group_driver_t, mac_ring_driver_t, @@ -312,34 +313,32 @@ static void nxge_group_rem_ring(mac_group_driver_t, mac_ring_driver_t, static void 
nxge_niu_peu_reset(p_nxge_t nxgep); static void nxge_set_pci_replay_timeout(nxge_t *); -mac_priv_prop_t nxge_priv_props[] = { - {"_adv_10gfdx_cap", MAC_PROP_PERM_RW}, - {"_adv_pause_cap", MAC_PROP_PERM_RW}, - {"_function_number", MAC_PROP_PERM_READ}, - {"_fw_version", MAC_PROP_PERM_READ}, - {"_port_mode", MAC_PROP_PERM_READ}, - {"_hot_swap_phy", MAC_PROP_PERM_READ}, - {"_rxdma_intr_time", MAC_PROP_PERM_RW}, - {"_rxdma_intr_pkts", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv4_sctp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_tcp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_udp", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_ah", MAC_PROP_PERM_RW}, - {"_class_opt_ipv6_sctp", MAC_PROP_PERM_RW}, - {"_soft_lso_enable", MAC_PROP_PERM_RW} +char *nxge_priv_props[] = { + "_adv_10gfdx_cap", + "_adv_pause_cap", + "_function_number", + "_fw_version", + "_port_mode", + "_hot_swap_phy", + "_rxdma_intr_time", + "_rxdma_intr_pkts", + "_class_opt_ipv4_tcp", + "_class_opt_ipv4_udp", + "_class_opt_ipv4_ah", + "_class_opt_ipv4_sctp", + "_class_opt_ipv6_tcp", + "_class_opt_ipv6_udp", + "_class_opt_ipv6_ah", + "_class_opt_ipv6_sctp", + "_soft_lso_enable", + NULL }; -#define NXGE_MAX_PRIV_PROPS \ - (sizeof (nxge_priv_props)/sizeof (mac_priv_prop_t)) - #define NXGE_NEPTUNE_MAGIC 0x4E584745UL #define MAX_DUMP_SZ 256 #define NXGE_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO) mac_callbacks_t nxge_m_callbacks = { NXGE_M_CALLBACK_FLAGS, @@ -350,12 +349,14 @@ mac_callbacks_t nxge_m_callbacks = { nxge_m_multicst, NULL, NULL, + NULL, nxge_m_ioctl, nxge_m_getcapab, NULL, NULL, nxge_m_setprop, - nxge_m_getprop + nxge_m_getprop, + nxge_m_propinfo }; void @@ -4547,16 +4548,12 @@ nxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val) { nxge_t 
*nxgep = barg; - p_nxge_param_t param_arr; - p_nxge_stats_t statsp; + p_nxge_param_t param_arr = nxgep->param_arr; + p_nxge_stats_t statsp = nxgep->statsp; int err = 0; - uint8_t val; - uint32_t cur_mtu, new_mtu, old_framesize; - link_flowctrl_t fl; NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_m_setprop")); - param_arr = nxgep->param_arr; - statsp = nxgep->statsp; + mutex_enter(nxgep->genlock); if (statsp->port_stats.lb_mode != nxge_lb_normal && nxge_param_locked(pr_num)) { @@ -4570,139 +4567,115 @@ nxge_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, return (EBUSY); } - val = *(uint8_t *)pr_val; switch (pr_num) { - case MAC_PROP_EN_1000FDX_CAP: - nxgep->param_en_1000fdx = val; - param_arr[param_anar_1000fdx].value = val; - - goto reprogram; + case MAC_PROP_EN_1000FDX_CAP: + nxgep->param_en_1000fdx = + param_arr[param_anar_1000fdx].value = *(uint8_t *)pr_val; + goto reprogram; - case MAC_PROP_EN_100FDX_CAP: - nxgep->param_en_100fdx = val; - param_arr[param_anar_100fdx].value = val; + case MAC_PROP_EN_100FDX_CAP: + nxgep->param_en_100fdx = + param_arr[param_anar_100fdx].value = *(uint8_t *)pr_val; + goto reprogram; - goto reprogram; + case MAC_PROP_EN_10FDX_CAP: + nxgep->param_en_10fdx = + param_arr[param_anar_10fdx].value = *(uint8_t *)pr_val; + goto reprogram; - case MAC_PROP_EN_10FDX_CAP: - nxgep->param_en_10fdx = val; - param_arr[param_anar_10fdx].value = val; + case MAC_PROP_AUTONEG: + param_arr[param_autoneg].value = *(uint8_t *)pr_val; + goto reprogram; - goto reprogram; + case MAC_PROP_MTU: { + uint32_t cur_mtu, new_mtu, old_framesize; - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_STATUS: - case MAC_PROP_SPEED: - case MAC_PROP_DUPLEX: - err = EINVAL; /* cannot set read-only properties */ - 
NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: read only property %d", - pr_num)); - break; + cur_mtu = nxgep->mac.default_mtu; + ASSERT(pr_valsize >= sizeof (new_mtu)); + bcopy(pr_val, &new_mtu, sizeof (new_mtu)); - case MAC_PROP_AUTONEG: - param_arr[param_autoneg].value = val; + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: set MTU: %d is_jumbo %d", + new_mtu, nxgep->mac.is_jumbo)); - goto reprogram; + if (new_mtu == cur_mtu) { + err = 0; + break; + } - case MAC_PROP_MTU: - cur_mtu = nxgep->mac.default_mtu; - bcopy(pr_val, &new_mtu, sizeof (new_mtu)); - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: set MTU: %d is_jumbo %d", - new_mtu, nxgep->mac.is_jumbo)); + if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) { + err = EBUSY; + break; + } - if (new_mtu == cur_mtu) { - err = 0; - break; - } + if ((new_mtu < NXGE_DEFAULT_MTU) || + (new_mtu > NXGE_MAXIMUM_MTU)) { + err = EINVAL; + break; + } - if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) { - err = EBUSY; - break; - } + old_framesize = (uint32_t)nxgep->mac.maxframesize; + nxgep->mac.maxframesize = (uint16_t) + (new_mtu + NXGE_EHEADER_VLAN_CRC); + if (nxge_mac_set_framesize(nxgep)) { + nxgep->mac.maxframesize = + (uint16_t)old_framesize; + err = EINVAL; + break; + } - if ((new_mtu < NXGE_DEFAULT_MTU) || - (new_mtu > NXGE_MAXIMUM_MTU)) { - err = EINVAL; - break; - } + nxgep->mac.default_mtu = new_mtu; + nxgep->mac.is_jumbo = (new_mtu > NXGE_DEFAULT_MTU); - old_framesize = (uint32_t)nxgep->mac.maxframesize; - nxgep->mac.maxframesize = (uint16_t) - (new_mtu + NXGE_EHEADER_VLAN_CRC); - if (nxge_mac_set_framesize(nxgep)) { - nxgep->mac.maxframesize = - (uint16_t)old_framesize; - err = EINVAL; - break; - } + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: set MTU: %d maxframe %d", + new_mtu, nxgep->mac.maxframesize)); + break; + } - err = mac_maxsdu_update(nxgep->mach, new_mtu); - if (err) { - nxgep->mac.maxframesize = - (uint16_t)old_framesize; - err = EINVAL; - break; - } + case 
MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl; - nxgep->mac.default_mtu = new_mtu; - if (new_mtu > NXGE_DEFAULT_MTU) - nxgep->mac.is_jumbo = B_TRUE; - else - nxgep->mac.is_jumbo = B_FALSE; + ASSERT(pr_valsize >= sizeof (fl)); + bcopy(pr_val, &fl, sizeof (fl)); - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: set MTU: %d maxframe %d", - new_mtu, nxgep->mac.maxframesize)); + switch (fl) { + case LINK_FLOWCTRL_NONE: + param_arr[param_anar_pause].value = 0; break; - case MAC_PROP_FLOWCTRL: - bcopy(pr_val, &fl, sizeof (fl)); - switch (fl) { - default: - err = EINVAL; - break; - - case LINK_FLOWCTRL_NONE: - param_arr[param_anar_pause].value = 0; - break; - - case LINK_FLOWCTRL_RX: - param_arr[param_anar_pause].value = 1; - break; + case LINK_FLOWCTRL_RX: + param_arr[param_anar_pause].value = 1; + break; - case LINK_FLOWCTRL_TX: - case LINK_FLOWCTRL_BI: + case LINK_FLOWCTRL_TX: + case LINK_FLOWCTRL_BI: + err = EINVAL; + break; + default: + err = EINVAL; + break; + } +reprogram: + if ((err == 0) && !isLDOMguest(nxgep)) { + if (!nxge_param_link_update(nxgep)) { err = EINVAL; - break; } + } else { + err = EINVAL; + } + break; + } -reprogram: - if (err == 0) { - if (!nxge_param_link_update(nxgep)) { - err = EINVAL; - } - } - break; - case MAC_PROP_PRIVATE: - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_setprop: private property")); - err = nxge_set_priv_prop(nxgep, pr_name, pr_valsize, - pr_val); - break; + case MAC_PROP_PRIVATE: + NXGE_DEBUG_MSG((nxgep, NXGE_CTL, + "==> nxge_m_setprop: private property")); + err = nxge_set_priv_prop(nxgep, pr_name, pr_valsize, pr_val); + break; - default: - err = ENOTSUP; - break; + default: + err = ENOTSUP; + break; } mutex_exit(nxgep->genlock); @@ -4714,142 +4687,198 @@ reprogram: static int nxge_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { nxge_t *nxgep = barg; p_nxge_param_t param_arr = nxgep->param_arr; 
p_nxge_stats_t statsp = nxgep->statsp; - int err = 0; - link_flowctrl_t fl; - uint64_t tmp = 0; - link_state_t ls; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_m_getprop: pr_num %d", pr_num)); - if (pr_valsize == 0) - return (EINVAL); + switch (pr_num) { + case MAC_PROP_DUPLEX: + *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; + break; - *perm = MAC_PROP_PERM_RW; + case MAC_PROP_SPEED: { + uint64_t val = statsp->mac_stats.link_speed * 1000000ull; - if ((is_default) && (pr_num != MAC_PROP_PRIVATE)) { - err = nxge_get_def_val(nxgep, pr_num, pr_valsize, pr_val); - return (err); + ASSERT(pr_valsize >= sizeof (val)); + bcopy(&val, pr_val, sizeof (val)); + break; } - bzero(pr_val, pr_valsize); - switch (pr_num) { - case MAC_PROP_DUPLEX: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = statsp->mac_stats.link_duplex; - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, - "==> nxge_m_getprop: duplex mode %d", - *(uint8_t *)pr_val)); - break; + case MAC_PROP_STATUS: { + link_state_t state = statsp->mac_stats.link_up ? + LINK_STATE_UP : LINK_STATE_DOWN; - case MAC_PROP_SPEED: - if (pr_valsize < sizeof (uint64_t)) - return (EINVAL); - *perm = MAC_PROP_PERM_READ; - tmp = statsp->mac_stats.link_speed * 1000000ull; - bcopy(&tmp, pr_val, sizeof (tmp)); - break; + ASSERT(pr_valsize >= sizeof (state)); + bcopy(&state, pr_val, sizeof (state)); + break; + } - case MAC_PROP_STATUS: - if (pr_valsize < sizeof (link_state_t)) - return (EINVAL); - *perm = MAC_PROP_PERM_READ; - if (!statsp->mac_stats.link_up) - ls = LINK_STATE_DOWN; - else - ls = LINK_STATE_UP; - bcopy(&ls, pr_val, sizeof (ls)); - break; + case MAC_PROP_AUTONEG: + *(uint8_t *)pr_val = param_arr[param_autoneg].value; + break; - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = - param_arr[param_autoneg].value; - break; + case MAC_PROP_FLOWCTRL: { + link_flowctrl_t fl = param_arr[param_anar_pause].value != 0 ? 
+ LINK_FLOWCTRL_RX : LINK_FLOWCTRL_NONE; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); + ASSERT(pr_valsize >= sizeof (fl)); + bcopy(&fl, pr_val, sizeof (fl)); + break; + } - fl = LINK_FLOWCTRL_NONE; - if (param_arr[param_anar_pause].value) { - fl = LINK_FLOWCTRL_RX; - } - bcopy(&fl, pr_val, sizeof (fl)); - break; + case MAC_PROP_ADV_1000FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_1000fdx].value; + break; - case MAC_PROP_ADV_1000FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_1000fdx].value; - break; + case MAC_PROP_EN_1000FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_1000fdx; + break; - case MAC_PROP_EN_1000FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_1000fdx; - break; + case MAC_PROP_ADV_100FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_100fdx].value; + break; - case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_100fdx].value; - break; + case MAC_PROP_EN_100FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_100fdx; + break; - case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_100fdx; - break; + case MAC_PROP_ADV_10FDX_CAP: + *(uint8_t *)pr_val = param_arr[param_anar_10fdx].value; + break; - case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - *(uint8_t *)pr_val = - param_arr[param_anar_10fdx].value; - break; + case MAC_PROP_EN_10FDX_CAP: + *(uint8_t *)pr_val = nxgep->param_en_10fdx; + break; - case MAC_PROP_EN_10FDX_CAP: - *(uint8_t *)pr_val = nxgep->param_en_10fdx; - break; + case MAC_PROP_PRIVATE: + return (nxge_get_priv_prop(nxgep, pr_name, pr_valsize, + pr_val)); - case MAC_PROP_EN_1000HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_ADV_10HDX_CAP: - err = ENOTSUP; - break; + default: + return (ENOTSUP); + } - case MAC_PROP_PRIVATE: - err = nxge_get_priv_prop(nxgep, pr_name, pr_flags, 
- pr_valsize, pr_val, perm); - break; + return (0); +} - case MAC_PROP_MTU: { - mac_propval_range_t range; - - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = - range.range_uint32[0].mpur_max = NXGE_DEFAULT_MTU; - range.range_uint32[0].mpur_max = NXGE_MAXIMUM_MTU; - bcopy(&range, pr_val, sizeof (range)); - break; - } - default: - err = EINVAL; - break; +static void +nxge_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + nxge_t *nxgep = barg; + p_nxge_stats_t statsp = nxgep->statsp; + + /* + * By default permissions are read/write unless specified + * otherwise by the driver. + */ + + switch (pr_num) { + case MAC_PROP_DUPLEX: + case MAC_PROP_SPEED: + case MAC_PROP_STATUS: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_EN_100HDX_CAP: + case MAC_PROP_EN_10HDX_CAP: + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + /* + * Note that read-only properties don't need to + * provide default values since they cannot be + * changed by the administrator. 
+ */ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_100FDX_CAP: + case MAC_PROP_EN_10FDX_CAP: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_AUTONEG: + mac_prop_info_set_default_uint8(prh, 1); + break; + + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, LINK_FLOWCTRL_RX); + break; + + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + NXGE_DEFAULT_MTU, NXGE_MAXIMUM_MTU); + break; + + case MAC_PROP_PRIVATE: + nxge_priv_propinfo(pr_name, prh); + break; + } + + mutex_enter(nxgep->genlock); + if (statsp->port_stats.lb_mode != nxge_lb_normal && + nxge_param_locked(pr_num)) { + /* + * Some properties are locked (read-only) while the + * device is in any sort of loopback mode. + */ + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); } + mutex_exit(nxgep->genlock); +} - NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "<== nxge_m_getprop")); +static void +nxge_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[64]; - return (err); + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_function_number") == 0 || + strcmp(pr_name, "_fw_version") == 0 || + strcmp(pr_name, "_port_mode") == 0 || + strcmp(pr_name, "_hot_swap_phy") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + + } else if (strcmp(pr_name, "_rxdma_intr_time") == 0) { + (void) snprintf(valstr, sizeof (valstr), + "%d", RXDMA_RCR_TO_DEFAULT); + + } else if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { + (void) snprintf(valstr, sizeof (valstr), + "%d", RXDMA_RCR_PTHRES_DEFAULT); + + } else if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv4_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv4_sctp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_tcp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_udp") == 0 || + strcmp(pr_name, "_class_opt_ipv6_ah") == 0 || + strcmp(pr_name, "_class_opt_ipv6_sctp") == 
0) { + (void) snprintf(valstr, sizeof (valstr), "%x", + NXGE_CLASS_FLOW_GEN_SERVER); + + } else if (strcmp(pr_name, "_soft_lso_enable") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 0); + + } else if (strcmp(pr_name, "_adv_10gfdx_cap") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 1); + + } else if (strcmp(pr_name, "_adv_pause_cap") == 0) { + (void) snprintf(valstr, sizeof (valstr), "%d", 1); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); } /* ARGSUSED */ @@ -5104,23 +5133,19 @@ nxge_set_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_valsize, } static int -nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, - uint_t pr_valsize, void *pr_val, uint_t *perm) +nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_valsize, + void *pr_val) { p_nxge_param_t param_arr = nxgep->param_arr; char valstr[MAXNAMELEN]; int err = EINVAL; uint_t strsize; - boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, "==> nxge_get_priv_prop: property %s", pr_name)); /* function number */ if (strcmp(pr_name, "_function_number") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->function_num); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5134,9 +5159,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Neptune firmware version */ if (strcmp(pr_name, "_fw_version") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%s", nxgep->vpd_info.ver); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5150,9 +5172,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* port PHY mode */ if (strcmp(pr_name, "_port_mode") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; switch (nxgep->mac.portmode) { case PORT_1G_COPPER: (void) snprintf(valstr, sizeof (valstr), "1G copper %s", 
@@ -5221,9 +5240,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Hot swappable PHY */ if (strcmp(pr_name, "_hot_swap_phy") == 0) { - if (is_default) - return (ENOTSUP); - *perm = MAC_PROP_PERM_READ; (void) snprintf(valstr, sizeof (valstr), "%s", nxgep->hot_swappable_phy ? "yes" : "no"); @@ -5241,12 +5257,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Receive Interrupt Blanking Parameters */ if (strcmp(pr_name, "_rxdma_intr_time") == 0) { err = 0; - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), - "%d", RXDMA_RCR_TO_DEFAULT); - goto done; - } - (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->intr_timeout); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5258,11 +5268,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, if (strcmp(pr_name, "_rxdma_intr_pkts") == 0) { err = 0; - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), - "%d", RXDMA_RCR_PTHRES_DEFAULT); - goto done; - } (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->intr_threshold); NXGE_DEBUG_MSG((nxgep, NXGE_CTL, @@ -5274,12 +5279,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Classification and Load Distribution Configuration */ if (strcmp(pr_name, "_class_opt_ipv4_tcp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv4_tcp]); @@ -5292,12 +5291,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv4_udp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv4_udp]); @@ -5309,12 +5302,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, goto done; } if (strcmp(pr_name, 
"_class_opt_ipv4_ah") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv4_ah]); @@ -5327,12 +5314,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv4_sctp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv4_sctp]); @@ -5345,12 +5326,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_tcp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv6_tcp]); @@ -5363,12 +5338,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_udp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv6_udp]); @@ -5381,12 +5350,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_ah") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, (caddr_t)¶m_arr[param_class_opt_ipv6_ah]); @@ -5399,12 +5362,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_class_opt_ipv6_sctp") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%x", - NXGE_CLASS_FLOW_GEN_SERVER); - err = 0; - goto done; - } err = nxge_dld_get_ip_opt(nxgep, 
(caddr_t)¶m_arr[param_class_opt_ipv6_sctp]); @@ -5418,11 +5375,6 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, /* Software LSO */ if (strcmp(pr_name, "_soft_lso_enable") == 0) { - if (is_default) { - (void) snprintf(valstr, sizeof (valstr), "%d", 0); - err = 0; - goto done; - } (void) snprintf(valstr, sizeof (valstr), "%d", nxgep->soft_lso_enable); err = 0; @@ -5434,8 +5386,7 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_adv_10gfdx_cap") == 0) { err = 0; - if (is_default || - nxgep->param_arr[param_anar_10gfdx].value != 0) { + if (nxgep->param_arr[param_anar_10gfdx].value != 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 1); goto done; } else { @@ -5445,8 +5396,7 @@ nxge_get_priv_prop(p_nxge_t nxgep, const char *pr_name, uint_t pr_flags, } if (strcmp(pr_name, "_adv_pause_cap") == 0) { err = 0; - if (is_default || - nxgep->param_arr[param_anar_pause].value != 0) { + if (nxgep->param_arr[param_anar_pause].value != 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 1); goto done; } else { @@ -5587,6 +5537,7 @@ nxge_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) ring = nxgep->tx_rings->rings[channel]; MUTEX_ENTER(&ring->lock); + ASSERT(ring->tx_ring_handle == NULL); ring->tx_ring_handle = rhp->ring_handle; MUTEX_EXIT(&ring->lock); @@ -5605,11 +5556,12 @@ nxge_tx_ring_stop(mac_ring_driver_t rdriver) ring = nxgep->tx_rings->rings[channel]; MUTEX_ENTER(&ring->lock); + ASSERT(ring->tx_ring_handle != NULL); ring->tx_ring_handle = (mac_ring_handle_t)NULL; MUTEX_EXIT(&ring->lock); } -static int +int nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) { p_nxge_ring_handle_t rhp = (p_nxge_ring_handle_t)rdriver; @@ -5623,23 +5575,25 @@ nxge_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num) MUTEX_ENTER(&ring->lock); - if (nxgep->rx_channel_started[channel] == B_TRUE) { + if (ring->started) { + ASSERT(ring->started == B_FALSE); 
MUTEX_EXIT(&ring->lock); return (0); } /* set rcr_ring */ for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { - if ((nxgep->ldgvp->ldvp[i].is_rxdma == 1) && + if ((nxgep->ldgvp->ldvp[i].is_rxdma) && (nxgep->ldgvp->ldvp[i].channel == channel)) { ring->ldvp = &nxgep->ldgvp->ldvp[i]; ring->ldgp = nxgep->ldgvp->ldvp[i].ldgp; } } - nxgep->rx_channel_started[channel] = B_TRUE; ring->rcr_mac_handle = rhp->ring_handle; ring->rcr_gen_num = mr_gen_num; + ring->started = B_TRUE; + rhp->ring_gen_num = mr_gen_num; MUTEX_EXIT(&ring->lock); return (0); @@ -5657,11 +5611,53 @@ nxge_rx_ring_stop(mac_ring_driver_t rdriver) ring = nxgep->rx_rcr_rings->rcr_rings[channel]; MUTEX_ENTER(&ring->lock); - nxgep->rx_channel_started[channel] = B_FALSE; + ASSERT(ring->started == B_TRUE); ring->rcr_mac_handle = NULL; + ring->ldvp = NULL; + ring->ldgp = NULL; + ring->started = B_FALSE; MUTEX_EXIT(&ring->lock); } +static int +nxge_ring_get_htable_idx(p_nxge_t nxgep, mac_ring_type_t type, uint32_t channel) +{ + int i; + +#if defined(sun4v) + if (isLDOMguest(nxgep)) { + return (nxge_hio_get_dc_htable_idx(nxgep, + (type == MAC_RING_TYPE_TX) ? VP_BOUND_TX : VP_BOUND_RX, + channel)); + } +#endif + + ASSERT(nxgep->ldgvp != NULL); + + switch (type) { + case MAC_RING_TYPE_TX: + for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { + if ((nxgep->ldgvp->ldvp[i].is_txdma) && + (nxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + nxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + break; + + case MAC_RING_TYPE_RX: + for (i = 0; i < nxgep->ldgvp->maxldvs; i++) { + if ((nxgep->ldgvp->ldvp[i].is_rxdma) && + (nxgep->ldgvp->ldvp[i].channel == channel)) { + return ((int) + nxgep->ldgvp->ldvp[i].ldgp->htable_idx); + } + } + } + + return (-1); +} + /* * Callback funtion for MAC layer to register all rings. 
*/ @@ -5671,13 +5667,22 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, { p_nxge_t nxgep = (p_nxge_t)arg; p_nxge_hw_pt_cfg_t p_cfgp = &nxgep->pt_config.hw_config; + p_nxge_intr_t intrp; + uint32_t channel; + int htable_idx; + p_nxge_ring_handle_t rhandlep; + + ASSERT(nxgep != NULL); + ASSERT(p_cfgp != NULL); + ASSERT(infop != NULL); - NXGE_DEBUG_MSG((nxgep, TX_CTL, + NXGE_DEBUG_MSG((nxgep, DDI_CTL, "==> nxge_fill_ring 0x%x index %d", rtype, index)); + switch (rtype) { case MAC_RING_TYPE_TX: { - p_nxge_ring_handle_t rhandlep; + mac_intr_t *mintr = &infop->mri_intr; NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_fill_ring (TX) 0x%x index %d ntdcs %d", @@ -5689,17 +5694,31 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, rhandlep->index = index; rhandlep->ring_handle = rh; + channel = nxgep->pt_config.hw_config.tdc.start + index; + rhandlep->channel = channel; + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; + htable_idx = nxge_ring_get_htable_idx(nxgep, rtype, + channel); + if (htable_idx >= 0) + mintr->mi_ddi_handle = intrp->htable[htable_idx]; + else + mintr->mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhandlep; infop->mri_start = nxge_tx_ring_start; infop->mri_stop = nxge_tx_ring_stop; infop->mri_tx = nxge_tx_ring_send; - + infop->mri_stat = nxge_tx_ring_stat; + infop->mri_flags = MAC_RING_TX_SERIALIZE; break; } + case MAC_RING_TYPE_RX: { - p_nxge_ring_handle_t rhandlep; - int nxge_rindex; mac_intr_t nxge_mac_intr; + int nxge_rindex; + p_nxge_intr_t intrp; + + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_fill_ring (RX) 0x%x index %d nrdcs %d", @@ -5710,34 +5729,47 @@ nxge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, * Find the ring index in the nxge instance. 
*/ nxge_rindex = nxge_get_rxring_index(nxgep, rg_index, index); + channel = nxgep->pt_config.hw_config.start_rdc + index; + intrp = (p_nxge_intr_t)&nxgep->nxge_intr_type; ASSERT((nxge_rindex >= 0) && (nxge_rindex < p_cfgp->max_rdcs)); rhandlep = &nxgep->rx_ring_handles[nxge_rindex]; rhandlep->nxgep = nxgep; rhandlep->index = nxge_rindex; rhandlep->ring_handle = rh; + rhandlep->channel = channel; /* * Entrypoint to enable interrupt (disable poll) and * disable interrupt (enable poll). */ + bzero(&nxge_mac_intr, sizeof (nxge_mac_intr)); nxge_mac_intr.mi_handle = (mac_intr_handle_t)rhandlep; nxge_mac_intr.mi_enable = (mac_intr_enable_t)nxge_disable_poll; nxge_mac_intr.mi_disable = (mac_intr_disable_t)nxge_enable_poll; + + htable_idx = nxge_ring_get_htable_idx(nxgep, rtype, + channel); + if (htable_idx >= 0) + nxge_mac_intr.mi_ddi_handle = intrp->htable[htable_idx]; + else + nxge_mac_intr.mi_ddi_handle = NULL; + infop->mri_driver = (mac_ring_driver_t)rhandlep; infop->mri_start = nxge_rx_ring_start; infop->mri_stop = nxge_rx_ring_stop; - infop->mri_intr = nxge_mac_intr; /* ??? 
*/ + infop->mri_intr = nxge_mac_intr; infop->mri_poll = nxge_rx_poll; - + infop->mri_stat = nxge_rx_ring_stat; + infop->mri_flags = MAC_RING_RX_ENQUEUE; break; } + default: break; } - NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_fill_ring 0x%x", - rtype)); + NXGE_DEBUG_MSG((nxgep, DDI_CTL, "<== nxge_fill_ring 0x%x", rtype)); } static void @@ -6181,6 +6213,8 @@ nxge_add_intrs_adv_type(p_nxge_t nxgep, uint32_t int_type) return (NXGE_ERROR | NXGE_DDI_FAILED); } + + ldgp->htable_idx = x; intrp->intr_added++; } @@ -6341,6 +6375,8 @@ nxge_add_intrs_adv_type_fix(p_nxge_t nxgep, uint32_t int_type) return (NXGE_ERROR | NXGE_DDI_FAILED); } + + ldgp->htable_idx = x; intrp->intr_added++; } @@ -6516,13 +6552,10 @@ nxge_mac_register(p_nxge_t nxgep) macp->m_max_sdu = nxgep->mac.default_mtu; macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = nxge_priv_props; - macp->m_priv_prop_count = NXGE_MAX_PRIV_PROPS; - if (isLDOMguest(nxgep)) { - macp->m_v12n = MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE; - } else { - macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | \ - MAC_VIRT_SERIALIZE; - } + if (isLDOMguest(nxgep)) + macp->m_v12n = MAC_VIRT_LEVEL1; + else + macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1; NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_mac_register: instance %d " @@ -6975,40 +7008,6 @@ nxge_create_msi_property(p_nxge_t nxgep) return (nmsi); } -/* ARGSUSED */ -static int -nxge_get_def_val(nxge_t *nxgep, mac_prop_id_t pr_num, uint_t pr_valsize, - void *pr_val) -{ - int err = 0; - link_flowctrl_t fl; - - switch (pr_num) { - case MAC_PROP_AUTONEG: - *(uint8_t *)pr_val = 1; - break; - case MAC_PROP_FLOWCTRL: - if (pr_valsize < sizeof (link_flowctrl_t)) - return (EINVAL); - fl = LINK_FLOWCTRL_RX; - bcopy(&fl, pr_val, sizeof (fl)); - break; - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - *(uint8_t *)pr_val = 1; - break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - *(uint8_t *)pr_val = 1; - break; - default: - err = ENOTSUP; - break; - } - return (err); -} - - 
/* * The following is a software around for the Neptune hardware's * interrupt bugs; The Neptune hardware may generate spurious interrupts when diff --git a/usr/src/uts/common/io/nxge/nxge_rxdma.c b/usr/src/uts/common/io/nxge/nxge_rxdma.c index 9751396cf8..16931c739b 100644 --- a/usr/src/uts/common/io/nxge/nxge_rxdma.c +++ b/usr/src/uts/common/io/nxge/nxge_rxdma.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1814,7 +1814,7 @@ nxge_rx_intr(void *arg1, void *arg2) channel = ldvp->channel; ldgp = ldvp->ldgp; - if (!isLDOMguest(nxgep) && (!nxgep->rx_channel_started[channel])) { + if (!isLDOMguest(nxgep) && (!rcrp->started)) { NXGE_DEBUG_MSG((nxgep, INT_CTL, "<== nxge_rx_intr: channel is not started")); @@ -2718,8 +2718,7 @@ nxge_receive_packet(p_nxge_t nxgep, is_valid, multi, is_tcp_udp, frag, error_type)); if (is_tcp_udp && !frag && !error_type) { - (void) hcksum_assoc(nmp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM_OK | HCK_FULLCKSUM, 0); + mac_hcksum_set(nmp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_receive_packet: Full tcp/udp cksum " "is_valid 0x%x multi 0x%llx pkt %d frag %d " diff --git a/usr/src/uts/common/io/nxge/nxge_send.c b/usr/src/uts/common/io/nxge/nxge_send.c index 4f7edf292a..7b78fa8af6 100644 --- a/usr/src/uts/common/io/nxge/nxge_send.c +++ b/usr/src/uts/common/io/nxge/nxge_send.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -62,6 +62,8 @@ nxge_tx_ring_task(void *arg) { p_tx_ring_t ring = (p_tx_ring_t)arg; + ASSERT(ring->tx_ring_handle != NULL); + MUTEX_ENTER(&ring->lock); (void) nxge_txdma_reclaim(ring->nxgep, ring, 0); MUTEX_EXIT(&ring->lock); @@ -274,8 +276,8 @@ nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) } } - hcksum_retrieve(mp, NULL, NULL, &start_offset, - &stuff_offset, &end_offset, &value, &cksum_flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, + &value, &cksum_flags); if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) { start_offset += sizeof (ether_header_t); stuff_offset += sizeof (ether_header_t); @@ -809,7 +811,7 @@ nxge_start_control_header_only: i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); if (ngathers > nxge_tx_max_gathers) { good_packet = B_FALSE; - hcksum_retrieve(mp, NULL, NULL, &start_offset, + mac_hcksum_get(mp, &start_offset, &stuff_offset, &end_offset, &value, &cksum_flags); diff --git a/usr/src/uts/common/io/nxge/nxge_txdma.c b/usr/src/uts/common/io/nxge/nxge_txdma.c index 68b823b01c..f3fd19a3c2 100644 --- a/usr/src/uts/common/io/nxge/nxge_txdma.c +++ b/usr/src/uts/common/io/nxge/nxge_txdma.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1021,7 +1021,7 @@ nxge_txdma_reclaim(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, int nmblks) "==> nxge_txdma_reclaim: dump desc:")); pkt_len = tx_desc_pp->bits.hdw.tr_len; - tdc_stats->obytes += pkt_len; + tdc_stats->obytes += (pkt_len - TX_PKT_HEADER_SIZE); tdc_stats->opackets += tx_desc_pp->bits.hdw.sop; NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_txdma_reclaim: pkt_len %d " diff --git a/usr/src/uts/common/io/nxge/nxge_virtual.c b/usr/src/uts/common/io/nxge/nxge_virtual.c index 27840f8b30..395ded69b7 100644 --- a/usr/src/uts/common/io/nxge/nxge_virtual.c +++ b/usr/src/uts/common/io/nxge/nxge_virtual.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2542,10 +2542,6 @@ nxge_set_hw_dma_config(p_nxge_t nxgep) tdc_grp_p->grp_index = group->index; } - for (i = 0; i < NXGE_MAX_RDCS; i++) { - nxgep->rx_channel_started[i] = B_FALSE; - } - /* * Setup RDC groups */ diff --git a/usr/src/uts/common/io/pcan/pcan.c b/usr/src/uts/common/io/pcan/pcan.c index a22601cca2..be1fbf4aec 100644 --- a/usr/src/uts/common/io/pcan/pcan.c +++ b/usr/src/uts/common/io/pcan/pcan.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -103,11 +103,12 @@ static int pcan_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int pcan_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void pcan_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); mac_callbacks_t pcan_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, pcan_gstat, pcan_start, pcan_stop, @@ -115,12 +116,14 @@ mac_callbacks_t pcan_m_callbacks = { pcan_sdmulti, pcan_saddr, pcan_tx, + NULL, pcan_ioctl, NULL, NULL, NULL, pcan_m_setprop, - pcan_m_getprop + pcan_m_getprop, + pcan_m_propinfo }; static char *pcan_name_str = "pcan"; @@ -4525,7 +4528,7 @@ pcan_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; pcan_maci_t *pcan_p = (pcan_maci_t *)arg; @@ -4536,9 +4539,6 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, err = EINVAL; return (err); } - bzero(wldp_buf, wldp_length); - - *perm = MAC_PROP_PERM_RW; switch (wldp_pr_num) { /* mac_prop_id */ @@ -4558,22 +4558,18 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, pcan_get_encrypt(pcan_p, wldp_buf); break; case MAC_PROP_WL_BSSTYPE: - *perm = MAC_PROP_PERM_READ; pcan_get_bsstype(pcan_p, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: pcan_get_linkstatus(pcan_p, wldp_buf); break; case MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; pcan_get_esslist(pcan_p, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; pcan_get_suprates(wldp_buf); 
break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; err = pcan_get_rssi(pcan_p, wldp_buf); break; case MAC_PROP_WL_RADIO: @@ -4610,6 +4606,23 @@ pcan_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static void +pcan_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + _NOTE(ARGUNUSED(arg, pr_name)); + + switch (wldp_pr_num) { + case MAC_PROP_WL_BSSTYPE: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(mph, MAC_PROP_PERM_READ); + break; + } +} + + /* * quiesce(9E) entry point. * diff --git a/usr/src/uts/common/io/pcwl/pcwl.c b/usr/src/uts/common/io/pcwl/pcwl.c index 58d7028c39..bf1bfc7fd4 100644 --- a/usr/src/uts/common/io/pcwl/pcwl.c +++ b/usr/src/uts/common/io/pcwl/pcwl.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -85,13 +85,14 @@ static int pcwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int pcwl_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void pcwl_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wlpd_pr_num, mac_prop_info_handle_t mph); static void pcwl_delay(pcwl_maci_t *, clock_t); mac_callbacks_t pcwl_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, pcwl_gstat, pcwl_start, pcwl_stop, @@ -99,12 +100,14 @@ mac_callbacks_t pcwl_m_callbacks = { pcwl_sdmulti, pcwl_saddr, pcwl_tx, + NULL, pcwl_ioctl, NULL, NULL, NULL, pcwl_m_setprop, - pcwl_m_getprop + pcwl_m_getprop, + pcwl_m_propinfo }; static char *pcwl_name_str = "pcwl"; @@ -4400,18 +4403,11 @@ pcwl_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, /* ARGSUSED */ static int pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; - pcwl_maci_t *pcwl_p = (pcwl_maci_t *)arg; - if (wldp_length == 0) { - err = EINVAL; - return (err); - } - bzero(wldp_buf, wldp_length); - mutex_enter(&pcwl_p->pcwl_glock); if (!(pcwl_p->pcwl_flag & PCWL_CARD_READY)) { mutex_exit(&pcwl_p->pcwl_glock); @@ -4419,8 +4415,6 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } - *perm = MAC_PROP_PERM_RW; - switch (wldp_pr_num) { /* mac_prop_id */ case MAC_PROP_WL_ESSID: @@ -4442,19 +4436,15 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, pcwl_get_bsstype(pcwl_p, wldp_buf); break; case MAC_PROP_WL_LINKSTATUS: - *perm = MAC_PROP_PERM_READ; err = pcwl_get_linkstatus(pcwl_p, wldp_buf); break; case 
MAC_PROP_WL_ESS_LIST: - *perm = MAC_PROP_PERM_READ; pcwl_get_esslist(pcwl_p, wldp_buf); break; case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; pcwl_get_suprates(wldp_buf); break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; pcwl_get_param_rssi(pcwl_p, wldp_buf); break; case MAC_PROP_WL_RADIO: @@ -4493,6 +4483,23 @@ pcwl_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +static void +pcwl_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wlpd_pr_num, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg, pr_name)); + + switch (wlpd_pr_num) { + case MAC_PROP_WL_LINKSTATUS: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } +} + + /* * quiesce(9E) entry point. * diff --git a/usr/src/uts/common/io/ral/rt2560.c b/usr/src/uts/common/io/ral/rt2560.c index 45e32d00dd..74733347cb 100644 --- a/usr/src/uts/common/io/ral/rt2560.c +++ b/usr/src/uts/common/io/ral/rt2560.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -172,10 +172,12 @@ static void rt2560_m_ioctl(void *, queue_t *, mblk_t *); static int rt2560_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rt2560_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rt2560_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rt2560_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2560_m_stat, rt2560_m_start, rt2560_m_stop, @@ -183,12 +185,14 @@ static mac_callbacks_t rt2560_m_callbacks = { rt2560_m_multicst, rt2560_m_unicst, rt2560_m_tx, + NULL, rt2560_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rt2560_m_setprop, - rt2560_m_getprop + rt2560_m_getprop, + rt2560_m_propinfo }; uint32_t ral_dbg_flags = 0; @@ -2138,18 +2142,27 @@ rt2560_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rt2560_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2560_softc *sc = arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +rt2560_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rt2560_softc *sc = arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + +static void rt2560_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct rt2560_softc *sc = (struct rt2560_softc *)arg; diff --git a/usr/src/uts/common/io/rge/rge_main.c b/usr/src/uts/common/io/rge/rge_main.c index 773d474301..7ad85f53de 100644 --- a/usr/src/uts/common/io/rge/rge_main.c +++ b/usr/src/uts/common/io/rge/rge_main.c @@ -123,6 +123,7 @@ static mac_callbacks_t rge_m_callbacks = { rge_m_multicst, rge_m_unicst, rge_m_tx, + NULL, 
rge_m_ioctl, rge_m_getcapab }; diff --git a/usr/src/uts/common/io/rge/rge_rxtx.c b/usr/src/uts/common/io/rge/rge_rxtx.c index a2d881c67b..9b16c2ae82 100644 --- a/usr/src/uts/common/io/rge/rge_rxtx.c +++ b/usr/src/uts/common/io/rge/rge_rxtx.c @@ -287,11 +287,11 @@ rge_receive_packet(rge_t *rgep, uint32_t slot) proto = rx_status & RBD_FLAG_PROTOCOL; if ((proto == RBD_FLAG_TCP && !(rx_status & RBD_TCP_CKSUM_ERR)) || (proto == RBD_FLAG_UDP && !(rx_status & RBD_UDP_CKSUM_ERR))) - pflags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; + pflags |= HCK_FULLCKSUM_OK; if (proto != RBD_FLAG_NONE_IP && !(rx_status & RBD_IP_CKSUM_ERR)) - pflags |= HCK_IPV4_HDRCKSUM; + pflags |= HCK_IPV4_HDRCKSUM_OK; if (pflags != 0) { - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, pflags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, pflags); } return (mp); @@ -574,7 +574,7 @@ rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci) /* * h/w checksum offload flags */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); if (pflags & HCK_FULLCKSUM) { ASSERT(totlen >= sizeof (struct ether_header) + sizeof (struct ip)); diff --git a/usr/src/uts/common/io/rtls/rtls.c b/usr/src/uts/common/io/rtls/rtls.c index 4a0f6fef9a..d470fdfc7d 100644 --- a/usr/src/uts/common/io/rtls/rtls.c +++ b/usr/src/uts/common/io/rtls/rtls.c @@ -93,10 +93,6 @@ static int rtls_m_multicst(void *, boolean_t, const uint8_t *); static int rtls_m_promisc(void *, boolean_t); static mblk_t *rtls_m_tx(void *, mblk_t *); static int rtls_m_stat(void *, uint_t, uint64_t *); -static int rtls_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); -static int rtls_m_setprop(void *, const char *, mac_prop_id_t, uint_t, - const void *); static uint_t rtls_intr(caddr_t); @@ -184,13 +180,7 @@ static mac_callbacks_t rtls_m_callbacks = { rtls_m_promisc, rtls_m_multicst, rtls_m_unicst, - rtls_m_tx, - NULL, /* mc_ioctl */ - NULL, /* mc_getcapab */ - NULL, /* mc_open */ - 
NULL, /* mc_close */ - rtls_m_setprop, - rtls_m_getprop, + rtls_m_tx }; static mii_ops_t rtls_mii_ops = { @@ -912,24 +902,6 @@ rtls_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } -int -rtls_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags, - uint_t sz, void *val, uint_t *perm) -{ - rtls_t *rtlsp = arg; - - return (mii_m_getprop(rtlsp->mii, name, num, flags, sz, val, perm)); -} - -int -rtls_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz, - const void *val) -{ - rtls_t *rtlsp = arg; - - return (mii_m_setprop(rtlsp->mii, name, num, sz, val)); -} - /* * rtls_send() -- send a packet * diff --git a/usr/src/uts/common/io/rtw/rtw.c b/usr/src/uts/common/io/rtw/rtw.c index be463e40d5..c237184a6e 100644 --- a/usr/src/uts/common/io/rtw/rtw.c +++ b/usr/src/uts/common/io/rtw/rtw.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +41,7 @@ #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> +#include <sys/byteorder.h> #include "rtwreg.h" #include "rtwvar.h" #include "smc93cx6var.h" @@ -139,10 +140,12 @@ static void rtw_m_ioctl(void *, queue_t *, mblk_t *); static int rtw_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rtw_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rtw_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rtw_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rtw_m_stat, rtw_m_start, rtw_m_stop, @@ -150,12 +153,14 @@ static mac_callbacks_t rtw_m_callbacks = { rtw_m_multicst, rtw_m_unicst, rtw_m_tx, + NULL, rtw_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rtw_m_setprop, - rtw_m_getprop + rtw_m_getprop, + rtw_m_propinfo }; DDI_DEFINE_STREAM_OPS(rtw_dev_ops, nulldev, nulldev, rtw_attach, rtw_detach, @@ -2914,17 +2919,25 @@ rtw_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rtw_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { rtw_softc_t *rsc = arg; int err; err = ieee80211_getprop(&rsc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +rtw_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + rtw_softc_t *rsc = arg; + + ieee80211_propinfo(&rsc->sc_ic, pr_name, wldp_pr_num, prh); +} static int rtw_m_start(void *arg) diff --git a/usr/src/uts/common/io/rum/rum.c b/usr/src/uts/common/io/rum/rum.c index f76fb0fae1..abec7e727d 100644 --- a/usr/src/uts/common/io/rum/rum.c +++ b/usr/src/uts/common/io/rum/rum.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun 
Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> +#include <sys/byteorder.h> #define USBDRV_MAJOR_VER 2 #define USBDRV_MINOR_VER 0 @@ -260,10 +261,12 @@ static void rum_m_ioctl(void *, queue_t *, mblk_t *); static int rum_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int rum_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void rum_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t rum_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rum_m_stat, rum_m_start, rum_m_stop, @@ -271,12 +274,14 @@ static mac_callbacks_t rum_m_callbacks = { rum_m_multicst, rum_m_unicst, rum_m_tx, + NULL, rum_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, rum_m_setprop, - rum_m_getprop + rum_m_getprop, + rum_m_propinfo }; static void rum_amrr_start(struct rum_softc *, struct ieee80211_node *); @@ -2088,18 +2093,27 @@ rum_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int rum_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rum_softc *sc = (struct rum_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +rum_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rum_softc *sc = (struct rum_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + +static void rum_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct rum_softc *sc = (struct rum_softc 
*)arg; diff --git a/usr/src/uts/common/io/rwd/rt2661.c b/usr/src/uts/common/io/rwd/rt2661.c index 6419dbd96d..df3e688e4d 100644 --- a/usr/src/uts/common/io/rwd/rt2661.c +++ b/usr/src/uts/common/io/rwd/rt2661.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -271,11 +271,13 @@ static int rt2661_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int rt2661_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); +static void rt2661_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static mac_callbacks_t rt2661_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2661_m_stat, rt2661_m_start, rt2661_m_stop, @@ -283,12 +285,14 @@ static mac_callbacks_t rt2661_m_callbacks = { rt2661_m_multicst, rt2661_m_unicst, rt2661_m_tx, + NULL, rt2661_m_ioctl, NULL, NULL, NULL, rt2661_m_setprop, - rt2661_m_getprop + rt2661_m_getprop, + rt2661_m_propinfo }; #ifdef DEBUG @@ -2617,17 +2621,26 @@ rt2661_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int rt2661_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2661_softc *sc = (struct rt2661_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +rt2661_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct rt2661_softc *sc = (struct rt2661_softc *)arg; + + 
ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int rt2661_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/rwn/rt2860.c b/usr/src/uts/common/io/rwn/rt2860.c index 588d5c3730..703f1df19a 100644 --- a/usr/src/uts/common/io/rwn/rt2860.c +++ b/usr/src/uts/common/io/rwn/rt2860.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -245,12 +245,14 @@ static void rt2860_m_ioctl(void *, queue_t *, mblk_t *); static int rt2860_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); +static void rt2860_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t prh); static int rt2860_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, + void *wldp_buf); static mac_callbacks_t rt2860_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, rt2860_m_stat, rt2860_m_start, rt2860_m_stop, @@ -258,12 +260,14 @@ static mac_callbacks_t rt2860_m_callbacks = { rt2860_m_multicst, rt2860_m_unicst, rt2860_m_tx, + NULL, rt2860_m_ioctl, NULL, NULL, NULL, rt2860_m_setprop, - rt2860_m_getprop + rt2860_m_getprop, + rt2860_m_propinfo }; #ifdef DEBUG @@ -2635,17 +2639,26 @@ rt2860_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) */ static int rt2860_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct rt2860_softc *sc = (struct rt2860_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, 
perm); + wldp_length, wldp_buf); return (err); } +static void +rt2860_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct rt2860_softc *sc = (struct rt2860_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int rt2860_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/sfe/sfe_util.c b/usr/src/uts/common/io/sfe/sfe_util.c index c600c6927a..fb6d4ceb9b 100644 --- a/usr/src/uts/common/io/sfe/sfe_util.c +++ b/usr/src/uts/common/io/sfe/sfe_util.c @@ -32,7 +32,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -4069,6 +4069,7 @@ static mac_callbacks_t gem_m_callbacks = { gem_m_multicst, gem_m_unicst, gem_m_tx, + NULL, gem_m_ioctl, gem_m_getcapab, }; diff --git a/usr/src/uts/common/io/simnet/simnet.c b/usr/src/uts/common/io/simnet/simnet.c index f1a172dd9b..727fbbad8e 100644 --- a/usr/src/uts/common/io/simnet/simnet.c +++ b/usr/src/uts/common/io/simnet/simnet.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -109,10 +109,12 @@ static mblk_t *simnet_m_tx(void *, mblk_t *); static int simnet_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int simnet_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void simnet_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t simnet_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP), + (MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO), simnet_m_stat, simnet_m_start, simnet_m_stop, @@ -120,12 +122,14 @@ static mac_callbacks_t simnet_m_callbacks = { simnet_m_multicst, simnet_m_unicst, simnet_m_tx, + NULL, simnet_m_ioctl, NULL, NULL, NULL, simnet_m_setprop, - simnet_m_getprop + simnet_m_getprop, + simnet_m_propinfo }; /* @@ -1228,17 +1232,16 @@ simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } static int -simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_flags, +simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_valsize, void *pr_val) { simnet_wifidev_t *wdev = sdev->sd_wifidev; - boolean_t is_default = ((pr_flags & MAC_PROP_DEFAULT) != 0); int err = 0; int value; if (strcmp(pr_name, "_wl_esslist") == 0) { /* Returns num of _wl_ess_conf_t that have been set */ - value = (is_default ? 0:wdev->swd_esslist_num); + value = wdev->swd_esslist_num; } else if (strcmp(pr_name, "_wl_connected") == 0) { value = ((wdev->swd_linkstatus == WL_CONNECTED) ? 
1:0); } else { @@ -1252,7 +1255,7 @@ simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, uint_t pr_flags, static int simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { simnet_dev_t *sdev = arg; simnet_wifidev_t *wdev = sdev->sd_wifidev; @@ -1276,9 +1279,6 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_AUTH_MODE: case MAC_PROP_WL_ENCRYPTION: break; - case MAC_PROP_WL_BSSTYPE: - *perm = MAC_PROP_PERM_READ; - break; case MAC_PROP_WL_LINKSTATUS: (void) memcpy(wldp_buf, &wdev->swd_linkstatus, sizeof (wdev->swd_linkstatus)); @@ -1286,7 +1286,6 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_ESS_LIST: { wl_ess_conf_t *w_ess_conf; - *perm = MAC_PROP_PERM_READ; ((wl_ess_list_t *)wldp_buf)->wl_ess_list_num = wdev->swd_esslist_num; /* LINTED E_BAD_PTR_CAST_ALIGN */ @@ -1299,11 +1298,7 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } break; } - case MAC_PROP_WL_SUPPORTED_RATES: - *perm = MAC_PROP_PERM_READ; - break; case MAC_PROP_WL_RSSI: - *perm = MAC_PROP_PERM_READ; *(wl_rssi_t *)wldp_buf = wdev->swd_rssi; break; case MAC_PROP_WL_RADIO: @@ -1314,8 +1309,8 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case MAC_PROP_WL_DESIRED_RATES: break; case MAC_PROP_PRIVATE: - err = simnet_get_priv_prop(sdev, pr_name, pr_flags, - wldp_length, wldp_buf); + err = simnet_get_priv_prop(sdev, pr_name, wldp_length, + wldp_buf); break; default: err = ENOTSUP; @@ -1324,3 +1319,40 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } + +static void +simnet_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +{ + char valstr[MAXNAMELEN]; + + bzero(valstr, sizeof (valstr)); + + if (strcmp(pr_name, "_wl_esslist") == 0) { + (void) snprintf(valstr, 
sizeof (valstr), "%d", 0); + } + + if (strlen(valstr) > 0) + mac_prop_info_set_default_str(prh, valstr); +} + +static void +simnet_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + simnet_dev_t *sdev = arg; + + if (sdev->sd_type == DL_ETHER) + return; + + switch (wldp_pr_num) { + case MAC_PROP_WL_BSSTYPE: + case MAC_PROP_WL_ESS_LIST: + case MAC_PROP_WL_SUPPORTED_RATES: + case MAC_PROP_WL_RSSI: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + break; + case MAC_PROP_PRIVATE: + simnet_priv_propinfo(pr_name, prh); + break; + } +} diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c index 9e33c31b7b..05f74dd4c1 100644 --- a/usr/src/uts/common/io/softmac/softmac_main.c +++ b/usr/src/uts/common/io/softmac/softmac_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -84,11 +84,13 @@ static boolean_t softmac_m_getcapab(void *, mac_capab_t, void *); static int softmac_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int softmac_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); - + uint_t, void *); +static void softmac_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); #define SOFTMAC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE | MC_SETPROP | \ + MC_GETPROP | MC_PROPINFO) static mac_callbacks_t softmac_m_callbacks = { SOFTMAC_M_CALLBACK_FLAGS, @@ -99,12 +101,14 @@ static mac_callbacks_t softmac_m_callbacks = { softmac_m_multicst, softmac_m_unicst, softmac_m_tx, + NULL, softmac_m_ioctl, softmac_m_getcapab, softmac_m_open, softmac_m_close, softmac_m_setprop, - softmac_m_getprop + softmac_m_getprop, + softmac_m_propinfo }; /*ARGSUSED*/ @@ -1468,8 +1472,8 @@ softmac_m_setprop(void *arg, const char *name, mac_prop_id_t id, } static int -softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, - uint_t valsize, void *val, uint_t *perm) +softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, + uint_t valsize, void *val) { softmac_t *softmac = arg; char *fpstr; @@ -1478,18 +1482,15 @@ softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, return (ENOTSUP); if (strcmp(name, "_fastpath") == 0) { - if ((flags & MAC_PROP_DEFAULT) != 0) - return (ENOTSUP); - - *perm = MAC_PROP_PERM_READ; mutex_enter(&softmac->smac_fp_mutex); fpstr = (DATAPATH_MODE(softmac) == SOFTMAC_SLOWPATH) ? "disabled" : "enabled"; mutex_exit(&softmac->smac_fp_mutex); } else if (strcmp(name, "_disable_fastpath") == 0) { - *perm = MAC_PROP_PERM_RW; - fpstr = ((flags & MAC_PROP_DEFAULT) != 0) ? "false" : - (softmac->smac_fastpath_admin_disabled ? "true" : "false"); + fpstr = softmac->smac_fastpath_admin_disabled ? 
+ "true" : "false"; + } else if (strcmp(name, "_softmac") == 0) { + fpstr = "true"; } else { return (ENOTSUP); } @@ -1497,6 +1498,23 @@ softmac_m_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t flags, return (strlcpy(val, fpstr, valsize) >= valsize ? EINVAL : 0); } +static void +softmac_m_propinfo(void *arg, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t prh) +{ + _NOTE(ARGUNUSED(arg)); + + if (id != MAC_PROP_PRIVATE) + return; + + if (strcmp(name, "_fastpath") == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + } else if (strcmp(name, "_disable_fastpath") == 0) { + mac_prop_info_set_default_str(prh, "false"); + } + +} + int softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp) { diff --git a/usr/src/uts/common/io/softmac/softmac_pkt.c b/usr/src/uts/common/io/softmac/softmac_pkt.c index 4641fb3372..6389ec1328 100644 --- a/usr/src/uts/common/io/softmac/softmac_pkt.c +++ b/usr/src/uts/common/io/softmac/softmac_pkt.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,10 +75,8 @@ softmac_rput_process_data(softmac_lower_t *slp, mblk_t *mp) "copymsg failed"); goto failed; } - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, - &value, &flags); - VERIFY(hcksum_assoc(tmp, NULL, NULL, start, stuff, end, - value, flags, KM_NOSLEEP) == 0); + mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); + mac_hcksum_set(tmp, start, stuff, end, value, flags); freemsg(mp); mp = tmp; } diff --git a/usr/src/uts/common/io/uath/uath.c b/usr/src/uts/common/io/uath/uath.c index 0bc0ba7ea1..3353369389 100644 --- a/usr/src/uts/common/io/uath/uath.c +++ b/usr/src/uts/common/io/uath/uath.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -60,6 +60,7 @@ #include <sys/strsun.h> #include <sys/modctl.h> #include <sys/devops.h> +#include <sys/byteorder.h> #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> @@ -193,10 +194,12 @@ static void uath_m_ioctl(void *, queue_t *, mblk_t *); static int uath_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int uath_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void uath_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t uath_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, uath_m_stat, uath_m_start, uath_m_stop, @@ -204,12 +207,14 @@ static mac_callbacks_t uath_m_callbacks = { uath_m_multicst, uath_m_unicst, uath_m_tx, + NULL, uath_m_ioctl, NULL, NULL, NULL, uath_m_setprop, - uath_m_getprop + uath_m_getprop, + uath_m_propinfo }; static usb_alt_if_data_t * @@ -2874,16 +2879,25 @@ uath_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int uath_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct uath_softc *sc = (struct uath_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +uath_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t prh) +{ + struct uath_softc *sc = (struct uath_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, prh); +} + static int uath_m_stat(void *arg, uint_t stat, uint64_t *val) { diff --git a/usr/src/uts/common/io/ural/ural.c b/usr/src/uts/common/io/ural/ural.c index 82e9d711e4..a55ecd9f61 100644 --- a/usr/src/uts/common/io/ural/ural.c +++ b/usr/src/uts/common/io/ural/ural.c 
@@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +29,7 @@ #include <sys/strsubr.h> #include <sys/modctl.h> #include <sys/devops.h> +#include <sys/byteorder.h> #include <sys/mac_provider.h> #include <sys/mac_wifi.h> #include <sys/net80211.h> @@ -264,10 +265,12 @@ static void ural_m_ioctl(void *, queue_t *, mblk_t *); static int ural_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static int ural_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +static void ural_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t ural_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, ural_m_stat, ural_m_start, ural_m_stop, @@ -275,12 +278,14 @@ static mac_callbacks_t ural_m_callbacks = { ural_m_multicst, ural_m_unicst, ural_m_tx, + NULL, ural_m_ioctl, NULL, /* mc_getcapab */ NULL, NULL, ural_m_setprop, - ural_m_getprop + ural_m_getprop, + ural_m_propinfo }; static void ural_amrr_start(struct ural_softc *, struct ieee80211_node *); @@ -2067,18 +2072,27 @@ ural_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, static int ural_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct ural_softc *sc = (struct ural_softc *)arg; int err; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } static void +ural_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct ural_softc *sc = (struct ural_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + +static void 
ural_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) { struct ural_softc *sc = (struct ural_softc *)arg; diff --git a/usr/src/uts/common/io/urtw/urtw.c b/usr/src/uts/common/io/urtw/urtw.c index 3060ea80a0..b649ca609d 100644 --- a/usr/src/uts/common/io/urtw/urtw.c +++ b/usr/src/uts/common/io/urtw/urtw.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -83,11 +83,13 @@ static mblk_t *urtw_m_tx(void *, mblk_t *); static void urtw_m_ioctl(void *, queue_t *, mblk_t *); static int urtw_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int urtw_m_getprop(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); +static int urtw_m_getprop(void *, const char *, mac_prop_id_t, + uint_t, void *); +static void urtw_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mac_callbacks_t urtw_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, urtw_m_stat, urtw_m_start, urtw_m_stop, @@ -95,12 +97,14 @@ static mac_callbacks_t urtw_m_callbacks = { urtw_m_multicst, urtw_m_unicst, urtw_m_tx, + NULL, urtw_m_ioctl, NULL, NULL, NULL, urtw_m_setprop, - urtw_m_getprop + urtw_m_getprop, + urtw_m_propinfo }; static int urtw_tx_start(struct urtw_softc *, mblk_t *, int); @@ -4024,16 +4028,25 @@ urtw_m_promisc(void *arg, boolean_t on) static int urtw_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct urtw_softc *sc = (struct urtw_softc *)arg; int err = 0; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +urtw_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t 
mph) +{ + struct urtw_softc *sc = (struct urtw_softc *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int urtw_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index b62ed5a16c..2b063cf79c 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -82,8 +82,8 @@ static int vnic_m_stat(void *, uint_t, uint64_t *); static void vnic_m_ioctl(void *, queue_t *, mblk_t *); static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static void vnic_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static mblk_t *vnic_m_tx(void *, mblk_t *); static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); static void vnic_notify_cb(void *, mac_notify_type_t); @@ -101,7 +101,7 @@ static mod_hash_t *vnic_hash; #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) #define VNIC_M_CALLBACK_FLAGS \ - (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) static mac_callbacks_t vnic_m_callbacks = { VNIC_M_CALLBACK_FLAGS, @@ -112,12 +112,14 @@ static mac_callbacks_t vnic_m_callbacks = { vnic_m_multicst, vnic_m_unicst, vnic_m_tx, + NULL, vnic_m_ioctl, vnic_m_capab_get, NULL, NULL, vnic_m_setprop, - vnic_m_getprop + NULL, + vnic_m_propinfo }; void @@ -185,7 +187,7 @@ static int vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg, uint8_t *mac_addr_arg, uint16_t flags, 
vnic_ioc_diag_t *diag, - uint16_t vid) + uint16_t vid, boolean_t req_hwgrp_flag) { mac_diag_t mac_diag; uint16_t mac_flags = 0; @@ -290,7 +292,14 @@ vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, /* * We get the address here since we copy it in the * vnic's vn_addr. + * We can't ask for hardware resources since we + * don't currently support hardware classification + * for these MAC clients. */ + if (req_hwgrp_flag) { + *diag = VNIC_IOC_DIAG_NO_HWRINGS; + return (ENOTSUP); + } mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg); *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh); mac_flags |= MAC_UNICAST_VNIC_PRIMARY; @@ -330,8 +339,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0); char vnic_name[MAXNAMELEN]; const mac_info_t *minfop; - uint32_t req_hwgrp_flag = ((flags & VNIC_IOC_CREATE_REQ_HWRINGS) != 0) ? - MAC_OPEN_FLAGS_REQ_HWRINGS : 0; + uint32_t req_hwgrp_flag = B_FALSE; *diag = VNIC_IOC_DIAG_NONE; @@ -394,11 +402,15 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL, NULL); err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch, - vnic_name, MAC_OPEN_FLAGS_IS_VNIC | req_hwgrp_flag); + vnic_name, MAC_OPEN_FLAGS_IS_VNIC); if (err != 0) goto bail; if (mrp != NULL) { + if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 || + (mrp->mrp_mask & MRP_TX_RINGS) != 0) { + req_hwgrp_flag = B_TRUE; + } err = mac_client_set_resources(vnic->vn_mch, mrp); if (err != 0) goto bail; @@ -406,10 +418,11 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, /* assign a MAC address to the VNIC */ err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot, - mac_prefix_len, mac_len, mac_addr, flags, diag, vid); + mac_prefix_len, mac_len, mac_addr, flags, diag, vid, + req_hwgrp_flag); if (err != 0) { vnic->vn_muh = NULL; - if (diag != NULL && req_hwgrp_flag != 0) + if (diag != NULL && req_hwgrp_flag) *diag = 
VNIC_IOC_DIAG_NO_HWRINGS; goto bail; } @@ -495,7 +508,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, /* Set the VNIC's MAC in the client */ if (!is_anchor) - mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh); + mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp); err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp)); if (err != 0) { @@ -850,38 +863,25 @@ vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, return (err); } -/*ARGSUSED*/ -static int -vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) +/* ARGSUSED */ +static void vnic_m_propinfo(void *m_driver, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh) { - mac_propval_range_t range; - vnic_t *vn = m_driver; - int err = ENOTSUP; + vnic_t *vn = m_driver; /* MTU setting allowed only on an etherstub */ if (vn->vn_link_id != DATALINK_INVALID_LINKID) - return (err); + return; switch (pr_num) { case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) - return (ENOTSUP); - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = ANCHOR_VNIC_MIN_MTU; - range.range_uint32[0].mpur_max = ANCHOR_VNIC_MAX_MTU; - bcopy(&range, pr_val, sizeof (range)); - return (0); - default: + mac_prop_info_set_range_uint32(prh, + ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU); break; } - - return (err); } + int vnic_info(vnic_info_t *info, cred_t *credp) { diff --git a/usr/src/uts/common/io/vr/vr.c b/usr/src/uts/common/io/vr/vr.c index 03fadffe72..4a756212ea 100644 --- a/usr/src/uts/common/io/vr/vr.c +++ b/usr/src/uts/common/io/vr/vr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -148,7 +148,7 @@ static ddi_dma_attr_t vr_data_dma_attr = { }; static mac_callbacks_t vr_mac_callbacks = { - MC_SETPROP|MC_GETPROP, /* Which callbacks are set */ + MC_SETPROP|MC_GETPROP|MC_PROPINFO, /* Which callbacks are set */ vr_mac_getstat, /* Get the value of a statistic */ vr_mac_start, /* Start the device */ vr_mac_stop, /* Stop the device */ @@ -156,12 +156,14 @@ static mac_callbacks_t vr_mac_callbacks = { vr_mac_set_multicast, /* Enable or disable a multicast addr */ vr_mac_set_ether_addr, /* Set the unicast MAC address */ vr_mac_tx_enqueue_list, /* Transmit a packet */ + NULL, NULL, /* Process an unknown ioctl */ NULL, /* Get capability information */ NULL, /* Open the device */ NULL, /* Close the device */ vr_mac_setprop, /* Set properties of the device */ - vr_mac_getprop /* Get properties of the device */ + vr_mac_getprop, /* Get properties of the device */ + vr_mac_propinfo /* Get properties attributes */ }; /* @@ -3157,7 +3159,7 @@ vr_remove_kstats(vr_t *vrp) */ int vr_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { vr_t *vrp; uint32_t err; @@ -3168,228 +3170,220 @@ vr_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, err = 0; vrp = (vr_t *)arg; - if ((pr_flags & MAC_PROP_DEFAULT) != 0) { - /* - * Defaults depend on the PHY/MAC's capabilities - * All defaults are read/write, otherwise reset-linkprop fails - * with enotsup .... 
- */ - *perm = MAC_PROP_PERM_RW; - switch (pr_num) { - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - val = 0; - break; + switch (pr_num) { + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + val = 0; + break; - case MAC_PROP_ADV_100FDX_CAP: - case MAC_PROP_EN_100FDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASEX_FD) != 0; - break; + case MAC_PROP_ADV_100FDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_TX_FD) != 0; + break; - case MAC_PROP_ADV_100HDX_CAP: - case MAC_PROP_EN_100HDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASEX) != 0; - break; + case MAC_PROP_ADV_100HDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_TX) != 0; + break; - case MAC_PROP_ADV_100T4_CAP: - case MAC_PROP_EN_100T4_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_100_BASE_T4) != 0; - break; + case MAC_PROP_ADV_100T4_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_100BASE_T4) != 0; + break; - case MAC_PROP_ADV_10FDX_CAP: - case MAC_PROP_EN_10FDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_10_FD) != 0; - break; + case MAC_PROP_ADV_10FDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_10BASE_T_FD) != 0; + break; - case MAC_PROP_ADV_10HDX_CAP: - case MAC_PROP_EN_10HDX_CAP: - val = (vrp->chip.mii.status & - MII_STATUS_10) != 0; - break; + case MAC_PROP_ADV_10HDX_CAP: + val = (vrp->chip.mii.anadv & + MII_ABILITY_10BASE_T) != 0; + break; - case MAC_PROP_AUTONEG: - case MAC_PROP_EN_AUTONEG: - val = (vrp->chip.mii.status & - MII_STATUS_CANAUTONEG) != 0; - break; + case MAC_PROP_AUTONEG: + val = (vrp->chip.mii.control & + MII_CONTROL_ANE) != 0; + break; - case MAC_PROP_DUPLEX: - val = VR_LINK_DUPLEX_FULL; - break; + case MAC_PROP_DUPLEX: + val = vrp->chip.link.duplex; + break; - case MAC_PROP_FLOWCTRL: - val = VR_PAUSE_BIDIRECTIONAL; - break; + case MAC_PROP_EN_100FDX_CAP: + val = 
(vrp->param.anadv_en & + MII_ABILITY_100BASE_TX_FD) != 0; + break; - case MAC_PROP_MTU: - val = ETHERMTU; - break; + case MAC_PROP_EN_100HDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_100BASE_TX) != 0; + break; - case MAC_PROP_SPEED: - val = 100 * 1000 * 1000; - break; + case MAC_PROP_EN_100T4_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_100BASE_T4) != 0; + break; - case MAC_PROP_STATUS: - val = VR_LINK_STATE_UP; - break; + case MAC_PROP_EN_10FDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_10BASE_T_FD) != 0; + break; - default: - return (ENOTSUP); - } - } else { - switch (pr_num) { - case MAC_PROP_ADV_1000FDX_CAP: - case MAC_PROP_ADV_1000HDX_CAP: - val = 0; - *perm = MAC_PROP_PERM_READ; - break; + case MAC_PROP_EN_10HDX_CAP: + val = (vrp->param.anadv_en & + MII_ABILITY_10BASE_T) != 0; + break; + + case MAC_PROP_EN_AUTONEG: + val = vrp->param.an_en == VR_LINK_AUTONEG_ON; + break; + + case MAC_PROP_FLOWCTRL: + val = vrp->chip.link.flowctrl; + break; + + case MAC_PROP_MTU: + val = vrp->param.mtu; + break; - case MAC_PROP_EN_1000FDX_CAP: - case MAC_PROP_EN_1000HDX_CAP: - *perm = MAC_PROP_PERM_READ; + case MAC_PROP_SPEED: + if (vrp->chip.link.speed == + VR_LINK_SPEED_100MBS) + val = 100 * 1000 * 1000; + else if (vrp->chip.link.speed == + VR_LINK_SPEED_10MBS) + val = 10 * 1000 * 1000; + else val = 0; - break; + break; - case MAC_PROP_ADV_100FDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_TX_FD) != 0; - break; + case MAC_PROP_STATUS: + val = vrp->chip.link.state; + break; - case MAC_PROP_ADV_100HDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_TX) != 0; - break; + default: + err = ENOTSUP; + break; + } - case MAC_PROP_ADV_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_100BASE_T4) != 0; - break; + if (err == 0 && pr_num != MAC_PROP_PRIVATE) { + if (pr_valsize == sizeof (uint64_t)) + *(uint64_t *)pr_val = val; + else if (pr_valsize 
== sizeof (uint32_t)) + *(uint32_t *)pr_val = val; + else if (pr_valsize == sizeof (uint16_t)) + *(uint16_t *)pr_val = val; + else if (pr_valsize == sizeof (uint8_t)) + *(uint8_t *)pr_val = val; + else + err = EINVAL; + } + return (err); +} - case MAC_PROP_ADV_10FDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_10BASE_T_FD) != 0; - break; +void +vr_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + vr_t *vrp = (vr_t *)arg; + uint8_t val, perm; - case MAC_PROP_ADV_10HDX_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->chip.mii.anadv & - MII_ABILITY_10BASE_T) != 0; - break; + /* Since we have no private properties */ + _NOTE(ARGUNUSED(pr_name)) - case MAC_PROP_AUTONEG: - *perm = MAC_PROP_PERM_RW; - val = (vrp->chip.mii.control & - MII_CONTROL_ANE) != 0; - break; + switch (pr_num) { + case MAC_PROP_ADV_1000FDX_CAP: + case MAC_PROP_ADV_1000HDX_CAP: + case MAC_PROP_EN_1000FDX_CAP: + case MAC_PROP_EN_1000HDX_CAP: + case MAC_PROP_ADV_100FDX_CAP: + case MAC_PROP_ADV_100HDX_CAP: + case MAC_PROP_ADV_100T4_CAP: + case MAC_PROP_ADV_10FDX_CAP: + case MAC_PROP_ADV_10HDX_CAP: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; - case MAC_PROP_DUPLEX: - /* - * Writability depends on autoneg. 
- */ - if ((vrp->chip.mii.control & - MII_CONTROL_ANE) == 0) - *perm = MAC_PROP_PERM_RW; - else - *perm = MAC_PROP_PERM_READ; - val = vrp->chip.link.duplex; - break; + case MAC_PROP_EN_100FDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASEX_FD) != 0; + break; - case MAC_PROP_EN_100FDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_TX_FD) != 0; - break; + case MAC_PROP_EN_100HDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASEX) != 0; + break; - case MAC_PROP_EN_100HDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_TX) != 0; - break; + case MAC_PROP_EN_100T4_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_100_BASE_T4) != 0; + break; - case MAC_PROP_EN_100T4_CAP: - *perm = MAC_PROP_PERM_READ; - val = (vrp->param.anadv_en & - MII_ABILITY_100BASE_T4) != 0; - break; + case MAC_PROP_EN_10FDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_10_FD) != 0; + break; - case MAC_PROP_EN_10FDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_10BASE_T_FD) != 0; - break; + case MAC_PROP_EN_10HDX_CAP: + val = (vrp->chip.mii.status & + MII_STATUS_10) != 0; + break; - case MAC_PROP_EN_10HDX_CAP: - *perm = MAC_PROP_PERM_RW; - val = (vrp->param.anadv_en & - MII_ABILITY_10BASE_T) != 0; - break; + case MAC_PROP_AUTONEG: + case MAC_PROP_EN_AUTONEG: + val = (vrp->chip.mii.status & + MII_STATUS_CANAUTONEG) != 0; + break; - case MAC_PROP_EN_AUTONEG: - *perm = MAC_PROP_PERM_RW; - val = vrp->param.an_en == VR_LINK_AUTONEG_ON; - break; + case MAC_PROP_FLOWCTRL: + mac_prop_info_set_default_link_flowctrl(prh, + LINK_FLOWCTRL_BI); + return; - case MAC_PROP_FLOWCTRL: - *perm = MAC_PROP_PERM_RW; - val = vrp->chip.link.flowctrl; - break; + case MAC_PROP_MTU: + mac_prop_info_set_range_uint32(prh, + ETHERMTU, ETHERMTU); + return; - case MAC_PROP_MTU: - *perm = MAC_PROP_PERM_RW; - val = vrp->param.mtu; - break; + case MAC_PROP_DUPLEX: + /* + * Writability depends on autoneg. 
+ */ + perm = ((vrp->chip.mii.control & + MII_CONTROL_ANE) == 0) ? MAC_PROP_PERM_RW : + MAC_PROP_PERM_READ; + mac_prop_info_set_perm(prh, perm); + + if (perm == MAC_PROP_PERM_RW) { + mac_prop_info_set_default_uint8(prh, + VR_LINK_DUPLEX_FULL); + } + return; - case MAC_PROP_SPEED: - /* - * Writability depends on autoneg. - */ - if ((vrp->chip.mii.control & - MII_CONTROL_ANE) == 0) - *perm = MAC_PROP_PERM_RW; - else - *perm = MAC_PROP_PERM_READ; - if (vrp->chip.link.speed == - VR_LINK_SPEED_100MBS) - val = 100 * 1000 * 1000; - else if (vrp->chip.link.speed == - VR_LINK_SPEED_10MBS) - val = 10 * 1000 * 1000; - else - val = 0; - break; + case MAC_PROP_SPEED: + perm = ((vrp->chip.mii.control & + MII_CONTROL_ANE) == 0) ? + MAC_PROP_PERM_RW : MAC_PROP_PERM_READ; + mac_prop_info_set_perm(prh, perm); + + if (perm == MAC_PROP_PERM_RW) { + mac_prop_info_set_default_uint64(prh, + 100 * 1000 * 1000); + } + return; - case MAC_PROP_STATUS: - val = vrp->chip.link.state; - break; + case MAC_PROP_STATUS: + mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); + return; - default: - err = ENOTSUP; - break; + default: + return; } - } - if (err == 0 && pr_num != MAC_PROP_PRIVATE) { - if (pr_valsize == sizeof (uint64_t)) - *(uint64_t *)pr_val = val; - else if (pr_valsize == sizeof (uint32_t)) - *(uint32_t *)pr_val = val; - else if (pr_valsize == sizeof (uint16_t)) - *(uint16_t *)pr_val = val; - else if (pr_valsize == sizeof (uint8_t)) - *(uint8_t *)pr_val = val; - else - err = EINVAL; - } - return (err); + + mac_prop_info_set_default_uint8(prh, val); } /* diff --git a/usr/src/uts/common/io/vr/vr.h b/usr/src/uts/common/io/vr/vr.h index f120895b8f..29b0144272 100644 --- a/usr/src/uts/common/io/vr/vr.h +++ b/usr/src/uts/common/io/vr/vr.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -487,11 +487,13 @@ int vr_mac_set_ether_addr(void *vrp, const uint8_t *macaddr); mblk_t *vr_mac_tx_enqueue_list(void *p, mblk_t *mp); int vr_mac_getprop(void *arg, const char *pr_name, - mac_prop_id_t pr_num, uint_t pr_flags, - uint_t pr_valsize, void *pr_val, uint_t *perm); + mac_prop_id_t pr_num, uint_t pr_valsize, + void *pr_val); int vr_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val); +void vr_mac_propinfo(void *arg, const char *pr_name, + mac_prop_id_t pr_num, mac_prop_info_handle_t prh); uint_t vr_intr(caddr_t arg1, caddr_t arg2); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/wpi/wpi.c b/usr/src/uts/common/io/wpi/wpi.c index d25acafb21..1913d7d980 100644 --- a/usr/src/uts/common/io/wpi/wpi.c +++ b/usr/src/uts/common/io/wpi/wpi.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -273,8 +273,9 @@ static void wpi_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static int wpi_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); static int wpi_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, uint_t wldp_lenth, - void *wldp_buf, uint_t *); + mac_prop_id_t wldp_pr_num, uint_t wldp_lenth, void *wldp_buf); +static void wpi_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static void wpi_destroy_locks(wpi_sc_t *sc); static int wpi_send(ieee80211com_t *ic, mblk_t *mp, uint8_t type); static void wpi_thread(wpi_sc_t *sc); @@ -364,7 +365,7 @@ _info(struct modinfo *mip) * Mac Call Back entries */ mac_callbacks_t wpi_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, wpi_m_stat, wpi_m_start, wpi_m_stop, @@ -372,12 +373,14 @@ mac_callbacks_t wpi_m_callbacks = { 
wpi_m_multicst, wpi_m_unicst, wpi_m_tx, + NULL, wpi_m_ioctl, NULL, NULL, NULL, wpi_m_setprop, - wpi_m_getprop + wpi_m_getprop, + wpi_m_propinfo }; #ifdef DEBUG @@ -2324,16 +2327,26 @@ wpi_m_ioctl(void* arg, queue_t *wq, mblk_t *mp) /* ARGSUSED */ static int wpi_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { int err = 0; wpi_sc_t *sc = (wpi_sc_t *)arg; err = ieee80211_getprop(&sc->sc_ic, pr_name, wldp_pr_name, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } + +static void +wpi_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + wpi_sc_t *sc = (wpi_sc_t *)arg; + + ieee80211_propinfo(&sc->sc_ic, pr_name, wldp_pr_num, mph); +} + static int wpi_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_name, uint_t wldp_length, const void *wldp_buf) diff --git a/usr/src/uts/common/io/xge/drv/xgell.c b/usr/src/uts/common/io/xge/drv/xgell.c index d1b85d74f2..a5d857f05d 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.c +++ b/usr/src/uts/common/io/xge/drv/xgell.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -112,6 +112,7 @@ static mac_callbacks_t xgell_m_callbacks = { xgell_m_multicst, NULL, NULL, + NULL, xgell_m_ioctl, xgell_m_getcapab }; @@ -613,15 +614,13 @@ xgell_rx_hcksum_assoc(mblk_t *mp, char *vaddr, int pkt_length, if (!(ext_info->proto & XGE_HAL_FRAME_PROTO_IP_FRAGMENTED)) { if (ext_info->proto & XGE_HAL_FRAME_PROTO_TCP_OR_UDP) { if (ext_info->l3_cksum == XGE_HAL_L3_CKSUM_OK) { - cksum_flags |= HCK_IPV4_HDRCKSUM; + cksum_flags |= HCK_IPV4_HDRCKSUM_OK; } if (ext_info->l4_cksum == XGE_HAL_L4_CKSUM_OK) { cksum_flags |= HCK_FULLCKSUM_OK; } - if (cksum_flags) { - cksum_flags |= HCK_FULLCKSUM; - (void) hcksum_assoc(mp, NULL, NULL, 0, - 0, 0, 0, cksum_flags, 0); + if (cksum_flags != 0) { + mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags); } } } else if (ext_info->proto & @@ -640,9 +639,8 @@ xgell_rx_hcksum_assoc(mblk_t *mp, char *vaddr, int pkt_length, start = 40; } cksum_flags |= HCK_PARTIALCKSUM; - (void) hcksum_assoc(mp, NULL, NULL, start, 0, - end, ntohs(ext_info->l4_cksum), cksum_flags, - 0); + mac_hcksum_set(mp, start, 0, end, + ntohs(ext_info->l4_cksum), cksum_flags); } } @@ -795,7 +793,8 @@ xgell_rx_1b_callback(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, xgell_rx_hcksum_assoc(mp, (char *)rx_buffer->vaddr + HEADROOM, pkt_length, &ext_info); - ring->received_bytes += pkt_length; + ring->rx_pkts++; + ring->rx_bytes += pkt_length; if (mp_head == NULL) { mp_head = mp; @@ -954,9 +953,11 @@ xgell_ring_tx(void *arg, mblk_t *mp) uint32_t mss; int handle_cnt, frag_cnt, ret, i, copied; boolean_t used_copy; + uint64_t sent_bytes; _begin: handle_cnt = frag_cnt = 0; + sent_bytes = 0; if (!lldev->is_initialized || lldev->in_reset) return (mp); @@ -1041,7 +1042,7 @@ _begin: continue; } - ring->sent_bytes += mblen; + sent_bytes += mblen; /* * Check the message length to decide to DMA or bcopy() data @@ -1159,14 +1160,14 @@ _begin: * If LSO is required, just call xge_hal_fifo_dtr_mss_set(dtr, mss) to * do all necessary work. 
*/ - lso_info_get(mp, &mss, &lsoflags); + mac_lso_get(mp, &mss, &lsoflags); if (lsoflags & HW_LSO) { xge_assert((mss != 0) && (mss <= XGE_HAL_DEFAULT_MTU)); xge_hal_fifo_dtr_mss_set(dtr, mss); } - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &hckflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags); if (hckflags & HCK_IPV4_HDRCKSUM) { xge_hal_fifo_dtr_cksum_set_bits(dtr, XGE_HAL_TXD_TX_CKO_IPV4_EN); @@ -1178,6 +1179,10 @@ _begin: xge_hal_fifo_dtr_post(ring->channelh, dtr); + /* Update per-ring tx statistics */ + atomic_add_64(&ring->tx_pkts, 1); + atomic_add_64(&ring->tx_bytes, sent_bytes); + return (NULL); _exit_cleanup: @@ -1458,6 +1463,7 @@ xgell_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = xgell_rx_ring_start; infop->mri_stop = xgell_rx_ring_stop; infop->mri_poll = xgell_rx_poll; + infop->mri_stat = xgell_rx_ring_stat; mintr = &infop->mri_intr; mintr->mi_handle = (mac_intr_handle_t)rx_ring; @@ -1480,6 +1486,7 @@ xgell_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, infop->mri_start = xgell_tx_ring_start; infop->mri_stop = xgell_tx_ring_stop; infop->mri_tx = xgell_ring_tx; + infop->mri_stat = xgell_tx_ring_stat; break; } @@ -1618,7 +1625,6 @@ xgell_rx_ring_open(xgell_rx_ring_t *rx_ring) mutex_init(&rx_ring->ring_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(hldev->irqh)); - rx_ring->received_bytes = 0; rx_ring->poll_bytes = -1; rx_ring->polled_bytes = 0; rx_ring->poll_mp = NULL; @@ -1769,7 +1775,6 @@ xgell_tx_ring_open(xgell_tx_ring_t *tx_ring) return (B_FALSE); } - tx_ring->sent_bytes = 0; tx_ring->live = B_TRUE; return (B_TRUE); @@ -2262,6 +2267,56 @@ xgell_m_stat(void *arg, uint_t stat, uint64_t *val) } /* + * Retrieve a value for one of the statistics for a particular rx ring + */ +int +xgell_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + xgell_rx_ring_t *rx_ring = (xgell_rx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = rx_ring->rx_bytes; + break; + + 
case MAC_STAT_IPACKETS: + *val = rx_ring->rx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* + * Retrieve a value for one of the statistics for a particular tx ring + */ +int +xgell_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + xgell_tx_ring_t *tx_ring = (xgell_tx_ring_t *)rh; + + switch (stat) { + case MAC_STAT_OBYTES: + *val = tx_ring->tx_bytes; + break; + + case MAC_STAT_OPACKETS: + *val = tx_ring->tx_pkts; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +/* * xgell_device_alloc - Allocate new LL device */ int diff --git a/usr/src/uts/common/io/xge/drv/xgell.h b/usr/src/uts/common/io/xge/drv/xgell.h index 93845bb655..39c6447ebf 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.h +++ b/usr/src/uts/common/io/xge/drv/xgell.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -329,8 +329,8 @@ struct xgell_rx_ring { xgell_multi_mac_t mmac; /* per group multiple addrs */ xgell_rx_buffer_pool_t bf_pool; /* per ring buffer pool */ - int received_bytes; /* total received bytes */ - int intr_bytes; /* interrupt received bytes */ + uint64_t rx_pkts; /* total received packets */ + uint64_t rx_bytes; /* total received bytes */ int poll_bytes; /* bytes to be polled up */ int polled_bytes; /* total polled bytes */ mblk_t *poll_mp; /* polled messages */ @@ -344,7 +344,8 @@ struct xgell_tx_ring { xge_hal_channel_h channelh; /* hardware channel */ xgelldev_t *lldev; /* driver device */ mac_ring_handle_t ring_handle; /* call back ring handle */ - int sent_bytes; /* bytes sent though the ring */ + uint64_t tx_pkts; /* packets sent */ + uint64_t tx_bytes; /* bytes sent though the ring */ boolean_t need_resched; }; @@ -418,8 +419,9 @@ void xge_disable_intrs(xgelldev_t *lldev); void xge_rem_intrs(xgelldev_t *lldev); +int xgell_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val); - +int xgell_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/yge/yge.c b/usr/src/uts/common/io/yge/yge.c index c41dda7b60..33d2fb527a 100644 --- a/usr/src/uts/common/io/yge/yge.c +++ b/usr/src/uts/common/io/yge/yge.c @@ -228,8 +228,9 @@ static mblk_t *yge_m_tx(void *, mblk_t *); static int yge_m_stat(void *, uint_t, uint64_t *); static int yge_m_start(void *); static void yge_m_stop(void *); -static int yge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, - uint_t, void *, uint_t *); +static int yge_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +static void yge_m_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); static int yge_m_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); static void yge_m_ioctl(void *, queue_t *, mblk_t *); @@ -240,7 +241,7 @@ extern int yge_phys_restart(yge_port_t *, boolean_t); extern int 
yge_phys_init(yge_port_t *, phy_readreg_t, phy_writereg_t); static mac_callbacks_t yge_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, yge_m_stat, yge_m_start, yge_m_stop, @@ -248,12 +249,14 @@ static mac_callbacks_t yge_m_callbacks = { yge_m_multicst, yge_m_unicst, yge_m_tx, + NULL, yge_m_ioctl, NULL, /* mc_getcapab */ NULL, /* mc_open */ NULL, /* mc_close */ yge_m_setprop, yge_m_getprop, + yge_m_propinfo }; static mii_ops_t yge_mii_ops = { @@ -3348,47 +3351,30 @@ err: int yge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, - uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) + uint_t pr_valsize, void *pr_val) { yge_port_t *port = arg; - mac_propval_range_t range; - int err; - err = mii_m_getprop(port->p_mii, pr_name, pr_num, pr_flags, - pr_valsize, pr_val, perm); - if (err != ENOTSUP) { - return (err); - } - - if (pr_valsize == 0) - return (EINVAL); + return (mii_m_getprop(port->p_mii, pr_name, pr_num, pr_valsize, + pr_val)); +} - bzero(pr_val, pr_valsize); - *perm = MAC_PROP_PERM_RW; +static void +yge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh) +{ + yge_port_t *port = arg; switch (pr_num) { case MAC_PROP_MTU: - if (!(pr_flags & MAC_PROP_POSSIBLE)) { - err = ENOTSUP; - break; - } - if (pr_valsize < sizeof (mac_propval_range_t)) - return (EINVAL); - range.mpr_count = 1; - range.mpr_type = MAC_PROPVAL_UINT32; - range.range_uint32[0].mpur_min = ETHERMTU; - range.range_uint32[0].mpur_max = + mac_prop_info_set_range_uint32(prh, ETHERMTU, port->p_flags & PORT_FLAG_NOJUMBO ? 
- ETHERMTU : YGE_JUMBO_MTU; - bcopy(&range, pr_val, sizeof (range)); - err = 0; + ETHERMTU : YGE_JUMBO_MTU); break; - default: - err = ENOTSUP; + mii_m_propinfo(port->p_mii, pr_name, pr_num, prh); break; } - return (err); } void diff --git a/usr/src/uts/common/io/zyd/zyd.c b/usr/src/uts/common/io/zyd/zyd.c index bdd5ef37d1..61c4abf3c6 100644 --- a/usr/src/uts/common/io/zyd/zyd.c +++ b/usr/src/uts/common/io/zyd/zyd.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,8 +54,9 @@ static int zyd_m_promisc(void *arg, boolean_t on); static void zyd_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); static mblk_t *zyd_m_tx(void *arg, mblk_t *mp); static int zyd_m_getprop(void *arg, const char *pr_name, - mac_prop_id_t wldp_pr_num, uint_t pr_flags, - uint_t wldp_length, void *wldp_buf, uint_t *perm); + mac_prop_id_t wldp_pr_num, uint_t wldp_length, void *wldp_buf); +static void zyd_m_propinfo(void *arg, const char *pr_name, + mac_prop_id_t wldp_pr_num, mac_prop_info_handle_t mph); static int zyd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, uint_t wldp_length, const void *wldp_buf); @@ -72,7 +73,7 @@ void *zyd_ssp; * Mac Call Back entries */ static mac_callbacks_t zyd_m_callbacks = { - MC_IOCTL | MC_SETPROP | MC_GETPROP, + MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO, zyd_m_stat, /* Get the value of a statistic */ zyd_m_start, /* Start the device */ zyd_m_stop, /* Stop the device */ @@ -80,12 +81,14 @@ static mac_callbacks_t zyd_m_callbacks = { zyd_m_multicst, /* Enable or disable a multicast addr */ zyd_m_unicst, /* Set the unicast MAC address */ zyd_m_tx, /* Transmit a packet */ + NULL, zyd_m_ioctl, /* Process an unknown ioctl */ NULL, /* mc_getcapab */ NULL, NULL, zyd_m_setprop, - zyd_m_getprop + zyd_m_getprop, + zyd_m_propinfo }; /* @@ -893,7 +896,7 @@ zyd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t 
wldp_pr_num, static int zyd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t pr_flags, uint_t wldp_length, void *wldp_buf, uint_t *perm) + uint_t wldp_length, void *wldp_buf) { struct zyd_softc *sc = (struct zyd_softc *)arg; int err; @@ -903,11 +906,20 @@ zyd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } err = ieee80211_getprop(&sc->ic, pr_name, wldp_pr_num, - pr_flags, wldp_length, wldp_buf, perm); + wldp_length, wldp_buf); return (err); } +static void +zyd_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, + mac_prop_info_handle_t mph) +{ + struct zyd_softc *sc = (struct zyd_softc *)arg; + + ieee80211_propinfo(&sc->ic, pr_name, wldp_pr_num, mph); +} + /* * Transmit a data frame. */ diff --git a/usr/src/uts/common/os/pool.c b/usr/src/uts/common/os/pool.c index 7c3c70de3c..4b4337b3a2 100644 --- a/usr/src/uts/common/os/pool.c +++ b/usr/src/uts/common/os/pool.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ #include <sys/zone.h> #include <sys/policy.h> #include <sys/schedctl.h> +#include <sys/taskq.h> /* * RESOURCE POOLS @@ -153,6 +154,12 @@ static kthread_t *pool_busy_thread; /* thread holding "pool_lock" */ static kmutex_t pool_barrier_lock; /* synch. with pool_barrier_* */ static kcondvar_t pool_barrier_cv; /* synch. with pool_barrier_* */ static int pool_barrier_count; /* synch. with pool_barrier_* */ +static list_t pool_event_cb_list; /* pool event callbacks */ +static boolean_t pool_event_cb_init = B_FALSE; +static kmutex_t pool_event_cb_lock; +static taskq_t *pool_event_cb_taskq = NULL; + +void pool_event_dispatch(pool_event_t, poolid_t); /* * Boot-time pool initialization. 
@@ -373,6 +380,21 @@ pool_lookup_pool_by_id(poolid_t poolid) return (NULL); } +pool_t * +pool_lookup_pool_by_pset(int id) +{ + pool_t *pool = pool_default; + psetid_t psetid = (psetid_t)id; + + ASSERT(pool_lock_held()); + for (pool = list_head(&pool_list); pool != NULL; + pool = list_next(&pool_list, pool)) { + if (pool->pool_pset->pset_id == psetid) + return (pool); + } + return (NULL); +} + /* * Create new pool, associate it with default resource sets, and give * it a temporary name. @@ -545,12 +567,14 @@ pool_status(int status) if (ret != 0) return (ret); pool_state = POOL_ENABLED; + pool_event_dispatch(POOL_E_ENABLE, NULL); break; case POOL_DISABLED: ret = pool_disable(); if (ret != 0) return (ret); pool_state = POOL_DISABLED; + pool_event_dispatch(POOL_E_DISABLE, NULL); break; default: ret = EINVAL; @@ -572,6 +596,8 @@ pool_assoc(poolid_t poolid, int idtype, id_t id) switch (idtype) { case PREC_PSET: ret = pool_pset_assoc(poolid, (psetid_t)id); + if (ret == 0) + pool_event_dispatch(POOL_E_CHANGE, poolid); break; default: ret = EINVAL; @@ -595,6 +621,8 @@ pool_dissoc(poolid_t poolid, int idtype) switch (idtype) { case PREC_PSET: ret = pool_pset_assoc(poolid, PS_NONE); + if (ret == 0) + pool_event_dispatch(POOL_E_CHANGE, poolid); break; default: ret = EINVAL; @@ -612,24 +640,48 @@ int pool_transfer(int type, id_t src, id_t dst, uint64_t qty) { int ret = EINVAL; + return (ret); } +static poolid_t +pool_lookup_id_by_pset(int id) +{ + pool_t *pool = pool_default; + psetid_t psetid = (psetid_t)id; + + ASSERT(pool_lock_held()); + for (pool = list_head(&pool_list); pool != NULL; + pool = list_next(&pool_list, pool)) { + if (pool->pool_pset->pset_id == psetid) + return (pool->pool_id); + } + return (POOL_INVALID); +} + /* * Transfer resources specified by their IDs between resource sets. 
*/ int -pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids) +pool_xtransfer(int type, id_t src_pset, id_t dst_pset, uint_t size, id_t *ids) { int ret; + poolid_t src_pool, dst_pool; ASSERT(pool_lock_held()); if (pool_state == POOL_DISABLED) return (ENOTACTIVE); switch (type) { case PREC_PSET: - ret = pool_pset_xtransfer((psetid_t)src, (psetid_t)dst, - size, ids); + ret = pool_pset_xtransfer((psetid_t)src_pset, + (psetid_t)dst_pset, size, ids); + + if ((src_pool = pool_lookup_id_by_pset(src_pset)) == -1) + return (EINVAL); + if ((dst_pool = pool_lookup_id_by_pset(dst_pset)) == -1) + return (EINVAL); + pool_event_dispatch(POOL_E_CHANGE, src_pool); + pool_event_dispatch(POOL_E_CHANGE, dst_pool); break; default: ret = EINVAL; @@ -643,7 +695,7 @@ pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids) int pool_bind(poolid_t poolid, idtype_t idtype, id_t id) { - pool_t *pool; + pool_t *pool; ASSERT(pool_lock_held()); @@ -1234,6 +1286,17 @@ pool_change_class(proc_t *p, id_t cid) kmem_free(bufs, nlwp * sizeof (void *)); } +void +pool_get_name(pool_t *pool, char **name) +{ + ASSERT(pool_lock_held()); + + (void) nvlist_lookup_string(pool->pool_props, "pool.name", name); + + ASSERT(strlen(*name) != 0); +} + + /* * The meat of the bind operation. 
The steps in pool_do_bind are: * @@ -1658,3 +1721,71 @@ out: switch (idtype) { ASSERT(pool_barrier_count == 0); return (rv); } + +void +pool_event_cb_register(pool_event_cb_t *cb) +{ + ASSERT(!pool_lock_held() || panicstr); + ASSERT(cb->pec_func != NULL); + + mutex_enter(&pool_event_cb_lock); + if (!pool_event_cb_init) { + list_create(&pool_event_cb_list, sizeof (pool_event_cb_t), + offsetof(pool_event_cb_t, pec_list)); + pool_event_cb_init = B_TRUE; + } + list_insert_tail(&pool_event_cb_list, cb); + mutex_exit(&pool_event_cb_lock); +} + +void +pool_event_cb_unregister(pool_event_cb_t *cb) +{ + ASSERT(!pool_lock_held() || panicstr); + + mutex_enter(&pool_event_cb_lock); + list_remove(&pool_event_cb_list, cb); + mutex_exit(&pool_event_cb_lock); +} + +typedef struct { + pool_event_t tqd_what; + poolid_t tqd_id; +} pool_tqd_t; + +void +pool_event_notify(void *arg) +{ + pool_tqd_t *tqd = (pool_tqd_t *)arg; + pool_event_cb_t *cb; + + ASSERT(!pool_lock_held() || panicstr); + + mutex_enter(&pool_event_cb_lock); + for (cb = list_head(&pool_event_cb_list); cb != NULL; + cb = list_next(&pool_event_cb_list, cb)) { + cb->pec_func(tqd->tqd_what, tqd->tqd_id, cb->pec_arg); + } + mutex_exit(&pool_event_cb_lock); + kmem_free(tqd, sizeof (*tqd)); +} + +void +pool_event_dispatch(pool_event_t what, poolid_t id) +{ + pool_tqd_t *tqd = NULL; + + ASSERT(pool_lock_held()); + + if (pool_event_cb_taskq == NULL) { + pool_event_cb_taskq = taskq_create("pool_event_cb_taskq", 1, + -1, 1, 1, TASKQ_PREPOPULATE); + } + + tqd = kmem_alloc(sizeof (*tqd), KM_SLEEP); + tqd->tqd_what = what; + tqd->tqd_id = id; + + (void) taskq_dispatch(pool_event_cb_taskq, pool_event_notify, tqd, + KM_SLEEP); +} diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c index 75bd481d21..c8068ca965 100644 --- a/usr/src/uts/common/os/strsubr.c +++ b/usr/src/uts/common/os/strsubr.c @@ -8559,18 +8559,6 @@ lso_info_cleanup(mblk_t *mp) DB_LSOMSS(mp) = 0; } -void -lso_info_get(mblk_t *mp, uint32_t *mss, 
uint32_t *flags) -{ - ASSERT(DB_TYPE(mp) == M_DATA); - - if (flags != NULL) { - *flags = DB_CKSUMFLAGS(mp) & HW_LSO_FLAGS; - if ((*flags != 0) && (mss != NULL)) - *mss = (uint32_t)DB_LSOMSS(mp); - } -} - /* * Checksum buffer *bp for len bytes with psum partial checksum, * or 0 if none, and return the 16 bit partial checksum. diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index e251fd3222..834725f8f7 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -356,6 +356,7 @@ CHKHDRS= \ mac_impl.h \ mac_provider.h \ mac_soft_ring.h \ + mac_stat.h \ machelf.h \ map.h \ md4.h \ diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index ee0979b798..8363d231cf 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,6 +75,19 @@ typedef struct aggr_pseudo_rx_group_s { uint_t arg_ring_cnt; } aggr_pseudo_rx_group_t; +typedef struct aggr_pseudo_tx_ring_s { + mac_ring_handle_t atr_rh; /* filled in by aggr_fill_ring() */ + struct aggr_port_s *atr_port; + mac_ring_handle_t atr_hw_rh; + uint_t atr_flags; +} aggr_pseudo_tx_ring_t; + +typedef struct aggr_pseudo_tx_group_s { + mac_group_handle_t atg_gh; /* filled in by aggr_fill_group() */ + uint_t atg_ring_cnt; + aggr_pseudo_tx_ring_t atg_rings[MAX_RINGS_PER_GROUP]; +} aggr_pseudo_tx_group_t; + /* * A link aggregation MAC port. 
* Note that lp_next is protected by the lg_lock of the group the @@ -93,9 +106,10 @@ typedef struct aggr_port_s { lp_collector_enabled : 1, lp_promisc_on : 1, lp_no_link_update : 1, - lp_grp_added : 1, + lp_rx_grp_added : 1, + lp_tx_grp_added : 1, lp_closing : 1, - lp_pad_bits : 25; + lp_pad_bits : 24; mac_handle_t lp_mh; mac_client_handle_t lp_mch; const mac_info_t *lp_mip; @@ -116,6 +130,17 @@ typedef struct aggr_port_s { aggr_unicst_addr_t *lp_prom_addr; /* handle of the underlying HW RX group */ mac_group_handle_t lp_hwgh; + int lp_tx_ring_cnt; + /* handles of the underlying HW TX rings */ + mac_ring_handle_t *lp_tx_rings; + /* + * Handles of the pseudo TX rings. Each of them maps to + * corresponding hardware TX ring in lp_tx_rings[]. A + * pseudo TX ring is presented to aggr primary mac + * client even when underlying NIC has no TX ring. + */ + mac_ring_handle_t *lp_pseudo_tx_rings; + void *lp_tx_notify_mh; } aggr_port_t; /* @@ -187,7 +212,16 @@ typedef struct aggr_grp_s { mblk_t *lg_lacp_tail; kthread_t *lg_lacp_rx_thread; boolean_t lg_lacp_done; + aggr_pseudo_rx_group_t lg_rx_group; + aggr_pseudo_tx_group_t lg_tx_group; + + kmutex_t lg_tx_flowctl_lock; + kcondvar_t lg_tx_flowctl_cv; + uint_t lg_tx_blocked_cnt; + mac_ring_handle_t *lg_tx_blocked_rings; + kthread_t *lg_tx_notify_thread; + boolean_t lg_tx_notify_done; /* * The following fields are used by aggr to wait for all the @@ -274,7 +308,8 @@ extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); -extern mblk_t *aggr_m_tx(void *, mblk_t *); +extern void aggr_tx_ring_update(void *, uintptr_t); +extern void aggr_tx_notify_thread(void *); extern void aggr_send_port_enable(aggr_port_t *); extern void aggr_send_port_disable(aggr_port_t *); extern void aggr_send_update_policy(aggr_grp_t *, uint32_t); @@ -302,6 +337,10 @@ extern void aggr_grp_port_wait(aggr_grp_t *); extern int aggr_port_addmac(aggr_port_t *, const uint8_t *); extern 
void aggr_port_remmac(aggr_port_t *, const uint8_t *); +extern mblk_t *aggr_ring_tx(void *, mblk_t *); +extern mblk_t *aggr_find_tx_ring(void *, mblk_t *, + uintptr_t, mac_ring_handle_t *); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index ed80269fbc..fb2a0749d3 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -27,7 +27,7 @@ #define _SYS_DLD_H /* - * Data-Link Driver (public header). + * Data-Link Driver ioctl interfaces. * * Note that the datastructures defined here define an ioctl interface * that is shared betwen user and kernel space. The dld driver thus @@ -280,10 +280,12 @@ typedef struct dld_ioc_usagelog { #define DLDIOC_SETMACPROP DLDIOC(0x1b) #define DLDIOC_GETMACPROP DLDIOC(0x1c) -#define MAC_PROP_VERSION 1 + +/* pr_flags can be set to a combination of the following flags */ +#define DLD_PROP_DEFAULT 0x0001 +#define DLD_PROP_POSSIBLE 0x0002 typedef struct dld_ioc_macprop_s { - int pr_version; uint_t pr_flags; datalink_id_t pr_linkid; mac_prop_id_t pr_num; @@ -308,7 +310,7 @@ typedef struct dld_hwgrpinfo { uint_t dhi_grp_type; uint_t dhi_n_rings; uint_t dhi_n_clnts; - /* XXXX later we should use dhi_n_clnts * MAXNAMELEN for dhi_clnts */ + uint_t dhi_rings[MAX_RINGS_PER_GROUP]; char dhi_clnts[MAXCLIENTNAMELEN]; } dld_hwgrpinfo_t; diff --git a/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h b/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h index ca255b1643..61e0fb1a44 100644 --- a/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h +++ b/usr/src/uts/common/sys/fibre-channel/fca/oce/oce_impl.h @@ -286,7 +286,10 @@ void oce_m_ioctl(void *arg, queue_t *wq, mblk_t *mp); int oce_m_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t 
size, const void *val); int oce_m_getprop(void *arg, const char *name, mac_prop_id_t id, - uint_t flags, uint_t size, void *val, uint_t *perm); + uint_t size, void *val); +void oce_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, + mac_prop_info_handle_t prh); + int oce_m_stat(void *arg, uint_t stat, uint64_t *val); /* Hardware start/stop functions */ diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 7a8fc3293d..ab04126708 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -92,7 +92,7 @@ typedef enum { } link_tagmode_t; /* - * Defines range of uint32 values + * Defines range of uint32_t values */ typedef struct mac_propval_uint32_range_s { uint32_t mpur_min; @@ -100,10 +100,12 @@ typedef struct mac_propval_uint32_range_s { } mac_propval_uint32_range_t; /* - * Data type of the value + * Data type of property values. */ typedef enum { - MAC_PROPVAL_UINT32 = 0x1 + MAC_PROPVAL_UINT8, + MAC_PROPVAL_UINT32, + MAC_PROPVAL_STR } mac_propval_type_t; /* @@ -111,8 +113,6 @@ typedef enum { * range of values (int32, int64, uint32, uint64, et al) or collection/ * enumeration of values (strings). * Can be used as a value-result parameter. - * - * See PSARC 2009/235 for more information. */ typedef struct mac_propval_range_s { uint_t mpr_count; /* count of ranges */ @@ -122,7 +122,7 @@ typedef struct mac_propval_range_s { } u; } mac_propval_range_t; -#define range_uint32 u.mpr_uint32 +#define mpr_range_uint32 u.mpr_uint32 /* * Maximum MAC address length @@ -134,26 +134,15 @@ typedef enum { MAC_LOGTYPE_FLOW } mac_logtype_t; -/* - * Encodings for public properties. - * A most significant bit value of 1 indicates private property, intended - * to allow private property implementations to use internal encodings - * if desired. - * - * Note that there are 2 sets of parameters: the *_EN_* - * values are those that the Administrator configures for autonegotiation. 
- * The _ADV_* values are those that are currently exposed over the wire. - */ -#define MAXLINKPROPNAME 256 -#define MAC_PROP_DEFAULT 0x0001 /* default property value */ +#define MAXLINKPROPNAME 256 /* max property name len */ /* - * Indicates the linkprop framework is interested in knowing the list of - * possible property values. When used to obtain possible values for a - * property, one may have to change all the drivers. See PSARC 2009/235. + * Public properties. + * + * Note that there are 2 sets of parameters: the *_EN_* values are + * those that the Administrator configures for autonegotiation. The + * _ADV_* values are those that are currently exposed over the wire. */ -#define MAC_PROP_POSSIBLE 0x0002 /* possible property values */ - typedef enum { MAC_PROP_DUPLEX = 0x00000001, MAC_PROP_SPEED, @@ -202,16 +191,20 @@ typedef enum { MAC_PROP_WL_DELKEY, MAC_PROP_WL_KEY, MAC_PROP_WL_MLME, - MAC_PROP_MAXBW, - MAC_PROP_PRIO, - MAC_PROP_BIND_CPU, MAC_PROP_TAGMODE, MAC_PROP_ADV_10GFDX_CAP, MAC_PROP_EN_10GFDX_CAP, MAC_PROP_PVID, MAC_PROP_LLIMIT, MAC_PROP_LDECAY, - MAC_PROP_PROTECT, + MAC_PROP_RESOURCE, + MAC_PROP_RESOURCE_EFF, + MAC_PROP_RXRINGSRANGE, + MAC_PROP_TXRINGSRANGE, + MAC_PROP_MAX_TX_RINGS_AVAIL, + MAC_PROP_MAX_RX_RINGS_AVAIL, + MAC_PROP_MAX_RXHWCLNT_AVAIL, + MAC_PROP_MAX_TXHWCLNT_AVAIL, MAC_PROP_PRIVATE = -1 } mac_prop_id_t; @@ -248,7 +241,8 @@ enum mac_mod_stat { MAC_STAT_LINK_STATE, MAC_STAT_LINK_UP, MAC_STAT_PROMISC, - MAC_STAT_LOWLINK_STATE + MAC_STAT_LOWLINK_STATE, + MAC_STAT_HDROPS }; /* @@ -328,9 +322,13 @@ typedef struct mac_capab_vnic_s { } mac_capab_vnic_t; typedef void (*mac_rename_fn_t)(const char *, void *); +typedef mblk_t *(*mac_tx_ring_fn_t)(void *, mblk_t *, uintptr_t, + mac_ring_handle_t *); typedef struct mac_capab_aggr_s { mac_rename_fn_t mca_rename_fn; int (*mca_unicst)(void *, const uint8_t *); + mac_tx_ring_fn_t mca_find_tx_ring_fn; + void *mca_arg; } mac_capab_aggr_t; /* Bridge transmit and receive function signatures */ @@ -373,6 
+371,8 @@ typedef struct mac_intr_s { mac_intr_handle_t mi_handle; mac_intr_enable_t mi_enable; mac_intr_disable_t mi_disable; + ddi_intr_handle_t mi_ddi_handle; + boolean_t mi_ddi_shared; } mac_intr_t; typedef struct mac_rx_fifo_s { @@ -571,12 +571,6 @@ typedef struct mactype_register_s { size_t mtr_mappingcount; } mactype_register_t; -typedef struct mac_prop_s { - mac_prop_id_t mp_id; - char *mp_name; - uint_t mp_flags; -} mac_prop_t; - /* * Driver interface functions. */ @@ -617,6 +611,7 @@ extern int mac_start_logusage(mac_logtype_t, uint_t); extern void mac_stop_logusage(mac_logtype_t); extern mac_handle_t mac_get_lower_mac_handle(mac_handle_t); +extern boolean_t mac_is_vnic_primary(mac_handle_t); /* * Packet hashing for distribution to multiple ports and rings. diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index ad3f30aa63..40cd15a1b8 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,6 +68,18 @@ typedef enum { MAC_DIAG_MACNO_HWRINGS } mac_diag_t; +/* + * These are used when MAC clients what to specify tx and rx rings + * properties. MAC_RXRINGS_NONE/MAC_TXRINGS_NONE mean that we should + * not reserve any rings while MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE + * mean that the system can decide if it wants to reserve rings or + * not. 
+ */ +#define MAC_RXRINGS_NONE 0 +#define MAC_TXRINGS_NONE MAC_RXRINGS_NONE +#define MAC_RXRINGS_DONTCARE -1 +#define MAC_TXRINGS_DONTCARE MAC_RXRINGS_DONTCARE + typedef enum { MAC_CLIENT_PROMISC_ALL, MAC_CLIENT_PROMISC_FILTERED, @@ -87,11 +99,10 @@ typedef enum { #define MAC_OPEN_FLAGS_IS_VNIC 0x0001 #define MAC_OPEN_FLAGS_EXCLUSIVE 0x0002 #define MAC_OPEN_FLAGS_IS_AGGR_PORT 0x0004 -#define MAC_OPEN_FLAGS_NO_HWRINGS 0x0008 -#define MAC_OPEN_FLAGS_SHARES_DESIRED 0x0010 -#define MAC_OPEN_FLAGS_USE_DATALINK_NAME 0x0020 -#define MAC_OPEN_FLAGS_REQ_HWRINGS 0x0040 -#define MAC_OPEN_FLAGS_MULTI_PRIMARY 0x0080 +#define MAC_OPEN_FLAGS_SHARES_DESIRED 0x0008 +#define MAC_OPEN_FLAGS_USE_DATALINK_NAME 0x0010 +#define MAC_OPEN_FLAGS_MULTI_PRIMARY 0x0020 +#define MAC_OPEN_FLAGS_NO_UNICAST_ADDR 0x0040 /* flags passed to mac_client_close */ #define MAC_CLOSE_FLAGS_IS_VNIC 0x0001 @@ -161,12 +172,12 @@ extern uint_t mac_addr_factory_num(mac_handle_t); extern mac_tx_notify_handle_t mac_client_tx_notify(mac_client_handle_t, mac_tx_notify_t, void *); -extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); -extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); extern int mac_client_set_resources(mac_client_handle_t, mac_resource_props_t *); extern void mac_client_get_resources(mac_client_handle_t, mac_resource_props_t *); +extern void mac_client_get_eff_resources(mac_client_handle_t, + mac_resource_props_t *); /* bridging-related interfaces */ extern int mac_set_pvid(mac_handle_t, uint16_t); @@ -180,15 +191,7 @@ extern void mac_share_unbind(mac_client_handle_t); extern int mac_set_mtu(mac_handle_t, uint_t, uint_t *); -extern uint_t mac_hwgrp_num(mac_handle_t); -extern void mac_get_hwgrp_info(mac_handle_t, int, uint_t *, uint_t *, - uint_t *, uint_t *, char *); - -extern uint32_t mac_no_notification(mac_handle_t); -extern int mac_set_prop(mac_handle_t, mac_prop_t *, void *, uint_t); -extern int mac_get_prop(mac_handle_t, mac_prop_t *, void *, uint_t, uint_t 
*); - -extern boolean_t mac_is_vnic(mac_handle_t); +extern void mac_client_set_rings(mac_client_handle_t, int, int); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index bcdeb1da46..ae25df6a0d 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,6 +31,7 @@ #include <sys/mac_provider.h> #include <sys/mac.h> #include <sys/mac_impl.h> +#include <sys/mac_stat.h> #include <net/if.h> #include <sys/mac_flow_impl.h> @@ -153,16 +154,7 @@ struct mac_client_impl_s { /* Protected by */ uintptr_t mci_tx_notify_id; /* per MAC client stats */ /* None */ - uint64_t mci_stat_multircv; - uint64_t mci_stat_brdcstrcv; - uint64_t mci_stat_multixmt; - uint64_t mci_stat_brdcstxmt; - uint64_t mci_stat_obytes; - uint64_t mci_stat_opackets; - uint64_t mci_stat_oerrors; - uint64_t mci_stat_ibytes; - uint64_t mci_stat_ipackets; - uint64_t mci_stat_ierrors; + mac_misc_stats_t mci_misc_stat; flow_tab_t *mci_subflow_tab; /* Rx quiescence */ @@ -182,6 +174,20 @@ struct mac_client_impl_s { /* Protected by */ struct mac_mcast_addrs_s *mci_mcast_addrs; /* mi_rw_lock */ /* + * Mac protection related fields + */ + kmutex_t mci_protect_lock; + uint32_t mci_protect_flags; /* SL */ + in6_addr_t mci_v6_local_addr; /* SL */ + avl_tree_t mci_v4_pending_txn; /* mci_protect_lock */ + avl_tree_t mci_v4_completed_txn; /* mci_protect_lock */ + avl_tree_t mci_v4_dyn_ip; /* mci_protect_lock */ + avl_tree_t mci_v6_pending_txn; /* mci_protect_lock */ + avl_tree_t mci_v6_cid; /* mci_protect_lock */ + avl_tree_t mci_v6_dyn_ip; /* mci_protect_lock */ + timeout_id_t mci_txn_cleanup_tid; /* mci_protect_lock */ + + /* * Protected by mci_tx_pcpu[0].pcpu_tx_lock */ uint_t mci_tx_flag; @@ -287,12 
+293,15 @@ extern int mac_tx_percpu_cnt; #define MCIS_CLIENT_POLL_CAPABLE 0x0020 #define MCIS_DESC_LOGGED 0x0040 #define MCIS_SHARE_BOUND 0x0080 -#define MCIS_NO_HWRINGS 0x0100 -#define MCIS_DISABLE_TX_VID_CHECK 0x0200 -#define MCIS_USE_DATALINK_NAME 0x0400 -#define MCIS_UNICAST_HW 0x0800 -#define MCIS_REQ_HWRINGS 0x1000 -#define MCIS_RX_BYPASS_DISABLE 0x2000 +#define MCIS_DISABLE_TX_VID_CHECK 0x0100 +#define MCIS_USE_DATALINK_NAME 0x0200 +#define MCIS_UNICAST_HW 0x0400 +#define MCIS_IS_AGGR 0x0800 +#define MCIS_RX_BYPASS_DISABLE 0x1000 +#define MCIS_NO_UNICAST_ADDR 0x2000 + +/* Mac protection flags */ +#define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001 /* in mac_client.c */ extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *); @@ -301,7 +310,7 @@ extern void mac_client_fini(void); extern void mac_promisc_dispatch(mac_impl_t *, mblk_t *, mac_client_impl_t *); -extern int mac_validate_props(mac_resource_props_t *); +extern int mac_validate_props(mac_impl_t *, mac_resource_props_t *); extern mac_client_impl_t *mac_vnic_lower(mac_impl_t *); extern mac_client_impl_t *mac_primary_client_handle(mac_impl_t *); @@ -316,6 +325,10 @@ boolean_t mac_client_check_flow_vid(mac_client_impl_t *, uint16_t); extern boolean_t mac_is_primary_client(mac_client_impl_t *); +extern int mac_client_set_rings_prop(mac_client_impl_t *, + mac_resource_props_t *, mac_resource_props_t *); +extern void mac_set_prim_vlan_rings(mac_impl_t *, mac_resource_props_t *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 78421a3b80..0ddc1f074d 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -117,35 +117,71 @@ extern int mac_link_flow_walk(datalink_id_t, int (*)(mac_flowinfo_t *, void *), void *); extern int mac_link_flow_info(char *, mac_flowinfo_t *); -extern void *mac_tx_hold(mac_client_handle_t); -extern void mac_tx_rele(mac_client_handle_t, void *); -extern void mac_rx_client_quiesce(mac_client_handle_t); -extern void mac_rx_client_restart(mac_client_handle_t); -extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); -extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, - mac_ring_handle_t *, mac_ring_type_t); -extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t); -extern void mac_hwring_teardown(mac_ring_handle_t); -extern int mac_hwring_disable_intr(mac_ring_handle_t); -extern int mac_hwring_enable_intr(mac_ring_handle_t); -extern int mac_hwring_start(mac_ring_handle_t); -extern void mac_hwring_stop(mac_ring_handle_t); -extern mblk_t *mac_hwring_poll(mac_ring_handle_t, int); -#define MAC_HWRING_POLL(ring, bytes) \ - (((ring)->mr_info.mri_poll) \ - ((ring)->mr_info.mri_driver, (bytes))) - -extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); -extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); - -extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t); +extern void mac_rx_client_quiesce(mac_client_handle_t); +extern void mac_rx_client_restart(mac_client_handle_t); +extern void mac_tx_client_quiesce(mac_client_handle_t); +extern void mac_tx_client_condemn(mac_client_handle_t); +extern void mac_tx_client_restart(mac_client_handle_t); +extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); +extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, + mac_ring_handle_t *, mac_ring_type_t); +extern uint_t mac_hwring_getinfo(mac_ring_handle_t); +extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t, + mac_ring_handle_t); +extern void mac_hwring_teardown(mac_ring_handle_t); +extern int 
mac_hwring_disable_intr(mac_ring_handle_t); +extern int mac_hwring_enable_intr(mac_ring_handle_t); +extern int mac_hwring_start(mac_ring_handle_t); +extern void mac_hwring_stop(mac_ring_handle_t); +extern mblk_t *mac_hwring_poll(mac_ring_handle_t, int); +extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); +extern int mac_hwring_getstat(mac_ring_handle_t, uint_t, uint64_t *); +extern mblk_t *mac_hwring_send_priv(mac_client_handle_t, + mac_ring_handle_t, mblk_t *); + +#define MAC_HWRING_POLL(ring, bytes) \ + (((ring)->mr_info.mri_poll) \ + ((ring)->mr_info.mri_driver, (bytes))) + +extern int mac_hwgroup_addmac(mac_group_handle_t, const uint8_t *); +extern int mac_hwgroup_remmac(mac_group_handle_t, const uint8_t *); + +extern void mac_set_upper_mac(mac_client_handle_t, mac_handle_t, + mac_resource_props_t *); extern int mac_mark_exclusive(mac_handle_t); extern void mac_unmark_exclusive(mac_handle_t); -extern int32_t mac_client_intr_cpu(mac_client_handle_t); -extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); -extern void *mac_get_devinfo(mac_handle_t); +extern uint_t mac_hwgrp_num(mac_handle_t, int); +extern void mac_get_hwrxgrp_info(mac_handle_t, int, uint_t *, uint_t *, + uint_t *, uint_t *, uint_t *, char *); +extern void mac_get_hwtxgrp_info(mac_handle_t, int, uint_t *, uint_t *, + uint_t *, uint_t *, uint_t *, char *); + +extern uint_t mac_txavail_get(mac_handle_t); +extern uint_t mac_rxavail_get(mac_handle_t); +extern uint_t mac_txrsvd_get(mac_handle_t); +extern uint_t mac_rxrsvd_get(mac_handle_t); +extern uint_t mac_rxhwlnksavail_get(mac_handle_t); +extern uint_t mac_rxhwlnksrsvd_get(mac_handle_t); +extern uint_t mac_txhwlnksavail_get(mac_handle_t); +extern uint_t mac_txhwlnksrsvd_get(mac_handle_t); + +extern int32_t mac_client_intr_cpu(mac_client_handle_t); +extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); +extern void *mac_get_devinfo(mac_handle_t); + +extern boolean_t mac_is_vnic(mac_handle_t); +extern 
uint32_t mac_no_notification(mac_handle_t); + +extern int mac_set_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); +extern int mac_get_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); +extern int mac_prop_info(mac_handle_t, mac_prop_id_t, char *, void *, + uint_t, mac_propval_range_t *, uint_t *); +extern boolean_t mac_prop_check_size(mac_prop_id_t, uint_t, boolean_t); + +extern uint64_t mac_pseudo_rx_ring_stat_get(mac_ring_handle_t, uint_t); +extern uint64_t mac_pseudo_tx_ring_stat_get(mac_ring_handle_t, uint_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_flow.h b/usr/src/uts/common/sys/mac_flow.h index 08c7a211a3..9f9902fc29 100644 --- a/usr/src/uts/common/sys/mac_flow.h +++ b/usr/src/uts/common/sys/mac_flow.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,8 @@ extern "C" { #include <netinet/in.h> /* for IPPROTO_* constants */ #include <sys/ethernet.h> +#define MAX_RINGS_PER_GROUP 128 + /* * MAXFLOWNAMELEN defines the longest possible permitted flow name, * including the terminating NUL. @@ -93,29 +95,45 @@ typedef struct flow_desc_s { /* * In MCM_CPUS mode, cpu bindings is user specified. In MCM_FANOUT mode, * user only specifies a fanout count. - * mc_fanout_cnt gives the number of CPUs used for fanout soft rings. - * mc_fanout_cpus[] array stores the CPUs used for fanout soft rings. + * mc_rx_fanout_cnt gives the number of CPUs used for fanout soft rings. + * mc_rx_fanout_cpus[] array stores the CPUs used for fanout soft rings. */ typedef enum { MCM_FANOUT = 1, MCM_CPUS } mac_cpu_mode_t; +/* + * Structure to store the value of the CPUs to be used to re-target + * Tx interrupt. 
+ */ +typedef struct mac_tx_intr_cpus_s { + /* cpu value to re-target intr to */ + int32_t mtc_intr_cpu[MRP_NCPUS]; + /* re-targeted CPU or -1 if failed */ + int32_t mtc_retargeted_cpu[MRP_NCPUS]; +} mac_tx_intr_cpu_t; + typedef struct mac_cpus_props_s { uint32_t mc_ncpus; /* num of cpus */ uint32_t mc_cpus[MRP_NCPUS]; /* cpu list */ - uint32_t mc_fanout_cnt; /* soft ring cpu cnt */ - uint32_t mc_fanout_cpus[MRP_NCPUS]; /* SR cpu list */ - uint32_t mc_pollid; /* poll thr binding */ - uint32_t mc_workerid; /* worker thr binding */ + uint32_t mc_rx_fanout_cnt; /* soft ring cpu cnt */ + uint32_t mc_rx_fanout_cpus[MRP_NCPUS]; /* SR cpu list */ + uint32_t mc_rx_pollid; /* poll thr binding */ + uint32_t mc_rx_workerid; /* worker thr binding */ /* * interrupt cpu: mrp_intr_cpu less than 0 implies platform limitation * in retargetting the interrupt assignment. */ - int32_t mc_intr_cpu; + int32_t mc_rx_intr_cpu; + int32_t mc_tx_fanout_cpus[MRP_NCPUS]; + mac_tx_intr_cpu_t mc_tx_intr_cpus; mac_cpu_mode_t mc_fanout_mode; /* fanout mode */ } mac_cpus_t; +#define mc_tx_intr_cpu mc_tx_intr_cpus.mtc_intr_cpu +#define mc_tx_retargeted_cpu mc_tx_intr_cpus.mtc_retargeted_cpu + /* Priority values */ typedef enum { MPL_LOW, @@ -126,19 +144,41 @@ typedef enum { /* Protection types */ #define MPT_MACNOSPOOF 0x00000001 -#define MPT_IPNOSPOOF 0x00000002 -#define MPT_RESTRICTED 0x00000004 -#define MPT_ALL (MPT_MACNOSPOOF|MPT_IPNOSPOOF|MPT_RESTRICTED) +#define MPT_RESTRICTED 0x00000002 +#define MPT_IPNOSPOOF 0x00000004 +#define MPT_DHCPNOSPOOF 0x00000008 +#define MPT_ALL 0x0000000f #define MPT_RESET 0xffffffff -#define MPT_MAXIPADDR 32 +#define MPT_MAXCNT 32 +#define MPT_MAXIPADDR MPT_MAXCNT +#define MPT_MAXCID MPT_MAXCNT +#define MPT_MAXCIDLEN 256 + +typedef struct mac_ipaddr_s { + uint32_t ip_version; + in6_addr_t ip_addr; +} mac_ipaddr_t; + +typedef enum { + CIDFORM_TYPED = 1, + CIDFORM_HEX, + CIDFORM_STR +} mac_dhcpcid_form_t; + +typedef struct mac_dhcpcid_s { + uchar_t 
dc_id[MPT_MAXCIDLEN]; + uint32_t dc_len; + mac_dhcpcid_form_t dc_form; +} mac_dhcpcid_t; typedef struct mac_protect_s { uint32_t mp_types; uint32_t mp_ipaddrcnt; - ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; + mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; + uint32_t mp_cidcnt; + mac_dhcpcid_t mp_cids[MPT_MAXCID]; } mac_protect_t; - /* The default priority for links */ #define MPL_LINK_DEFAULT MPL_HIGH @@ -150,6 +190,12 @@ typedef struct mac_protect_s { #define MRP_CPUS_USERSPEC 0x00000004 /* CPU/fanout from user */ #define MRP_PRIORITY 0x00000008 /* Priority set */ #define MRP_PROTECT 0x00000010 /* Protection set */ +#define MRP_RX_RINGS 0x00000020 /* Rx rings */ +#define MRP_TX_RINGS 0x00000040 /* Tx rings */ +#define MRP_RXRINGS_UNSPEC 0x00000080 /* unspecified rings */ +#define MRP_TXRINGS_UNSPEC 0x00000100 /* unspecified rings */ +#define MRP_RINGS_RESET 0x00000200 /* resetting rings */ +#define MRP_POOL 0x00000400 /* CPU pool */ #define MRP_THROTTLE MRP_MAXBW @@ -174,21 +220,24 @@ typedef struct mac_resource_props_s { mac_priority_level_t mrp_priority; /* relative flow priority */ mac_cpus_t mrp_cpus; mac_protect_t mrp_protect; + uint32_t mrp_nrxrings; + uint32_t mrp_ntxrings; + char mrp_pool[MAXPATHLEN]; /* CPU pool */ } mac_resource_props_t; -#define mrp_ncpus mrp_cpus.mc_ncpus -#define mrp_cpu mrp_cpus.mc_cpus -#define mrp_fanout_cnt mrp_cpus.mc_fanout_cnt -#define mrp_fanout_cpu mrp_cpus.mc_fanout_cpus -#define mrp_pollid mrp_cpus.mc_pollid -#define mrp_workerid mrp_cpus.mc_workerid -#define mrp_intr_cpu mrp_cpus.mc_intr_cpu -#define mrp_fanout_mode mrp_cpus.mc_fanout_mode +#define mrp_ncpus mrp_cpus.mc_ncpus +#define mrp_cpu mrp_cpus.mc_cpus +#define mrp_rx_fanout_cnt mrp_cpus.mc_rx_fanout_cnt +#define mrp_rx_pollid mrp_cpus.mc_rx_pollid +#define mrp_rx_workerid mrp_cpus.mc_rx_workerid +#define mrp_rx_intr_cpu mrp_cpus.mc_rx_intr_cpu +#define mrp_fanout_mode mrp_cpus.mc_fanout_mode #define MAC_COPY_CPUS(mrp, fmrp) { \ int ncpus; \ (fmrp)->mrp_ncpus = (mrp)->mrp_ncpus; 
\ - (fmrp)->mrp_intr_cpu = (mrp)->mrp_intr_cpu; \ + (fmrp)->mrp_rx_fanout_cnt = (mrp)->mrp_rx_fanout_cnt; \ + (fmrp)->mrp_rx_intr_cpu = (mrp)->mrp_rx_intr_cpu; \ (fmrp)->mrp_fanout_mode = (mrp)->mrp_fanout_mode; \ if ((mrp)->mrp_ncpus == 0) { \ (fmrp)->mrp_mask &= ~MRP_CPUS; \ @@ -202,24 +251,6 @@ typedef struct mac_resource_props_s { } \ } -typedef struct flow_stats_s { - uint64_t fs_rbytes; - uint64_t fs_ipackets; - uint64_t fs_ierrors; - uint64_t fs_obytes; - uint64_t fs_opackets; - uint64_t fs_oerrors; -} flow_stats_t; - -typedef enum { - FLOW_STAT_RBYTES, - FLOW_STAT_IPACKETS, - FLOW_STAT_IERRORS, - FLOW_STAT_OBYTES, - FLOW_STAT_OPACKETS, - FLOW_STAT_OERRORS -} flow_stat_t; - #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 #pragma pack() #endif diff --git a/usr/src/uts/common/sys/mac_flow_impl.h b/usr/src/uts/common/sys/mac_flow_impl.h index f01d9d486c..307e06c1bf 100644 --- a/usr/src/uts/common/sys/mac_flow_impl.h +++ b/usr/src/uts/common/sys/mac_flow_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -280,7 +280,9 @@ struct flow_entry_s { /* Protected by */ void *fe_rx_ring_group; /* SL */ void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */ int fe_rx_srs_cnt; /* fe_lock */ + void *fe_tx_ring_group; void *fe_tx_srs; /* WO */ + int fe_tx_ring_cnt; /* * This is a unicast flow, and is a mac_client_impl_t @@ -317,7 +319,8 @@ struct flow_entry_s { /* Protected by */ flow_tab_t *fe_flow_tab; kstat_t *fe_ksp; - flow_stats_t fe_flowstats; + kstat_t *fe_misc_stat_ksp; + boolean_t fe_desc_logged; uint64_t fe_nic_speed; }; @@ -465,23 +468,36 @@ typedef struct flow_tab_info_s { #define FLOW_TAB_EMPTY(ft) ((ft) == NULL || (ft)->ft_flow_count == 0) -/* - * This is used by mac_tx_send. 
- */ -typedef struct mac_tx_stats_s { - uint_t ts_opackets; - uint_t ts_obytes; - uint_t ts_oerrors; -} mac_tx_stats_t; - -#define FLOW_STAT_UPDATE(f, s, c) { \ - ((flow_entry_t *)(f))->fe_flowstats.fs_##s += ((uint64_t)(c)); \ + +#define MCIP_STAT_UPDATE(m, s, c) { \ + ((mac_client_impl_t *)(m))->mci_misc_stat.mms_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_RX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_set_t *)(m))->srs_rx.sr_stat.mrs_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_TX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_set_t *)(m))->srs_tx.st_stat.mts_##s \ + += ((uint64_t)(c)); \ +} + +#define SRS_TX_STATS_UPDATE(m, s) { \ + SRS_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets); \ + SRS_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes); \ + SRS_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors); \ +} + +#define SOFTRING_TX_STAT_UPDATE(m, s, c) { \ + ((mac_soft_ring_t *)(m))->s_st_stat.mts_##s += ((uint64_t)(c)); \ } -#define FLOW_TX_STATS_UPDATE(f, s) { \ - FLOW_STAT_UPDATE((f), opackets, (s)->ts_opackets); \ - FLOW_STAT_UPDATE((f), obytes, (s)->ts_obytes); \ - FLOW_STAT_UPDATE((f), oerrors, (s)->ts_oerrors); \ +#define SOFTRING_TX_STATS_UPDATE(m, s) { \ + SOFTRING_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets); \ + SOFTRING_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes); \ + SOFTRING_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors); \ } extern void mac_flow_init(); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 760e2a4a18..ff4eeb1221 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -26,10 +26,12 @@ #ifndef _SYS_MAC_IMPL_H #define _SYS_MAC_IMPL_H +#include <sys/cpupart.h> #include <sys/modhash.h> #include <sys/mac_client.h> #include <sys/mac_provider.h> #include <sys/note.h> +#include <sys/avl.h> #include <net/if.h> #include <sys/mac_flow_impl.h> #include <netinet/ip6.h> @@ -85,6 +87,8 @@ typedef struct mac_chain_s { #define MCB_NOTIFY_CB_T 0x2 #define MCB_TX_NOTIFY_CB_T 0x4 
+extern boolean_t mac_tx_serialize; + typedef struct mac_cb_s { struct mac_cb_s *mcb_nextp; /* Linked list of callbacks */ void *mcb_objp; /* Ptr to enclosing object */ @@ -189,6 +193,8 @@ typedef enum { #define MR_CONDEMNED 0x2 #define MR_QUIESCE 0x4 +typedef struct mac_impl_s mac_impl_t; + struct mac_ring_s { int mr_index; /* index in the original list */ mac_ring_type_t mr_type; /* ring type */ @@ -196,11 +202,15 @@ struct mac_ring_s { mac_group_handle_t mr_gh; /* reference to group */ mac_classify_type_t mr_classify_type; /* HW vs SW */ - struct mac_soft_ring_set_s *mr_srs; /* associated SRS */ - uint_t mr_refcnt; /* Ring references */ + struct mac_soft_ring_set_s *mr_srs; /* associated SRS */ + mac_ring_handle_t mr_prh; /* associated pseudo ring hdl */ + uint_t mr_refcnt; /* Ring references */ /* ring generation no. to guard against drivers using stale rings */ uint64_t mr_gen_num; + kstat_t *mr_ksp; /* ring kstats */ + mac_impl_t *mr_mip; /* pointer to primary's mip */ + kmutex_t mr_lock; kcondvar_t mr_cv; /* mr_lock */ mac_ring_state_t mr_state; /* mr_lock */ @@ -211,6 +221,7 @@ struct mac_ring_s { #define mr_driver mr_info.mri_driver #define mr_start mr_info.mri_start #define mr_stop mr_info.mri_stop +#define mr_stat mr_info.mri_stat #define MAC_RING_MARK(mr, flag) \ (mr)->mr_flag |= flag; @@ -245,9 +256,9 @@ typedef struct mac_grp_client { struct mac_client_impl_s *mgc_client; } mac_grp_client_t; -#define MAC_RX_GROUP_NO_CLIENT(g) ((g)->mrg_clients == NULL) +#define MAC_GROUP_NO_CLIENT(g) ((g)->mrg_clients == NULL) -#define MAC_RX_GROUP_ONLY_CLIENT(g) \ +#define MAC_GROUP_ONLY_CLIENT(g) \ ((((g)->mrg_clients != NULL) && \ ((g)->mrg_clients->mgc_next == NULL)) ? 
\ (g)->mrg_clients->mgc_client : NULL) @@ -267,7 +278,6 @@ struct mac_group_s { mac_grp_client_t *mrg_clients; /* clients list */ - struct mac_client_impl_s *mrg_tx_client; /* TX client pointer */ mac_group_info_t mrg_info; /* driver supplied info */ }; @@ -279,8 +289,6 @@ struct mac_group_s { #define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable #define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable -#define MAC_DEFAULT_GROUP(mh) (((mac_impl_t *)mh)->mi_rx_groups) - #define MAC_RING_TX(mhp, rh, mp, rest) { \ mac_ring_handle_t mrh = rh; \ mac_impl_t *mimpl = (mac_impl_t *)mhp; \ @@ -304,7 +312,8 @@ struct mac_group_s { * rh nulled out if the bridge chooses to send output on a different * link due to forwarding. */ -#define MAC_TX(mip, rh, mp, share_bound) { \ +#define MAC_TX(mip, rh, mp, src_mcip) { \ + mac_ring_handle_t rhandle = (rh); \ /* \ * If there is a bound Hybrid I/O share, send packets through \ * the default tx ring. (When there's a bound Hybrid I/O share, \ @@ -312,17 +321,19 @@ struct mac_group_s { * and not accessible from here.) \ */ \ _NOTE(CONSTANTCONDITION) \ - if (share_bound) \ - rh = NULL; \ + if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \ + rhandle = (mip)->mi_default_tx_ring; \ + if (mip->mi_promisc_list != NULL) \ + mac_promisc_dispatch(mip, mp, src_mcip); \ /* \ * Grab the proper transmit pointer and handle. 
Special \ * optimization: we can test mi_bridge_link itself atomically, \ * and if that indicates no bridge send packets through tx ring.\ */ \ if (mip->mi_bridge_link == NULL) { \ - MAC_RING_TX(mip, rh, mp, mp); \ + MAC_RING_TX(mip, rhandle, mp, mp); \ } else { \ - mp = mac_bridge_tx(mip, rh, mp); \ + mp = mac_bridge_tx(mip, rhandle, mp); \ } \ } @@ -346,8 +357,6 @@ typedef enum { MAC_ADDRESS_TYPE_UNICAST_PROMISC /* promiscuous mode */ } mac_address_type_t; -typedef struct mac_impl_s mac_impl_t; - typedef struct mac_address_s { mac_address_type_t ma_type; /* address type */ int ma_nusers; /* number of users */ @@ -406,7 +415,6 @@ struct mac_impl_s { link_state_t mi_lowlinkstate; /* none */ link_state_t mi_lastlowlinkstate; /* none */ uint_t mi_devpromisc; /* SL */ - kmutex_t mi_lock; uint8_t mi_addr[MAXMACADDRLEN]; /* mi_rw_lock */ uint8_t mi_dstaddr[MAXMACADDRLEN]; /* mi_rw_lock */ boolean_t mi_dstaddr_set; @@ -436,6 +444,11 @@ struct mac_impl_s { mac_group_type_t mi_rx_group_type; /* grouping type */ uint_t mi_rx_group_count; mac_group_t *mi_rx_groups; + mac_group_t *mi_rx_donor_grp; + uint_t mi_rxrings_rsvd; + uint_t mi_rxrings_avail; + uint_t mi_rxhwclnt_avail; + uint_t mi_rxhwclnt_used; mac_capab_rings_t mi_rx_rings_cap; @@ -446,8 +459,11 @@ struct mac_impl_s { uint_t mi_tx_group_count; uint_t mi_tx_group_free; mac_group_t *mi_tx_groups; - mac_capab_rings_t mi_tx_rings_cap; + uint_t mi_txrings_rsvd; + uint_t mi_txrings_avail; + uint_t mi_txhwclnt_avail; + uint_t mi_txhwclnt_used; mac_ring_handle_t mi_default_tx_ring; @@ -516,7 +532,7 @@ struct mac_impl_s { * sorted: the first one has the greatest value. */ mac_margin_req_t *mi_mmrp; - mac_priv_prop_t *mi_priv_prop; + char **mi_priv_prop; uint_t mi_priv_prop_count; /* @@ -541,6 +557,72 @@ struct mac_impl_s { #endif }; +/* + * The default TX group is the last one in the list. 
+ */ +#define MAC_DEFAULT_TX_GROUP(mip) \ + (mip)->mi_tx_groups + (mip)->mi_tx_group_count + +/* + * The default RX group is the first one in the list + */ +#define MAC_DEFAULT_RX_GROUP(mip) (mip)->mi_rx_groups + +/* Reserved RX rings */ +#define MAC_RX_RING_RESERVED(m, cnt) { \ + ASSERT((m)->mi_rxrings_avail >= (cnt)); \ + (m)->mi_rxrings_rsvd += (cnt); \ + (m)->mi_rxrings_avail -= (cnt); \ +} + +/* Released RX rings */ +#define MAC_RX_RING_RELEASED(m, cnt) { \ + ASSERT((m)->mi_rxrings_rsvd >= (cnt)); \ + (m)->mi_rxrings_rsvd -= (cnt); \ + (m)->mi_rxrings_avail += (cnt); \ +} + +/* Reserved a RX group */ +#define MAC_RX_GRP_RESERVED(m) { \ + ASSERT((m)->mi_rxhwclnt_avail > 0); \ + (m)->mi_rxhwclnt_avail--; \ + (m)->mi_rxhwclnt_used++; \ +} + +/* Released a RX group */ +#define MAC_RX_GRP_RELEASED(m) { \ + ASSERT((m)->mi_rxhwclnt_used > 0); \ + (m)->mi_rxhwclnt_avail++; \ + (m)->mi_rxhwclnt_used--; \ +} + +/* Reserved TX rings */ +#define MAC_TX_RING_RESERVED(m, cnt) { \ + ASSERT((m)->mi_txrings_avail >= (cnt)); \ + (m)->mi_txrings_rsvd += (cnt); \ + (m)->mi_txrings_avail -= (cnt); \ +} +/* Released TX rings */ +#define MAC_TX_RING_RELEASED(m, cnt) { \ + ASSERT((m)->mi_txrings_rsvd >= (cnt)); \ + (m)->mi_txrings_rsvd -= (cnt); \ + (m)->mi_txrings_avail += (cnt); \ +} + +/* Reserved a TX group */ +#define MAC_TX_GRP_RESERVED(m) { \ + ASSERT((m)->mi_txhwclnt_avail > 0); \ + (m)->mi_txhwclnt_avail--; \ + (m)->mi_txhwclnt_used++; \ +} + +/* Released a TX group */ +#define MAC_TX_GRP_RELEASED(m) { \ + ASSERT((m)->mi_txhwclnt_used > 0); \ + (m)->mi_txhwclnt_avail++; \ + (m)->mi_txhwclnt_used--; \ +} + /* for mi_state_flags */ #define MIS_DISABLED 0x0001 #define MIS_IS_VNIC 0x0002 @@ -570,12 +652,6 @@ typedef struct mac_notify_task_arg { mac_ring_t *mnt_ring; } mac_notify_task_arg_t; -typedef enum { - MAC_RX_NO_RESERVE, - MAC_RX_RESERVE_DEFAULT, - MAC_RX_RESERVE_NONDEFAULT -} mac_rx_group_reserve_type_t; - /* * XXX All MAC_DBG_PRTs must be replaced with call to dtrace 
probes. For now * it may be easier to have these printfs for easier debugging @@ -599,18 +675,45 @@ extern int mac_dbg; (need_close) = ((uintptr_t)mph & 0x1); \ } +/* + * Type of property information that can be returned by a driver. + * Valid flags of the pr_flags of the mac_prop_info_t data structure. + */ +#define MAC_PROP_INFO_DEFAULT 0x0001 +#define MAC_PROP_INFO_RANGE 0x0002 +#define MAC_PROP_INFO_PERM 0x0004 + +/* + * Property information. pr_flags is a combination of one of the + * MAC_PROP_INFO_* flags, it is reset by the framework before invoking + * the driver's prefix_propinfo() entry point. + * + * Drivers should use MAC_PROP_INFO_SET_*() macros to provide + * information about a property. + */ +typedef struct mac_prop_info_state_s { + uint8_t pr_flags; + uint8_t pr_perm; + void *pr_default; + size_t pr_default_size; + uint8_t pr_default_status; + mac_propval_range_t *pr_range; +} mac_prop_info_state_t; + +#define MAC_PROTECT_ENABLED(mcip, type) \ + (((mcip)->mci_flent-> \ + fe_resource_props.mrp_mask & MRP_PROTECT) != 0 && \ + ((mcip)->mci_flent-> \ + fe_resource_props.mrp_protect.mp_types & (type)) != 0) + typedef struct mac_client_impl_s mac_client_impl_t; extern void mac_init(void); extern int mac_fini(void); -extern void mac_stat_create(mac_impl_t *); -extern void mac_stat_destroy(mac_impl_t *); -extern uint64_t mac_stat_default(mac_impl_t *, uint_t); extern void mac_ndd_ioctl(mac_impl_t *, queue_t *, mblk_t *); -extern void mac_create_soft_ring_kstats(mac_impl_t *, int32_t); -extern boolean_t mac_ip_hdr_length_v6(mblk_t *, ip6_t *, uint16_t *, - uint8_t *, boolean_t *, uint32_t *); +extern boolean_t mac_ip_hdr_length_v6(ip6_t *, uint8_t *, uint16_t *, + uint8_t *, ip6_frag_t **); extern mblk_t *mac_copymsgchain_cksum(mblk_t *); extern mblk_t *mac_fix_cksum(mblk_t *); @@ -649,10 +752,17 @@ extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *, mac_group_t *); extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); extern mblk_t 
*mac_bridge_tx(mac_impl_t *, mac_ring_handle_t, mblk_t *); +extern mac_group_t *mac_reserve_rx_group(mac_client_impl_t *, uint8_t *, + boolean_t); +extern void mac_release_rx_group(mac_client_impl_t *, mac_group_t *); +extern int mac_rx_switch_group(mac_client_impl_t *, mac_group_t *, + mac_group_t *); extern mac_ring_t *mac_reserve_tx_ring(mac_impl_t *, mac_ring_t *); -extern void mac_release_tx_ring(mac_ring_handle_t); -extern mac_group_t *mac_reserve_tx_group(mac_impl_t *, mac_share_handle_t); -extern void mac_release_tx_group(mac_impl_t *, mac_group_t *); +extern mac_group_t *mac_reserve_tx_group(mac_client_impl_t *, boolean_t); +extern void mac_release_tx_group(mac_client_impl_t *, mac_group_t *); +extern void mac_tx_switch_group(mac_client_impl_t *, mac_group_t *, + mac_group_t *); +extern void mac_rx_switch_grp_to_sw(mac_group_t *); /* * MAC address functions are used internally by MAC layer. @@ -676,7 +786,7 @@ extern void mac_link_flow_clean(mac_client_handle_t, flow_entry_t *); * Fanout update routines called when the link speed of the NIC changes * or when a MAC client's share is unbound. 
*/ -extern void mac_fanout_recompute_client(mac_client_impl_t *); +extern void mac_fanout_recompute_client(mac_client_impl_t *, cpupart_t *); extern void mac_fanout_recompute(mac_impl_t *); /* @@ -687,14 +797,15 @@ extern void mac_fanout_recompute(mac_impl_t *); extern int mac_datapath_setup(mac_client_impl_t *, flow_entry_t *, uint32_t); extern void mac_datapath_teardown(mac_client_impl_t *, flow_entry_t *, uint32_t); -extern void mac_srs_group_setup(mac_client_impl_t *, flow_entry_t *, - mac_group_t *, uint32_t); -extern void mac_srs_group_teardown(mac_client_impl_t *, flow_entry_t *, +extern void mac_rx_srs_group_setup(mac_client_impl_t *, flow_entry_t *, + uint32_t); +extern void mac_tx_srs_group_setup(mac_client_impl_t *, flow_entry_t *, + uint32_t); +extern void mac_rx_srs_group_teardown(flow_entry_t *, boolean_t); +extern void mac_tx_srs_group_teardown(mac_client_impl_t *, flow_entry_t *, uint32_t); extern int mac_rx_classify_flow_quiesce(flow_entry_t *, void *); extern int mac_rx_classify_flow_restart(flow_entry_t *, void *); -extern void mac_tx_client_quiesce(mac_client_impl_t *, uint_t); -extern void mac_tx_client_restart(mac_client_impl_t *); extern void mac_client_quiesce(mac_client_impl_t *); extern void mac_client_restart(mac_client_impl_t *); @@ -725,15 +836,17 @@ extern void mac_rx_group_unmark(mac_group_t *, uint_t); extern void mac_tx_client_flush(mac_client_impl_t *); extern void mac_tx_client_block(mac_client_impl_t *); extern void mac_tx_client_unblock(mac_client_impl_t *); +extern void mac_tx_invoke_callbacks(mac_client_impl_t *, mac_tx_cookie_t); extern int i_mac_promisc_set(mac_impl_t *, boolean_t); extern void i_mac_promisc_walker_cleanup(mac_impl_t *); extern mactype_t *mactype_getplugin(const char *); extern void mac_addr_factory_init(mac_impl_t *); extern void mac_addr_factory_fini(mac_impl_t *); -extern void mac_register_priv_prop(mac_impl_t *, mac_priv_prop_t *, uint_t); +extern void mac_register_priv_prop(mac_impl_t *, char **); 
extern void mac_unregister_priv_prop(mac_impl_t *); extern int mac_init_rings(mac_impl_t *, mac_ring_type_t); extern void mac_free_rings(mac_impl_t *, mac_ring_type_t); +extern void mac_compare_ddi_handle(mac_group_t *, uint_t, mac_ring_t *); extern int mac_start_group(mac_group_t *); extern void mac_stop_group(mac_group_t *); @@ -742,20 +855,41 @@ extern void mac_stop_ring(mac_ring_t *); extern int mac_add_macaddr(mac_impl_t *, mac_group_t *, uint8_t *, boolean_t); extern int mac_remove_macaddr(mac_address_t *); -extern void mac_set_rx_group_state(mac_group_t *, mac_group_state_t); -extern void mac_rx_group_add_client(mac_group_t *, mac_client_impl_t *); -extern void mac_rx_group_remove_client(mac_group_t *, mac_client_impl_t *) -; +extern void mac_set_group_state(mac_group_t *, mac_group_state_t); +extern void mac_group_add_client(mac_group_t *, mac_client_impl_t *); +extern void mac_group_remove_client(mac_group_t *, mac_client_impl_t *); + extern int i_mac_group_add_ring(mac_group_t *, mac_ring_t *, int); extern void i_mac_group_rem_ring(mac_group_t *, mac_ring_t *, boolean_t); - +extern int mac_group_ring_modify(mac_client_impl_t *, mac_group_t *, + mac_group_t *); extern void mac_poll_state_change(mac_handle_t, boolean_t); +extern mac_group_state_t mac_group_next_state(mac_group_t *, + mac_client_impl_t **, mac_group_t *, boolean_t); + extern mblk_t *mac_protect_check(mac_client_handle_t, mblk_t *); extern int mac_protect_set(mac_client_handle_t, mac_resource_props_t *); extern boolean_t mac_protect_enabled(mac_client_handle_t, uint32_t); extern int mac_protect_validate(mac_resource_props_t *); extern void mac_protect_update(mac_resource_props_t *, mac_resource_props_t *); +extern void mac_protect_update_v6_local_addr(mac_client_impl_t *); +extern void mac_protect_intercept_dhcp(mac_client_impl_t *, mblk_t *); +extern void mac_protect_flush_dhcp(mac_client_impl_t *); +extern void mac_protect_cancel_timer(mac_client_impl_t *); +extern void 
mac_protect_init(mac_client_impl_t *); +extern void mac_protect_fini(mac_client_impl_t *); + +extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *); + +extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *); +extern void mac_set_pool_effective(boolean_t, cpupart_t *, + mac_resource_props_t *, mac_resource_props_t *); +extern void mac_set_rings_effective(mac_client_impl_t *); +extern mac_client_impl_t *mac_check_primary_relocation(mac_client_impl_t *, + boolean_t); /* Global callbacks into the bridging module (when loaded) */ extern mac_bridge_tx_t mac_bridge_tx_cb; @@ -763,6 +897,7 @@ extern mac_bridge_rx_t mac_bridge_rx_cb; extern mac_bridge_ref_t mac_bridge_ref_cb; extern mac_bridge_ls_t mac_bridge_ls_cb; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 988f723f67..c96d07b594 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,7 +32,6 @@ #include <sys/sunddi.h> #include <sys/stream.h> #include <sys/mkdev.h> -#include <sys/mac_flow.h> #include <sys/mac.h> /* @@ -44,18 +43,16 @@ extern "C" { #endif /* - * MAC version identifier. This is used by mac_alloc() mac_register() to + * MAC version identifiers. Drivers compiled against the stable V1 version + * of the API should register with MAC_VERSION_V1. ON drivers should use + * MAC_VERSION. This is used by mac_alloc() mac_register() to * verify that incompatible drivers don't register. 
*/ -#define MAC_VERSION 0x2 +#define MAC_VERSION_V1 0x1 +#define MAC_VERSION MAC_VERSION_V1 /* - * Opaque handle types - */ -typedef struct __mac_rule_handle *mac_rule_handle_t; - -/* - * Statistics + * Possible values for ETHER_STAT_XCVR_INUSE statistic. */ #define XCVR_UNDEFINED 0 @@ -82,28 +79,35 @@ typedef struct __mac_rule_handle *mac_rule_handle_t; */ typedef enum { /* - * Capabilities reserved for internal use only + * Public Capabilities (MAC_VERSION_V1) */ - MAC_CAPAB_VNIC = 0x0001, /* data is mac_capab_vnic_t */ - MAC_CAPAB_ANCHOR_VNIC = 0x0002, /* boolean only, no data */ - MAC_CAPAB_AGGR = 0x0004, /* data is mac_capab_aggr_t */ - MAC_CAPAB_NO_NATIVEVLAN = 0x0008, /* boolean only, no data */ - MAC_CAPAB_NO_ZCOPY = 0x0010, /* boolean only, no data */ - MAC_CAPAB_LEGACY = 0x0020, /* data is mac_capab_legacy_t */ - MAC_CAPAB_VRRP = 0x0040, /* data is mac_capab_vrrp_t */ + MAC_CAPAB_HCKSUM = 0x00000001, /* data is a uint32_t */ + MAC_CAPAB_LSO = 0x00000008, /* data is mac_capab_lso_t */ /* - * Public Capabilities + * Reserved capabilities, do not use */ - MAC_CAPAB_HCKSUM = 0x0100, /* data is a uint32_t */ - MAC_CAPAB_LSO = 0x0200, /* data is mac_capab_lso_t */ - MAC_CAPAB_RINGS = 0x0400, /* data is mac_capab_rings_t */ - MAC_CAPAB_MULTIFACTADDR = 0x0800, /* mac_data_multifactaddr_t */ - MAC_CAPAB_SHARES = 0x1000 /* data is mac_capab_share_t */ + MAC_CAPAB_RESERVED1 = 0x00000002, + MAC_CAPAB_RESERVED2 = 0x00000004, - /* add new capabilities here */ -} mac_capab_t; + /* + * Private driver capabilities + */ + MAC_CAPAB_RINGS = 0x00000010, /* data is mac_capab_rings_t */ + MAC_CAPAB_SHARES = 0x00000020, /* data is mac_capab_share_t */ + MAC_CAPAB_MULTIFACTADDR = 0x00000040, /* mac_data_multifactaddr_t */ + /* + * Private driver capabilities for use by the GLDv3 framework only + */ + MAC_CAPAB_VNIC = 0x00010000, /* data is mac_capab_vnic_t */ + MAC_CAPAB_ANCHOR_VNIC = 0x00020000, /* boolean only, no data */ + MAC_CAPAB_AGGR = 0x00040000, /* data is 
mac_capab_aggr_t */ + MAC_CAPAB_NO_NATIVEVLAN = 0x00080000, /* boolean only, no data */ + MAC_CAPAB_NO_ZCOPY = 0x00100000, /* boolean only, no data */ + MAC_CAPAB_LEGACY = 0x00200000, /* data is mac_capab_legacy_t */ + MAC_CAPAB_VRRP = 0x00400000 /* data is mac_capab_vrrp_t */ +} mac_capab_t; /* * LSO capability @@ -164,6 +168,8 @@ typedef struct mac_capab_legacy_s { void (*ml_fastpath_enable)(void *); } mac_capab_legacy_t; +typedef struct __mac_prop_info_handle *mac_prop_info_handle_t; + /* * MAC driver entry point types. */ @@ -182,12 +188,15 @@ typedef void (*mac_close_t)(void *); typedef int (*mac_set_prop_t)(void *, const char *, mac_prop_id_t, uint_t, const void *); typedef int (*mac_get_prop_t)(void *, const char *, mac_prop_id_t, - uint_t, uint_t, void *, uint_t *); + uint_t, void *); +typedef void (*mac_prop_info_t)(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); /* - * Drivers must set all of these callbacks except for mc_resources, - * mc_ioctl, and mc_getcapab, which are optional. If any of these optional - * callbacks are set, their appropriate flags must be set in mc_callbacks. + * Driver callbacks. The following capabilities are optional, and if + * implemented by the driver, must have a corresponding MC_ flag set + * in the mc_callbacks field. + * * Any future additions to this list must also be accompanied by an * associated mc_callbacks flag so that the framework can grow without * affecting the binary compatibility of the interface. 
@@ -201,18 +210,31 @@ typedef struct mac_callbacks_s { mac_multicst_t mc_multicst; /* Enable or disable a multicast addr */ mac_unicst_t mc_unicst; /* Set the unicast MAC address */ mac_tx_t mc_tx; /* Transmit a packet */ + void *mc_reserved; /* Reserved, do not use */ mac_ioctl_t mc_ioctl; /* Process an unknown ioctl */ mac_getcapab_t mc_getcapab; /* Get capability information */ mac_open_t mc_open; /* Open the device */ mac_close_t mc_close; /* Close the device */ mac_set_prop_t mc_setprop; mac_get_prop_t mc_getprop; + mac_prop_info_t mc_propinfo; } mac_callbacks_t; -typedef struct mac_priv_prop_s { - char mpp_name[MAXLINKPROPNAME]; - uint_t mpp_flags; -} mac_priv_prop_t; +/* + * Flags for mc_callbacks. Requiring drivers to set the flags associated + * with optional callbacks initialized in the structure allows the mac + * module to add optional callbacks in the future without requiring drivers + * to recompile. + */ +#define MC_RESERVED 0x0001 +#define MC_IOCTL 0x0002 +#define MC_GETCAPAB 0x0004 +#define MC_OPEN 0x0008 +#define MC_CLOSE 0x0010 +#define MC_SETPROP 0x0020 +#define MC_GETPROP 0x0040 +#define MC_PROPINFO 0x0080 +#define MC_PROPERTIES (MC_SETPROP | MC_GETPROP | MC_PROPINFO) /* * Virtualization Capabilities @@ -245,24 +267,16 @@ typedef void (*mac_rx_func_t)(void *, mac_resource_handle_t, mblk_t *, * * MAC_VIRT_HIO: Hybrid I/O capable MAC. Require the support * of the MAC_CAPAB_SHARES capability. - * - * MAC_VIRT_SERIALIZE: Temporary flag *ONLY* for nxge. Mac layer - * uses this to enable mac Tx serializer on - * outbound traffic and to always enqueue - * incoming traffic on Rx soft rings in mac. 
*/ #define MAC_VIRT_NONE 0x0 #define MAC_VIRT_LEVEL1 0x1 #define MAC_VIRT_HIO 0x2 -#define MAC_VIRT_SERIALIZE 0x4 typedef enum { MAC_RING_TYPE_RX = 1, /* Receive ring */ MAC_RING_TYPE_TX /* Transmit ring */ } mac_ring_type_t; -#define MAX_RINGS_PER_GROUP 128 - /* * Grouping type of a ring group * @@ -313,6 +327,8 @@ typedef void (*mac_ring_stop_t)(mac_ring_driver_t); typedef mblk_t *(*mac_ring_send_t)(void *, mblk_t *); typedef mblk_t *(*mac_ring_poll_t)(void *, int); +typedef int (*mac_ring_stat_t)(mac_ring_driver_t, uint_t, uint64_t *); + typedef struct mac_ring_info_s { mac_ring_driver_t mri_driver; mac_ring_start_t mri_start; @@ -322,11 +338,27 @@ typedef struct mac_ring_info_s { mac_ring_send_t send; mac_ring_poll_t poll; } mrfunion; + mac_ring_stat_t mri_stat; + /* + * mri_flags will have some bits set to indicate some special + * property/feature of a ring like serialization needed for a + * Tx ring or packets should always need enqueuing on Rx side, + * etc. + */ + uint_t mri_flags; } mac_ring_info_s; #define mri_tx mrfunion.send #define mri_poll mrfunion.poll +/* + * #defines for mri_flags. The flags are temporary flags that are provided + * only to workaround issues in specific drivers, and they will be + * removed in the future. + */ +#define MAC_RING_TX_SERIALIZE 0x1 +#define MAC_RING_RX_ENQUEUE 0x2 + typedef int (*mac_group_start_t)(mac_group_driver_t); typedef void (*mac_group_stop_t)(mac_group_driver_t); typedef int (*mac_add_mac_addr_t)(void *, const uint8_t *); @@ -415,26 +447,12 @@ typedef struct mac_register_s { uint_t m_max_sdu; void *m_pdata; size_t m_pdata_size; + char **m_priv_props; uint32_t m_margin; - mac_priv_prop_t *m_priv_props; - size_t m_priv_prop_count; uint32_t m_v12n; /* Virtualization level */ } mac_register_t; /* - * Flags for mc_callbacks. 
Requiring drivers to set the flags associated - * with optional callbacks initialized in the structure allows the mac - * module to add optional callbacks in the future without requiring drivers - * to recompile. - */ -#define MC_IOCTL 0x001 -#define MC_GETCAPAB 0x002 -#define MC_OPEN 0x004 -#define MC_CLOSE 0x008 -#define MC_SETPROP 0x010 -#define MC_GETPROP 0x020 - -/* * Driver interface functions. */ extern void mac_sdu_get(mac_handle_t, uint_t *, uint_t *); @@ -476,6 +494,9 @@ extern void mac_init_ops(struct dev_ops *, const char *); extern void mac_fini_ops(struct dev_ops *); extern int mac_devt_to_instance(dev_t); extern minor_t mac_private_minor(void); +extern void mac_ring_intr_set(mac_ring_handle_t, + ddi_intr_handle_t); + extern mactype_register_t *mactype_alloc(uint_t); extern void mactype_free(mactype_register_t *); @@ -488,6 +509,31 @@ extern boolean_t mac_unicst_verify(mac_handle_t, extern int mac_group_add_ring(mac_group_handle_t, int); extern void mac_group_rem_ring(mac_group_handle_t, mac_ring_handle_t); +extern mac_ring_handle_t mac_find_ring(mac_group_handle_t, int); + +extern void mac_prop_info_set_default_uint8( + mac_prop_info_handle_t, uint8_t); +extern void mac_prop_info_set_default_str( + mac_prop_info_handle_t, const char *); +extern void mac_prop_info_set_default_uint64( + mac_prop_info_handle_t, uint64_t); +extern void mac_prop_info_set_default_uint32( + mac_prop_info_handle_t, uint32_t); +extern void mac_prop_info_set_default_link_flowctrl( + mac_prop_info_handle_t, link_flowctrl_t); +extern void mac_prop_info_set_range_uint32( + mac_prop_info_handle_t, + uint32_t, uint32_t); +extern void mac_prop_info_set_perm(mac_prop_info_handle_t, + uint8_t); + +extern void mac_hcksum_get(mblk_t *, uint32_t *, + uint32_t *, uint32_t *, uint32_t *, + uint32_t *); +extern void mac_hcksum_set(mblk_t *, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +extern void mac_lso_get(mblk_t *, uint32_t *, uint32_t *); #endif /* _KERNEL */ diff --git 
a/usr/src/uts/common/sys/mac_soft_ring.h b/usr/src/uts/common/sys/mac_soft_ring.h index ed4c47954d..88f1aa7249 100644 --- a/usr/src/uts/common/sys/mac_soft_ring.h +++ b/usr/src/uts/common/sys/mac_soft_ring.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,11 +33,13 @@ extern "C" { #include <sys/types.h> #include <sys/cpuvar.h> +#include <sys/cpupart.h> #include <sys/processor.h> #include <sys/stream.h> #include <sys/squeue.h> #include <sys/dlpi.h> #include <sys/mac_impl.h> +#include <sys/mac_stat.h> #define S_RING_NAMELEN 64 @@ -85,8 +87,6 @@ struct mac_soft_ring_s { /* # of mblocks after which to relieve flow control */ int s_ring_tx_lowat; boolean_t s_ring_tx_woken_up; - uint32_t s_ring_blocked_cnt; /* times blocked for Tx descs */ - uint32_t s_ring_unblocked_cnt; /* unblock calls from driver */ uint32_t s_ring_hiwat_cnt; /* times blocked for Tx descs */ void *s_ring_tx_arg1; @@ -107,9 +107,9 @@ struct mac_soft_ring_s { kthread_t *s_ring_worker; /* kernel thread id */ char s_ring_name[S_RING_NAMELEN + 1]; uint32_t s_ring_total_inpkt; + uint32_t s_ring_total_rbytes; uint32_t s_ring_drops; struct mac_client_impl_s *s_ring_mcip; - void *s_ring_flent; kstat_t *s_ring_ksp; /* Teardown, poll disable control ops */ @@ -119,6 +119,8 @@ struct mac_soft_ring_s { mac_soft_ring_t *s_ring_next; mac_soft_ring_t *s_ring_prev; mac_soft_ring_drain_func_t s_ring_drain_func; + + mac_tx_stats_t s_st_stat; }; typedef void (*mac_srs_drain_proc_t)(mac_soft_ring_set_t *, uint_t); @@ -131,9 +133,6 @@ typedef struct mac_srs_tx_s { void *st_arg1; void *st_arg2; mac_group_t *st_group; /* TX group for share */ - uint32_t st_ring_count; /* no. 
of tx rings */ - mac_ring_handle_t *st_rings; - boolean_t st_woken_up; /* @@ -156,18 +155,19 @@ typedef struct mac_srs_tx_s { */ uint32_t st_hiwat; /* mblk cnt to apply flow control */ uint32_t st_lowat; /* mblk cnt to relieve flow control */ - uint32_t st_drop_count; + uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ + mac_tx_stats_t st_stat; + mac_capab_aggr_t st_capab_aggr; /* - * Number of times the srs gets blocked due to lack of Tx - * desc is noted down. Corresponding wakeup from driver - * to unblock is also noted down. They should match in a - * correctly working setup. If there is less unblocks - * than blocks, then Tx side waits forever for a wakeup - * from below. The following protected by srs_lock. + * st_soft_rings is used as an array to store aggr Tx soft + * rings. When aggr_find_tx_ring() returns a pseudo ring, + * the associated soft ring has to be found. st_soft_rings + * array stores the soft ring associated with a pseudo Tx + * ring and it can be accessed using the pseudo ring + * index (mr_index). Note that the ring index is unique + * for each ring in a group. 
*/ - uint32_t st_blocked_cnt; /* times blocked for Tx descs */ - uint32_t st_unblocked_cnt; /* unblock calls from driver */ - uint32_t st_hiwat_cnt; /* times blocked for Tx descs */ + mac_soft_ring_t **st_soft_rings; } mac_srs_tx_t; /* Receive side Soft Ring Set */ @@ -191,9 +191,7 @@ typedef struct mac_srs_rx_s { uint32_t sr_hiwat; /* mblk cnt to relieve flow control */ uint32_t sr_lowat; - uint32_t sr_poll_count; - uint32_t sr_intr_count; - uint32_t sr_drop_count; + mac_rx_stats_t sr_stat; /* Times polling was enabled */ uint32_t sr_poll_on; @@ -246,13 +244,6 @@ typedef struct mac_srs_rx_s { uint32_t sr_drain_finish_intr; /* Polling thread needs to schedule worker wakeup */ uint32_t sr_poll_worker_wakeup; - - /* Chains less than 10 pkts */ - uint32_t sr_chain_cnt_undr10; - /* Chains between 10 & 50 pkts */ - uint32_t sr_chain_cnt_10to50; - /* Chains over 50 pkts */ - uint32_t sr_chain_cnt_over50; } mac_srs_rx_t; /* @@ -334,12 +325,14 @@ struct mac_soft_ring_set_s { int srs_tcp_ring_count; mac_soft_ring_t **srs_udp_soft_rings; int srs_udp_ring_count; + mac_soft_ring_t **srs_oth_soft_rings; + int srs_oth_ring_count; /* - * srs_oth_soft_rings is also used by tx_srs in + * srs_tx_soft_rings is used by tx_srs in * when operating in multi tx ring mode. */ - mac_soft_ring_t **srs_oth_soft_rings; - int srs_oth_ring_count; + mac_soft_ring_t **srs_tx_soft_rings; + int srs_tx_ring_count; /* * Bandwidth control related members. 
@@ -386,6 +379,7 @@ struct mac_soft_ring_set_s { mac_srs_rx_t srs_rx; mac_srs_tx_t srs_tx; + kstat_t *srs_ksp; }; /* @@ -507,7 +501,9 @@ typedef enum { SRS_TX_SERIALIZE, SRS_TX_FANOUT, SRS_TX_BW, - SRS_TX_BW_FANOUT + SRS_TX_BW_FANOUT, + SRS_TX_AGGR, + SRS_TX_BW_AGGR } mac_tx_srs_mode_t; /* @@ -626,9 +622,7 @@ extern struct dls_kstats dls_kstat; (srs)->srs_bw->mac_bw_used += (sz); \ } -#define TX_MULTI_RING_MODE(mac_srs) \ - ((mac_srs)->srs_tx.st_mode == SRS_TX_FANOUT || \ - (mac_srs)->srs_tx.st_mode == SRS_TX_BW_FANOUT) +#define MAC_TX_SOFT_RINGS(mac_srs) ((mac_srs)->srs_tx_ring_count >= 1) /* Soft ring flags for teardown */ #define SRS_POLL_THR_OWNER (SRS_PROC | SRS_POLLING | SRS_GET_PKTS) @@ -639,7 +633,8 @@ extern struct dls_kstats dls_kstat; extern void mac_soft_ring_init(void); extern void mac_soft_ring_finish(void); extern void mac_fanout_setup(mac_client_impl_t *, flow_entry_t *, - mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t); + mac_resource_props_t *, mac_direct_rx_t, void *, mac_resource_handle_t, + cpupart_t *); extern void mac_soft_ring_worker_wakeup(mac_soft_ring_t *); extern void mac_soft_ring_blank(void *, time_t, uint_t, int); @@ -654,6 +649,8 @@ extern mac_soft_ring_set_t *mac_srs_create(struct mac_client_impl_s *, extern void mac_srs_free(mac_soft_ring_set_t *); extern void mac_srs_signal(mac_soft_ring_set_t *, uint_t); extern cpu_t *mac_srs_bind(mac_soft_ring_set_t *, processorid_t); +extern void mac_rx_srs_retarget_intr(mac_soft_ring_set_t *, processorid_t); +extern void mac_tx_srs_retarget_intr(mac_soft_ring_set_t *); extern void mac_srs_change_upcall(void *, mac_direct_rx_t, void *); extern void mac_srs_quiesce_initiate(mac_soft_ring_set_t *); @@ -673,12 +670,13 @@ extern void mac_tx_srs_quiesce(mac_soft_ring_set_t *, uint_t); /* Tx SRS, Tx softring */ extern void mac_tx_srs_wakeup(mac_soft_ring_set_t *, mac_ring_handle_t); -extern void mac_tx_srs_setup(struct mac_client_impl_s *, - flow_entry_t *, uint32_t); +extern 
void mac_tx_srs_setup(struct mac_client_impl_s *, flow_entry_t *); extern mac_tx_func_t mac_tx_get_func(uint32_t); extern mblk_t *mac_tx_send(mac_client_handle_t, mac_ring_handle_t, mblk_t *, mac_tx_stats_t *); extern boolean_t mac_tx_srs_ring_present(mac_soft_ring_set_t *, mac_ring_t *); +extern mac_soft_ring_t *mac_tx_srs_get_soft_ring(mac_soft_ring_set_t *, + mac_ring_t *); extern void mac_tx_srs_add_ring(mac_soft_ring_set_t *, mac_ring_t *); extern void mac_tx_srs_del_ring(mac_soft_ring_set_t *, mac_ring_t *); extern mac_tx_cookie_t mac_tx_srs_no_desc(mac_soft_ring_set_t *, mblk_t *, @@ -695,12 +693,12 @@ extern void mac_client_update_classifier(mac_client_impl_t *, boolean_t); extern void mac_soft_ring_intr_enable(void *); extern boolean_t mac_soft_ring_intr_disable(void *); -extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, void *, uint16_t, +extern mac_soft_ring_t *mac_soft_ring_create(int, clock_t, uint16_t, pri_t, mac_client_impl_t *, mac_soft_ring_set_t *, processorid_t, mac_direct_rx_t, void *, mac_resource_handle_t); extern cpu_t *mac_soft_ring_bind(mac_soft_ring_t *, processorid_t); extern void mac_soft_ring_unbind(mac_soft_ring_t *); -extern void mac_soft_ring_free(mac_soft_ring_t *, boolean_t); +extern void mac_soft_ring_free(mac_soft_ring_t *); extern void mac_soft_ring_signal(mac_soft_ring_t *, uint_t); extern void mac_rx_soft_ring_process(mac_client_impl_t *, mac_soft_ring_t *, mblk_t *, mblk_t *, int, size_t); diff --git a/usr/src/uts/common/sys/mac_stat.h b/usr/src/uts/common/sys/mac_stat.h new file mode 100644 index 0000000000..2d2989cd76 --- /dev/null +++ b/usr/src/uts/common/sys/mac_stat.h @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MAC_STAT_H +#define _MAC_STAT_H + +#include <sys/mac_flow_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + +struct mac_soft_ring_set_s; +struct mac_soft_ring_s; + +typedef struct mac_rx_stats_s { + uint64_t mrs_lclbytes; + uint64_t mrs_lclcnt; + uint64_t mrs_pollcnt; + uint64_t mrs_pollbytes; + uint64_t mrs_intrcnt; + uint64_t mrs_intrbytes; + uint64_t mrs_sdrops; + uint64_t mrs_chaincntundr10; + uint64_t mrs_chaincnt10to50; + uint64_t mrs_chaincntover50; + uint64_t mrs_ierrors; +} mac_rx_stats_t; + +typedef struct mac_tx_stats_s { + uint64_t mts_obytes; + uint64_t mts_opackets; + uint64_t mts_oerrors; + /* + * Number of times the srs gets blocked due to lack of Tx + * desc is noted down. Corresponding wakeup from driver + * to unblock is also noted down. They should match in a + * correctly working setup. If there are fewer unblocks + * than blocks, then Tx side waits forever for a wakeup + * from below. The following are protected by srs_lock.
+ */ + uint64_t mts_blockcnt; /* times blocked for Tx descs */ + uint64_t mts_unblockcnt; /* unblock calls from driver */ + uint64_t mts_sdrops; +} mac_tx_stats_t; + +typedef struct mac_misc_stats_s { + uint64_t mms_multircv; + uint64_t mms_brdcstrcv; + uint64_t mms_multixmt; + uint64_t mms_brdcstxmt; + uint64_t mms_multircvbytes; + uint64_t mms_brdcstrcvbytes; + uint64_t mms_multixmtbytes; + uint64_t mms_brdcstxmtbytes; + uint64_t mms_txerrors; /* vid_check, tag needed errors */ + + /* + * When a ring is taken away from a mac client, before destroying + * corresponding SRS (for rx ring) or soft ring (for tx ring), add stats + * recorded by that SRS or soft ring to defunct lane stats. + */ + mac_rx_stats_t mms_defunctrxlanestats; + mac_tx_stats_t mms_defuncttxlanestats; + + /* link protection stats */ + uint64_t mms_macspoofed; + uint64_t mms_ipspoofed; + uint64_t mms_dhcpspoofed; + uint64_t mms_restricted; + uint64_t mms_dhcpdropped; +} mac_misc_stats_t; + +extern void mac_misc_stat_create(flow_entry_t *); +extern void mac_misc_stat_delete(flow_entry_t *); + +extern void mac_ring_stat_create(mac_ring_t *); +extern void mac_ring_stat_delete(mac_ring_t *); + +extern void mac_srs_stat_create(struct mac_soft_ring_set_s *); +extern void mac_srs_stat_delete(struct mac_soft_ring_set_s *); +extern void mac_tx_srs_stat_recreate(struct mac_soft_ring_set_s *, + boolean_t); + +extern void mac_soft_ring_stat_create(struct mac_soft_ring_s *); +extern void mac_soft_ring_stat_delete(struct mac_soft_ring_s *); + +extern void mac_stat_rename(mac_client_impl_t *); +extern void mac_pseudo_ring_stat_rename(mac_impl_t *); + +extern void mac_driver_stat_create(mac_impl_t *); +extern void mac_driver_stat_delete(mac_impl_t *); +extern uint64_t mac_driver_stat_default(mac_impl_t *, uint_t); + +extern uint64_t mac_rx_ring_stat_get(void *, uint_t); +extern uint64_t mac_tx_ring_stat_get(void *, uint_t); + +#endif /* _MAC_STAT_H */ diff --git a/usr/src/uts/common/sys/mii.h 
b/usr/src/uts/common/sys/mii.h index 6a060c8100..c3941affce 100644 --- a/usr/src/uts/common/sys/mii.h +++ b/usr/src/uts/common/sys/mii.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -428,11 +428,8 @@ boolean_t mii_m_loop_ioctl(mii_handle_t mii, queue_t *wq, mblk_t *msg); * mii MII handle. * name Property name. * id Property ID. - * flags Property flags (MAC_PROP_DEFAULT). * sz Size of property in bytes. * val Location to receive property value. - * perm Location to receive property permissions (either - * MAC_PROP_PERM_READ or MAC_PROP_PERM_RW). * * Returns * @@ -441,7 +438,7 @@ boolean_t mii_m_loop_ioctl(mii_handle_t mii, queue_t *wq, mblk_t *msg); * ENOTSUP if the prooperty is not supported by MII or the PHY. */ int mii_m_getprop(mii_handle_t mii, const char *name, mac_prop_id_t id, - uint_t flags, uint_t sz, void *val, uint_t *perm); + uint_t sz, void *val); /* * mii_m_setprop @@ -471,6 +468,25 @@ int mii_m_setprop(mii_handle_t mii, const char *name, mac_prop_id_t id, uint_t sz, const void *val); /* + * mii_m_propinfo + * + * Used to support the driver's mc_propinfo() mac callback, + * and only to be called from that function (and without any + * locks held). + * + * Arguments + * + * mii MII handle. + * name Property name. + * id Property ID. + * prh Property info handle. + * + */ +void mii_m_propinfo(mii_handle_t mii, const char *name, mac_prop_id_t id, + mac_prop_info_handle_t prh); + + +/* * mii_m_getstat * * Used to support the driver's mc_getstat() mac callback for diff --git a/usr/src/uts/common/sys/net80211.h b/usr/src/uts/common/sys/net80211.h index a8d45c6174..313b335afa 100644 --- a/usr/src/uts/common/sys/net80211.h +++ b/usr/src/uts/common/sys/net80211.h @@ -1,5 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc.
All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,7 @@ #define _SYS_NET80211_H #include <sys/mac.h> +#include <sys/mac_provider.h> #include <sys/ethernet.h> #include <sys/net80211_proto.h> #include <sys/net80211_crypto.h> @@ -731,8 +732,10 @@ void *ieee80211_malloc(size_t); void ieee80211_free(void *); int ieee80211_setprop(void *, const char *, mac_prop_id_t, uint_t, const void *); -int ieee80211_getprop(void *, const char *, mac_prop_id_t, uint_t, uint_t, - void *, uint_t *); +int ieee80211_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); +void ieee80211_propinfo(void *, const char *, mac_prop_id_t, + mac_prop_info_handle_t); + struct ieee80211_channel *ieee80211_find_channel(ieee80211com_t *, int, int); const struct ieee80211_rateset *ieee80211_get_suprates(ieee80211com_t *, diff --git a/usr/src/uts/common/sys/nxge/nxge.h b/usr/src/uts/common/sys/nxge/nxge.h index 14801131ce..042590e3e0 100644 --- a/usr/src/uts/common/sys/nxge/nxge.h +++ b/usr/src/uts/common/sys/nxge/nxge.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -601,6 +601,8 @@ typedef struct _nxge_ring_handle_t { p_nxge_t nxgep; int index; /* port-wise */ mac_ring_handle_t ring_handle; + uint64_t ring_gen_num; /* For RX Ring Start */ + uint32_t channel; } nxge_ring_handle_t, *p_nxge_ring_handle_t; /* @@ -791,8 +793,6 @@ struct _nxge_t { nxge_grp_set_t tx_set; boolean_t tdc_is_shared[NXGE_MAX_TDCS]; - boolean_t rx_channel_started[NXGE_MAX_RDCS]; - /* Ring Handles */ nxge_ring_handle_t tx_ring_handles[NXGE_MAX_TDCS]; nxge_ring_handle_t rx_ring_handles[NXGE_MAX_RDCS]; diff --git a/usr/src/uts/common/sys/nxge/nxge_hio.h b/usr/src/uts/common/sys/nxge/nxge_hio.h index 2f809e557f..492da24d55 100644 --- a/usr/src/uts/common/sys/nxge/nxge_hio.h +++ b/usr/src/uts/common/sys/nxge/nxge_hio.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -367,6 +367,8 @@ extern int nxge_hio_rxdma_bind_intr(nxge_t *, rx_rcr_ring_t *, int); /* nxge_hio_guest.c */ extern void nxge_hio_unregister(nxge_t *); +extern int nxge_hio_get_dc_htable_idx(nxge_t *nxge, vpc_type_t type, + uint32_t channel); extern int nxge_guest_regs_map(nxge_t *); extern void nxge_guest_regs_map_free(nxge_t *); @@ -392,7 +394,7 @@ extern nxge_status_t nxge_hio_intr_remove(nxge_t *, vpc_type_t, int); extern nxge_status_t nxge_hio_intr_add(nxge_t *, vpc_type_t, int); extern nxge_status_t nxge_hio_intr_rem(nxge_t *, int); -extern hv_rv_t nxge_hio_ldsv_add(nxge_t *, nxge_hio_dc_t *); +extern int nxge_hio_ldsv_add(nxge_t *, nxge_hio_dc_t *); extern void nxge_hio_ldsv_im(nxge_t *, nxge_ldg_t *, pio_ld_op_t, uint64_t *); extern void nxge_hio_ldgimgn(nxge_t *, nxge_ldg_t *); diff --git a/usr/src/uts/common/sys/nxge/nxge_impl.h b/usr/src/uts/common/sys/nxge/nxge_impl.h index dc6b29be68..1221e542dc 100644 --- a/usr/src/uts/common/sys/nxge/nxge_impl.h +++ b/usr/src/uts/common/sys/nxge/nxge_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * 
Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -641,6 +641,7 @@ struct _nxge_ldg_t { p_nxge_ldv_t ldvp; nxge_sys_intr_t sys_intr_handler; p_nxge_t nxgep; + uint32_t htable_idx; }; struct _nxge_ldv_t { @@ -885,6 +886,8 @@ int nxge_port_kstat_update(kstat_t *, int); void nxge_save_cntrs(p_nxge_t); int nxge_m_stat(void *arg, uint_t, uint64_t *); +int nxge_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); +int nxge_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *); /* nxge_hw.c */ void diff --git a/usr/src/uts/common/sys/nxge/nxge_rxdma.h b/usr/src/uts/common/sys/nxge/nxge_rxdma.h index ab0d0cde60..885f051cef 100644 --- a/usr/src/uts/common/sys/nxge/nxge_rxdma.h +++ b/usr/src/uts/common/sys/nxge/nxge_rxdma.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -209,7 +209,7 @@ typedef struct _rx_rcr_ring_t { p_nxge_rx_ring_stats_t rdc_stats; - int poll_flag; /* 1 if polling mode */ + boolean_t poll_flag; /* B_TRUE, if polling mode */ rcrcfig_a_t rcr_cfga; rcrcfig_b_t rcr_cfgb; @@ -244,6 +244,7 @@ typedef struct _rx_rcr_ring_t { uint32_t rcvd_pkt_bytes; /* Received bytes of a packet */ p_nxge_ldv_t ldvp; p_nxge_ldg_t ldgp; + boolean_t started; } rx_rcr_ring_t, *p_rx_rcr_ring_t; diff --git a/usr/src/uts/common/sys/pattr.h b/usr/src/uts/common/sys/pattr.h index 4d3dc29753..1269aeca10 100644 --- a/usr/src/uts/common/sys/pattr.h +++ b/usr/src/uts/common/sys/pattr.h @@ -67,20 +67,24 @@ typedef struct pattr_hcksum_s { */ #define HCK_IPV4_HDRCKSUM 0x01 /* On Transmit: Compute IP header */ /* checksum in hardware. */ - /* On Receive: IP header checksum */ + +#define HCK_IPV4_HDRCKSUM_OK 0x01 /* On Receive: IP header checksum */ /* was verified by h/w and is */ /* correct. 
*/ + #define HCK_PARTIALCKSUM 0x02 /* On Transmit: Compute partial 1's */ /* complement checksum based on */ /* start, stuff and end offsets. */ /* On Receive : Partial checksum */ /* computed and attached. */ + #define HCK_FULLCKSUM 0x04 /* On Transmit: Compute full(in case */ /* of TCP/UDP, full is pseudo-header */ /* + header + payload) checksum for */ /* this packet. */ /* On Receive : Full checksum */ /* computed in h/w and is attached */ + #define HCK_FULLCKSUM_OK 0x08 /* On Transmit: N/A */ /* On Receive: Full checksum status */ /* If set, implies full checksum */ diff --git a/usr/src/uts/common/sys/pool.h b/usr/src/uts/common/sys/pool.h index 679ca05a86..c4cc9fc22a 100644 --- a/usr/src/uts/common/sys/pool.h +++ b/usr/src/uts/common/sys/pool.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_POOL_H #define _SYS_POOL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/time.h> #include <sys/nvpair.h> @@ -41,6 +38,7 @@ extern "C" { #define POOL_DEFAULT 0 /* default pool's ID */ #define POOL_MAXID 999999 /* maximum possible pool ID */ +#define POOL_INVALID -1 /* pools states */ #define POOL_DISABLED 0 /* pools enabled */ @@ -81,6 +79,7 @@ extern size_t pool_bufsz; /* size of pool_buf */ */ extern pool_t *pool_lookup_pool_by_id(poolid_t); extern pool_t *pool_lookup_pool_by_name(char *); +extern pool_t *pool_lookup_pool_by_pset(int); /* * Configuration routines @@ -102,6 +101,7 @@ extern int pool_propput(int, int, id_t, nvpair_t *); extern int pool_proprm(int, int, id_t, char *); extern int pool_propget(char *, int, int, id_t, nvlist_t **); extern int pool_commit(int); +extern void pool_get_name(pool_t *, char **); /* * Synchronization routines @@ -113,6 +113,25 @@ extern void pool_unlock(void); extern void pool_barrier_enter(void); extern void pool_barrier_exit(void); +typedef enum { + POOL_E_ENABLE, + POOL_E_DISABLE, + POOL_E_CHANGE, +} pool_event_t; + +typedef void pool_event_cb_func_t(pool_event_t, poolid_t, void *); + +typedef struct pool_event_cb { + pool_event_cb_func_t *pec_func; + void *pec_arg; + list_node_t pec_list; +} pool_event_cb_t; + +/* + * Routines used to register interest in changes in cpu pools. 
+ */ +extern void pool_event_cb_register(pool_event_cb_t *); +extern void pool_event_cb_unregister(pool_event_cb_t *); #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index fd5db10058..7168f50362 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -1243,7 +1243,6 @@ extern int hcksum_assoc(mblk_t *, struct multidata_s *, struct pdesc_s *, extern void hcksum_retrieve(mblk_t *, struct multidata_s *, struct pdesc_s *, uint32_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *); extern void lso_info_set(mblk_t *, uint32_t, uint32_t); -extern void lso_info_get(mblk_t *, uint32_t *, uint32_t *); extern void lso_info_cleanup(mblk_t *); extern unsigned int bcksum(uchar_t *, int, unsigned int); extern boolean_t is_vmloaned_mblk(mblk_t *, struct multidata_s *, diff --git a/usr/src/uts/common/sys/vnic.h b/usr/src/uts/common/sys/vnic.h index 7e2aeac90a..3a6f5279ee 100644 --- a/usr/src/uts/common/sys/vnic.h +++ b/usr/src/uts/common/sys/vnic.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -111,9 +111,6 @@ typedef enum { */ #define VNIC_IOC_CREATE_FORCE 0x00000004 -/* Allocate a hardware ring to the vnic */ -#define VNIC_IOC_CREATE_REQ_HWRINGS 0x00000008 - typedef struct vnic_ioc_create { datalink_id_t vc_vnic_id; datalink_id_t vc_link_id; diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index b91f128fef..2bb48a60c6 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -63,16 +63,6 @@ typedef struct vnic_s { uint32_t vn_hcksum_txflags; } vnic_t; -#define vn_madd_naddr vn_mma_capab.maddr_naddr -#define vn_maddr_naddrfree vn_mma_capab.maddr_naddrfree -#define vn_maddr_flag vn_mma_capab.maddr_flag -#define vn_maddr_handle vn_mma_capab.maddr_handle -#define vn_maddr_reserve vn_mma_capab.maddr_reserve -#define vn_maddr_add vn_mma_capab.maddr_add -#define vn_maddr_remove vn_mma_capab.maddr_remove -#define vn_maddr_modify vn_mma_capab.maddr_modify -#define vn_maddr_get vn_mma_capab.maddr_get - extern int vnic_dev_create(datalink_id_t, datalink_id_t, vnic_mac_addr_type_t *, int *, uchar_t *, int *, uint_t, uint16_t, vrid_t, int, mac_resource_props_t *, uint32_t, vnic_ioc_diag_t *, cred_t *); diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c index 9ddae7aa30..7c22ff8e52 100644 --- a/usr/src/uts/common/xen/io/xnb.c +++ b/usr/src/uts/common/xen/io/xnb.c @@ -250,8 +250,7 @@ xnb_software_csum(xnb_t *xnbp, mblk_t *mp) * XXPV dme: shouldn't rely on mac_fix_cksum(), not least * because it doesn't cover all of the interesting cases :-( */ - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); return (mac_fix_cksum(mp)); } @@ -342,9 +341,7 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) */ *stuffp = 0; - (void) hcksum_assoc(mp, NULL, NULL, - 0, 0, 0, 0, - HCK_FULLCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); xnbp->xnb_stat_csum_hardware++; @@ -375,9 +372,8 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) *stuffp = (uint16_t)(cksum ? 
cksum : ~cksum); } - (void) hcksum_assoc(mp, NULL, NULL, - start, stuff, length, 0, - HCK_PARTIALCKSUM, KM_NOSLEEP); + mac_hcksum_set(mp, start, stuff, length, 0, + HCK_PARTIALCKSUM); xnbp->xnb_stat_csum_hardware++; @@ -911,13 +907,13 @@ replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) mblk_t *new_mp; new_mp = copyb(mp); - if (new_mp == NULL) + if (new_mp == NULL) { cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" "for %p, len %lu", (void *) mp, len); + } - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); - (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, - flags, KM_NOSLEEP); + mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); + mac_hcksum_set(new_mp, start, stuff, end, value, flags); new_mp->b_next = mp->b_next; new_mp->b_prev = mp->b_prev; diff --git a/usr/src/uts/common/xen/io/xnbo.c b/usr/src/uts/common/xen/io/xnbo.c index 78c6ba02e7..9a51328896 100644 --- a/usr/src/uts/common/xen/io/xnbo.c +++ b/usr/src/uts/common/xen/io/xnbo.c @@ -173,8 +173,7 @@ xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) * gain some advantage. */ - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, &csum, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, &csum, &pflags); /* * If the MAC driver has asserted that the checksum is diff --git a/usr/src/uts/common/xen/io/xnbu.c b/usr/src/uts/common/xen/io/xnbu.c index cf192365cf..c41a584e8b 100644 --- a/usr/src/uts/common/xen/io/xnbu.c +++ b/usr/src/uts/common/xen/io/xnbu.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,7 @@ static mac_callbacks_t xnbu_callbacks = { xnbu_m_set_mac_addr, xnbu_m_send, NULL, + NULL, xnbu_m_getcapab }; @@ -130,16 +131,8 @@ xnbu_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags) if ((flags & NETTXF_data_validated) != 0) { /* * The checksum is asserted valid. 
- * - * The hardware checksum offload specification says - * that we must provide the actual checksum as well as - * an assertion that it is valid, but the protocol - * stack doesn't actually use it so we don't bother. - * If it was necessary we could grovel in the packet - * to find it. */ - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | HCK_FULLCKSUM_OK, KM_NOSLEEP); + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK); } return (mp); @@ -152,8 +145,7 @@ xnbu_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) uint16_t r = 0; uint32_t pflags; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, - NULL, NULL, &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); /* * If the protocol stack has requested checksum diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c index b6d4cad439..534b3f0904 100644 --- a/usr/src/uts/common/xen/io/xnf.c +++ b/usr/src/uts/common/xen/io/xnf.c @@ -257,6 +257,7 @@ static mac_callbacks_t xnf_callbacks = { xnf_set_mac_addr, xnf_send, NULL, + NULL, xnf_getcapab }; @@ -1619,8 +1620,7 @@ xnf_send(void *arg, mblk_t *mp) txp->tx_txreq.size = length; txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; txp->tx_txreq.flags = 0; - hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, - &pflags); + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags); if (pflags != 0) { /* * If the local protocol stack requests checksum @@ -2104,21 +2104,9 @@ xnf_rx_collect(xnf_t *xnfp) * blank" flag, and hence could have a * packet here that we are asserting * is good with a blank checksum. - * - * The hardware checksum offload - * specification says that we must - * provide the actual checksum as well - * as an assertion that it is valid, - * but the protocol stack doesn't - * actually use it and some other - * drivers don't bother, so we don't. - * If it was necessary we could grovel - * in the packet to find it. 
*/ - (void) hcksum_assoc(mp, NULL, - NULL, 0, 0, 0, 0, - HCK_FULLCKSUM | - HCK_FULLCKSUM_OK, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, + HCK_FULLCKSUM_OK); xnfp->xnf_stat_rx_cksum_no_need++; } if (head == NULL) { diff --git a/usr/src/uts/intel/hxge/Makefile b/usr/src/uts/intel/hxge/Makefile index 40f6b64bcb..836f7c0924 100644 --- a/usr/src/uts/intel/hxge/Makefile +++ b/usr/src/uts/intel/hxge/Makefile @@ -20,12 +20,9 @@ # # uts/intel/hxge/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# -# ident "%Z%%M% %I% %E% SMI" -# # This makefile drives the production of the Sun # 10G hxge Ethernet leaf driver kernel module. # @@ -71,7 +68,6 @@ CFLAGS += -dalign # # Include hxge specific header files # -INC_PATH += -I$(UTSBASE)/common INC_PATH += -I$(UTSBASE)/common/io/hxge # # diff --git a/usr/src/uts/intel/io/amd8111s/amd8111s_main.c b/usr/src/uts/intel/io/amd8111s/amd8111s_main.c index 1664ee7543..317e55b22a 100644 --- a/usr/src/uts/intel/io/amd8111s/amd8111s_main.c +++ b/usr/src/uts/intel/io/amd8111s/amd8111s_main.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -195,6 +195,7 @@ static mac_callbacks_t amd8111s_m_callbacks = { amd8111s_m_multicst, amd8111s_m_unicst, amd8111s_m_tx, + NULL, amd8111s_m_ioctl }; diff --git a/usr/src/uts/intel/io/dnet/dnet.c b/usr/src/uts/intel/io/dnet/dnet.c index 2c045e893c..a6badb9b4b 100644 --- a/usr/src/uts/intel/io/dnet/dnet.c +++ b/usr/src/uts/intel/io/dnet/dnet.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -314,6 +314,7 @@ static mac_callbacks_t dnet_m_callbacks = { dnet_m_multicst, /* mc_multicst */ dnet_m_unicst, /* mc_unicst */ dnet_m_tx, /* mc_tx */ + NULL, NULL, /* mc_ioctl */ NULL, /* mc_getcapab */ NULL, /* mc_open */ diff --git a/usr/src/uts/intel/mii/Makefile b/usr/src/uts/intel/mii/Makefile index 8f3b7b6772..28ac502177 100644 --- a/usr/src/uts/intel/mii/Makefile +++ b/usr/src/uts/intel/mii/Makefile @@ -21,7 +21,7 @@ # # uts/intel/mii/Makefile # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the mii support module. @@ -55,7 +55,7 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy +LDFLAGS += -dy -N misc/mac # # Default build targets. diff --git a/usr/src/uts/intel/qlge/Makefile b/usr/src/uts/intel/qlge/Makefile index 52f1c143a2..82f64ac215 100644 --- a/usr/src/uts/intel/qlge/Makefile +++ b/usr/src/uts/intel/qlge/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the qlge driver kernel module. @@ -57,8 +57,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # MODSTUBS_DIR = $(OBJS_DIR) -INC_PATH += -I$(ROOT)/usr/include -INC_PATH += -I$(UTSBASE)/common/sys INC_PATH += -I$(UTSBASE)/common/sys/fibre-channel/fca/qlge LDFLAGS += -dy -Nmisc/mac -Ndrv/ip diff --git a/usr/src/uts/sparc/hxge/Makefile b/usr/src/uts/sparc/hxge/Makefile index 79b504d443..a04957a00f 100644 --- a/usr/src/uts/sparc/hxge/Makefile +++ b/usr/src/uts/sparc/hxge/Makefile @@ -20,7 +20,7 @@ # # uts/sparc/hxge/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # This makefile drives the production of the Sun @@ -68,7 +68,6 @@ CFLAGS += -dalign # # Include hxge specific header files # -INC_PATH += -I$(UTSBASE)/common INC_PATH += -I$(UTSBASE)/common/io/hxge # # diff --git a/usr/src/uts/sparc/mii/Makefile b/usr/src/uts/sparc/mii/Makefile index 52726241b9..b1e80d5bd6 100644 --- a/usr/src/uts/sparc/mii/Makefile +++ b/usr/src/uts/sparc/mii/Makefile @@ -21,7 +21,7 @@ # # uts/sparc/mii/Makefile # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the mii support module. @@ -55,7 +55,7 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy +LDFLAGS += -dy -N misc/mac # # Default build targets. diff --git a/usr/src/uts/sparc/qlge/Makefile b/usr/src/uts/sparc/qlge/Makefile index 52f1c143a2..82f64ac215 100644 --- a/usr/src/uts/sparc/qlge/Makefile +++ b/usr/src/uts/sparc/qlge/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the qlge driver kernel module. @@ -57,8 +57,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # MODSTUBS_DIR = $(OBJS_DIR) -INC_PATH += -I$(ROOT)/usr/include -INC_PATH += -I$(UTSBASE)/common/sys INC_PATH += -I$(UTSBASE)/common/sys/fibre-channel/fca/qlge LDFLAGS += -dy -Nmisc/mac -Ndrv/ip diff --git a/usr/src/uts/sun/io/eri/eri.c b/usr/src/uts/sun/io/eri/eri.c index ab08fafc39..a4ac10cdd3 100644 --- a/usr/src/uts/sun/io/eri/eri.c +++ b/usr/src/uts/sun/io/eri/eri.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -200,6 +200,7 @@ static mac_callbacks_t eri_m_callbacks = { eri_m_multicst, eri_m_unicst, eri_m_tx, + NULL, eri_m_ioctl, eri_m_getcapab }; @@ -264,9 +265,9 @@ static mac_callbacks_t eri_m_callbacks = { if (type == ETHERTYPE_IP || type == ETHERTYPE_IPV6) { \ start_offset = 0; \ end_offset = MBLKL(bp) - ETHERHEADER_SIZE; \ - (void) hcksum_assoc(bp, NULL, NULL, \ + mac_hcksum_set(bp, \ start_offset, 0, end_offset, sum, \ - HCK_PARTIALCKSUM, 0); \ + HCK_PARTIALCKSUM); \ } else { \ /* \ * Strip the PADS for 802.3 \ @@ -3469,8 +3470,7 @@ eri_send_msg(struct eri *erip, mblk_t *mp) } #ifdef ERI_HWCSUM - hcksum_retrieve(mp, NULL, NULL, &start_offset, &stuff_offset, - NULL, NULL, &flags); + mac_hcksum_get(mp, &start_offset, &stuff_offset, NULL, NULL, &flags); if (flags & HCK_PARTIALCKSUM) { if (get_ether_type(mp->b_rptr) == ETHERTYPE_VLAN) { diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c index f25860b251..1f857dbe13 100644 --- a/usr/src/uts/sun4v/io/vnet.c +++ b/usr/src/uts/sun4v/io/vnet.c @@ -27,6 +27,7 @@ #include <sys/types.h> #include <sys/errno.h> #include <sys/param.h> +#include <sys/callb.h> #include <sys/stream.h> #include <sys/kmem.h> #include <sys/conf.h> @@ -84,8 +85,12 @@ static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, mac_group_info_t *infop, mac_group_handle_t handle); static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, + uint64_t *val); static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, + uint64_t *val); static int vnet_ring_enable_intr(void *arg); static int vnet_ring_disable_intr(void *arg); static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); @@ -107,7 +112,6 @@ static void 
vnet_unbind_rings(vnet_res_t *vresp); static int vnet_hio_stat(void *, uint_t, uint64_t *); static int vnet_hio_start(void *); static void vnet_hio_stop(void *); -static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type); mblk_t *vnet_hio_tx(void *, mblk_t *); /* Forwarding database (FDB) routines */ @@ -129,6 +133,7 @@ static void vnet_res_start_task(void *arg); static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); +static void vnet_tx_notify_thread(void *); /* Exported to vnet_gen */ int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); @@ -168,8 +173,7 @@ extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); extern void vdds_cleanup_hybrid_res(void *arg); extern void vdds_cleanup_hio(vnet_t *vnetp); -/* Externs imported from mac_impl */ -extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); +extern pri_t minclsyspri; #define DRV_NAME "vnet" #define VNET_FDBE_REFHOLD(p) \ @@ -199,6 +203,7 @@ static mac_callbacks_t vnet_m_callbacks = { vnet_m_multicst, NULL, /* m_unicst entry must be NULL while rx rings are exposed */ NULL, /* m_tx entry must be NULL while tx rings are exposed */ + NULL, vnet_m_ioctl, vnet_m_capab, NULL @@ -232,6 +237,8 @@ uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ /* Configure tx serialization in mac layer for the vnet device */ boolean_t vnet_mac_tx_serialize = B_TRUE; +/* Configure enqueing at Rx soft rings in mac layer for the vnet device */ +boolean_t vnet_mac_rx_queuing = B_TRUE; /* * Set this to non-zero to enable additional internal receive buffer pools @@ -785,6 +792,7 @@ mblk_t * vnet_tx_ring_send(void *arg, mblk_t *mp) { vnet_pseudo_tx_ring_t *tx_ringp; + vnet_tx_ring_stats_t *statsp; vnet_t *vnetp; vnet_res_t *vresp; mblk_t *next; @@ -795,8 +803,10 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) boolean_t is_pvid; /* non-default pvid ? 
*/ boolean_t hres; /* Hybrid resource ? */ void *tx_arg; + size_t size; tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + statsp = &tx_ringp->tx_ring_stats; vnetp = (vnet_t *)tx_ringp->vnetp; DBG1(vnetp, "enter\n"); ASSERT(mp != NULL); @@ -808,6 +818,9 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) next = mp->b_next; mp->b_next = NULL; + /* update stats */ + size = msgsize(mp); + /* * Find fdb entry for the destination * and hold a reference to it. @@ -911,6 +924,8 @@ vnet_tx_ring_send(void *arg, mblk_t *mp) } } + statsp->obytes += size; + statsp->opackets++; mp = next; } @@ -971,6 +986,10 @@ vnet_ring_grp_init(vnet_t *vnetp) } tx_grp->rings = tx_ringp; tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; + mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL); + tx_grp->flowctl_thread = thread_create(NULL, 0, + vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri); rx_grp = &vnetp->rx_grp[0]; rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; @@ -1005,8 +1024,21 @@ vnet_ring_grp_uninit(vnet_t *vnetp) { vnet_pseudo_rx_group_t *rx_grp; vnet_pseudo_tx_group_t *tx_grp; + kt_did_t tid = 0; tx_grp = &vnetp->tx_grp[0]; + + /* Inform tx_notify_thread to exit */ + mutex_enter(&tx_grp->flowctl_lock); + if (tx_grp->flowctl_thread != NULL) { + tid = tx_grp->flowctl_thread->t_did; + tx_grp->flowctl_done = B_TRUE; + cv_signal(&tx_grp->flowctl_cv); + } + mutex_exit(&tx_grp->flowctl_lock); + if (tid != 0) + thread_join(tid); + if (tx_grp->rings != NULL) { ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * @@ -1090,14 +1122,7 @@ vnet_mac_register(vnet_t *vnetp) macp->m_max_sdu = vnetp->mtu; macp->m_margin = VLAN_TAGSZ; - /* - * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to - * workaround tx lock contention issues in nxge. 
- */ macp->m_v12n = MAC_VIRT_LEVEL1; - if (vnet_mac_tx_serialize == B_TRUE) { - macp->m_v12n |= MAC_VIRT_SERIALIZE; - } /* * Finally, we're ready to register ourselves with the MAC layer @@ -1404,6 +1429,73 @@ vnet_tx_update(vio_net_handle_t vrh) } /* + * vnet_tx_notify_thread: + * + * vnet_tx_ring_update() callback function wakes up this thread when + * it gets called. This thread will call mac_tx_ring_update() to + * notify upper mac of flow control getting relieved. Note that + * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly + * because vnet_tx_ring_update() is called from lower mac with + * mi_rw_lock held and mac_tx_ring_update() would also try to grab + * the same lock. + */ +static void +vnet_tx_notify_thread(void *arg) +{ + callb_cpr_t cprinfo; + vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg; + vnet_pseudo_tx_ring_t *tx_ringp; + vnet_t *vnetp; + int i; + + CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr, + "vnet_tx_notify_thread"); + + mutex_enter(&tx_grp->flowctl_lock); + while (!tx_grp->flowctl_done) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock); + CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock); + + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->woken_up) { + tx_ringp->woken_up = B_FALSE; + vnetp = tx_ringp->vnetp; + mac_tx_ring_update(vnetp->mh, tx_ringp->handle); + } + } + } + /* + * The tx_grp is being destroyed, exit the thread. 
+ */ + tx_grp->flowctl_thread = NULL; + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); +} + +void +vnet_tx_ring_update(void *arg1, uintptr_t arg2) +{ + vnet_t *vnetp = (vnet_t *)arg1; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) { + mutex_enter(&tx_grp->flowctl_lock); + tx_ringp->woken_up = B_TRUE; + cv_signal(&tx_grp->flowctl_cv); + mutex_exit(&tx_grp->flowctl_lock); + break; + } + } +} + +/* * Update the new mtu of vnet into the mac layer. First check if the device has * been plumbed and if so fail the mtu update. Returns 0 on success. */ @@ -2053,6 +2145,22 @@ vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) * we unmap ring->hw_rh. For rings mapped to LDC resources, we * stop the rx callbacks (in vgen) before we remove ring->hw_rh * (vio_net_resource_unreg()). + * Also, we access ring->hw_rh in vnet_rx_ring_stat(). + * Note that for rings mapped to Hybrid resource, though the + * rings are statically registered with the mac layer, its + * hardware ring mapping (ringp->hw_rh) can be torn down in + * vnet_unbind_hwrings() while the kstat operation is in + * progress. To protect against this, we hold a reference to + * the resource in FDB; this ensures that the thread in + * vio_net_resource_unreg() waits for the reference to be + * dropped before unbinding the ring. + * + * We don't need to do this for rings mapped to LDC resources. + * These rings are registered/unregistered dynamically with + * the mac layer and so any attempt to unregister the ring + * while kstat operation is in progress will block in + * mac_group_rem_ring(). Thus implicitly protects the + * resource (ringp->hw_rh) from disappearing. 
*/ if (cap_rings->mr_type == MAC_RING_TYPE_RX) { @@ -2148,10 +2256,22 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, infop->mri_driver = (mac_ring_driver_t)rx_ringp; infop->mri_start = vnet_rx_ring_start; infop->mri_stop = vnet_rx_ring_stop; + infop->mri_stat = vnet_rx_ring_stat; /* Set the poll function, as this is an rx ring */ infop->mri_poll = vnet_rx_poll; - + /* + * MAC_RING_RX_ENQUEUE bit needed to be set for nxge + * which was not sending packet chains in interrupt + * context. For such drivers, packets are queued in + * Rx soft rings so that we get a chance to switch + * into a polling mode under backlog. This bug (not + * sending packet chains) has now been fixed. Once + * the performance impact is measured, this change + * will be removed. + */ + infop->mri_flags = (vnet_mac_rx_queuing ? + MAC_RING_RX_ENQUEUE : 0); break; } @@ -2178,10 +2298,17 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, infop->mri_driver = (mac_ring_driver_t)tx_ringp; infop->mri_start = vnet_tx_ring_start; infop->mri_stop = vnet_tx_ring_stop; + infop->mri_stat = vnet_tx_ring_stat; /* Set the transmit function, as this is a tx ring */ infop->mri_tx = vnet_tx_ring_send; - + /* + * MAC_RING_TX_SERIALIZE bit needs to be set while + * hybridIO is enabled to workaround tx lock + * contention issues in nxge. + */ + infop->mri_flags = (vnet_mac_tx_serialize ? + MAC_RING_TX_SERIALIZE : 0); break; } @@ -2325,6 +2452,44 @@ vnet_rx_ring_stop(mac_ring_driver_t arg) rx_ringp->state &= ~VNET_RXRING_STARTED; } +static int +vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver; + vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp; + vnet_res_t *vresp; + mac_register_t *macp; + mac_callbacks_t *cbp; + + /* + * Refer to vnet_m_capab() function for detailed comments on ring + * synchronization. 
+ */ + if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) { + READ_ENTER(&vnetp->vsw_fp_rw); + if (vnetp->hio_fp == NULL) { + RW_EXIT(&vnetp->vsw_fp_rw); + return (0); + } + + VNET_FDBE_REFHOLD(vnetp->hio_fp); + RW_EXIT(&vnetp->vsw_fp_rw); + mac_hwring_getstat(rx_ringp->hw_rh, stat, val); + VNET_FDBE_REFRELE(vnetp->hio_fp); + return (0); + } + + ASSERT((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0); + vresp = (vnet_res_t *)rx_ringp->hw_rh; + macp = &vresp->macreg; + cbp = macp->m_callbacks; + + cbp->mc_getstat(macp->m_driver, stat, val); + + return (0); +} + /* ARGSUSED */ static int vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) @@ -2343,6 +2508,31 @@ vnet_tx_ring_stop(mac_ring_driver_t arg) tx_ringp->state &= ~VNET_TXRING_STARTED; } +static int +vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver; + vnet_tx_ring_stats_t *statsp; + + statsp = &tx_ringp->tx_ring_stats; + + switch (stat) { + case MAC_STAT_OPACKETS: + *val = statsp->opackets; + break; + + case MAC_STAT_OBYTES: + *val = statsp->obytes; + break; + + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + /* * Disable polling for a ring and enable its interrupt. 
*/ @@ -2569,10 +2759,6 @@ vnet_hio_mac_init(vnet_t *vnetp, char *ifname) /* add the recv callback */ mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); - /* add the notify callback - only tx updates for now */ - vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb, - vnetp); - return (0); fail: @@ -2584,11 +2770,6 @@ fail: void vnet_hio_mac_cleanup(vnet_t *vnetp) { - if (vnetp->hio_mnh != NULL) { - (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE); - vnetp->hio_mnh = NULL; - } - if (vnetp->hio_vhp != NULL) { vio_net_resource_unreg(vnetp->hio_vhp); vnetp->hio_vhp = NULL; @@ -2666,7 +2847,7 @@ vnet_bind_hwrings(vnet_t *vnetp) /* Bind the pseudo ring to the underlying hwring */ mac_hwring_setup(rx_ringp->hw_rh, - (mac_resource_handle_t)rx_ringp); + (mac_resource_handle_t)rx_ringp, NULL); /* Start the hwring if needed */ if (rx_ringp->state & VNET_RXRING_STARTED) { @@ -2703,6 +2884,8 @@ vnet_bind_hwrings(vnet_t *vnetp) tx_ringp->hw_rh = hw_rh[i]; tx_ringp->state |= VNET_TXRING_HYBRID; } + tx_grp->tx_notify_handle = + mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp); mac_perim_exit(mph1); return (0); @@ -2734,6 +2917,8 @@ vnet_unbind_hwrings(vnet_t *vnetp) tx_ringp->hw_rh = NULL; } } + (void) mac_client_tx_notify(vnetp->hio_mch, NULL, + tx_grp->tx_notify_handle); rx_grp = &vnetp->rx_grp[0]; for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { @@ -2980,24 +3165,6 @@ vnet_hio_tx(void *arg, mblk_t *mp) return (mp); } -static void -vnet_hio_notify_cb(void *arg, mac_notify_type_t type) -{ - vnet_t *vnetp = (vnet_t *)arg; - mac_perim_handle_t mph; - - mac_perim_enter_by_mh(vnetp->hio_mh, &mph); - switch (type) { - case MAC_NOTE_TX: - vnet_tx_update(vnetp->hio_vhp); - break; - - default: - break; - } - mac_perim_exit(mph); -} - #ifdef VNET_IOC_DEBUG /* diff --git a/usr/src/uts/sun4v/io/vnet_gen.c b/usr/src/uts/sun4v/io/vnet_gen.c index 6bf674fd85..875c8dd93f 100644 --- a/usr/src/uts/sun4v/io/vnet_gen.c +++ b/usr/src/uts/sun4v/io/vnet_gen.c @@ -461,6 +461,7 
@@ static mac_callbacks_t vgen_m_callbacks = { vgen_multicst, vgen_unicst, vgen_tx, + NULL, vgen_ioctl, NULL, NULL diff --git a/usr/src/uts/sun4v/io/vsw.c b/usr/src/uts/sun4v/io/vsw.c index f53adb5af5..a061321e86 100644 --- a/usr/src/uts/sun4v/io/vsw.c +++ b/usr/src/uts/sun4v/io/vsw.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -295,10 +295,7 @@ static mac_callbacks_t vsw_m_callbacks = { vsw_m_promisc, vsw_m_multicst, vsw_m_unicst, - vsw_m_tx, - NULL, - NULL, - NULL + vsw_m_tx }; static struct cb_ops vsw_cb_ops = { diff --git a/usr/src/uts/sun4v/io/vsw_phys.c b/usr/src/uts/sun4v/io/vsw_phys.c index f5fc90b929..c725e8bb5f 100644 --- a/usr/src/uts/sun4v/io/vsw_phys.c +++ b/usr/src/uts/sun4v/io/vsw_phys.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -497,17 +497,12 @@ vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type) * Open a MAC client for a port or an interface. * The flags and their purpose as below: * - * MAC_OPEN_FLAGS_NO_HWRINGS -- This flag is used by default - * for all ports/interface so that they are associated with - * default group & resources. It will not be used for the - * ports that have HybridIO is enabled so that the h/w resources - * assigned to it. - * * MAC_OPEN_FLAGS_SHARES_DESIRED -- This flag is used to indicate * that a port desires a Share. This will be the case with the * the ports that have hybrid mode enabled. This will only cause * MAC layer to allocate a share and corresponding resources - * ahead of time. + * ahead of time. Ports that are not HybridIO enabled are + * associated with default group & resources. * * MAC_UNICAST_TAG_DISABLE -- This flag is used for VLAN * support. 
It will cause MAC to not add any tags, but expect @@ -525,7 +520,7 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) char mac_cl_name[MAXNAMELEN]; const char *dev_name; mac_client_handle_t *mchp; - uint64_t flags = MAC_OPEN_FLAGS_NO_HWRINGS; + uint64_t flags = 0; ASSERT(MUTEX_HELD(&vswp->mac_lock)); if (vswp->mh == NULL) { @@ -545,10 +540,8 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) dev_name = ddi_driver_name(vswp->dip); instance = ddi_get_instance(vswp->dip); if (type == VSW_VNETPORT) { - if (port->p_hio_enabled == B_TRUE) { - flags &= ~MAC_OPEN_FLAGS_NO_HWRINGS; + if (port->p_hio_enabled) flags |= MAC_OPEN_FLAGS_SHARES_DESIRED; - } (void) snprintf(mac_cl_name, MAXNAMELEN, "%s%d%s%d", dev_name, instance, "_port", port->p_instance); } else { @@ -561,6 +554,10 @@ vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type) cmn_err(CE_NOTE, "!vsw%d:%s mac_client_open() failed\n", vswp->instance, mac_cl_name); } + + if (type != VSW_VNETPORT || !port->p_hio_enabled) + mac_client_set_rings(*mchp, MAC_RXRINGS_NONE, MAC_TXRINGS_NONE); + return (rv); } @@ -1389,7 +1386,7 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) { int rv = 0; uint64_t *bw; - mac_resource_props_t mrp; + mac_resource_props_t *mrp; mac_client_handle_t mch; ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); @@ -1409,15 +1406,15 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) } if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { - bzero(&mrp, sizeof (mac_resource_props_t)); + mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); if (maxbw == 0) { - mrp.mrp_maxbw = MRP_MAXBW_RESETVAL; + mrp->mrp_maxbw = MRP_MAXBW_RESETVAL; } else { - mrp.mrp_maxbw = maxbw; + mrp->mrp_maxbw = maxbw; } - mrp.mrp_mask |= MRP_MAXBW; + mrp->mrp_mask |= MRP_MAXBW; - rv = mac_client_set_resources(mch, &mrp); + rv = mac_client_set_resources(mch, mrp); if (rv != 0) { if (type == VSW_VNETPORT) { cmn_err(CE_NOTE, "!port%d: cannot set " @@ -1434,5 
+1431,6 @@ vsw_maccl_set_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, uint64_t maxbw) */ *bw = maxbw; } + kmem_free(mrp, sizeof (*mrp)); } } diff --git a/usr/src/uts/sun4v/sys/vnet.h b/usr/src/uts/sun4v/sys/vnet.h index 21fb92852b..e80324110e 100644 --- a/usr/src/uts/sun4v/sys/vnet.h +++ b/usr/src/uts/sun4v/sys/vnet.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -106,6 +106,11 @@ typedef struct vnet_hio_kstats { kstat_named_t noxmtbuf; /* MIB - ifOutDiscards */ } vnet_hio_kstats_t; +typedef struct vnet_tx_ring_stats { + uint64_t opackets; /* # tx packets */ + uint64_t obytes; /* # bytes transmitted */ +} vnet_tx_ring_stats_t; + /* * A vnet resource structure. */ @@ -214,6 +219,8 @@ typedef struct vnet_pseudo_tx_ring { /* ring handle. Hybrid res: ring hdl */ /* of hardware rx ring; LDC res: hdl */ /* to the res itself (vnet_res_t) */ + boolean_t woken_up; + vnet_tx_ring_stats_t tx_ring_stats; /* ring statistics */ } vnet_pseudo_tx_ring_t; /* @@ -241,6 +248,11 @@ typedef struct vnet_pseudo_tx_group { mac_group_handle_t handle; /* grp handle in mac layer */ uint_t ring_cnt; /* total # of rings in grp */ vnet_pseudo_tx_ring_t *rings; /* array of rings */ + kmutex_t flowctl_lock; /* flow control lock */ + kcondvar_t flowctl_cv; + kthread_t *flowctl_thread; + boolean_t flowctl_done; + void *tx_notify_handle; /* Tx ring notification */ } vnet_pseudo_tx_group_t; /* @@ -298,7 +310,6 @@ typedef struct vnet { mac_handle_t hio_mh; /* HIO mac hdl */ mac_client_handle_t hio_mch; /* HIO mac client hdl */ mac_unicast_handle_t hio_muh; /* HIO mac unicst hdl */ - mac_notify_handle_t hio_mnh; /* HIO notify cb hdl */ mac_group_handle_t rx_hwgh; /* HIO rx ring-group hdl */ mac_group_handle_t tx_hwgh; /* HIO tx ring-group hdl */ } vnet_t; |