diff options
| author | yz147064 <none@none> | 2008-01-23 18:09:15 -0800 |
|---|---|---|
| committer | yz147064 <none@none> | 2008-01-23 18:09:15 -0800 |
| commit | d62bc4badc1c1f1549c961cfb8b420e650e1272b (patch) | |
| tree | 9f466859e9cfb73da13b64261432aba4683f19ad /usr/src/uts/common | |
| parent | d38257c4392a9dd690c2f7f2383236c1fc80e509 (diff) | |
| download | illumos-joyent-d62bc4badc1c1f1549c961cfb8b420e650e1272b.tar.gz | |
PSARC/2006/499 Clearview Nemo unification and vanity naming
PSARC/2007/527 Addendum for Clearview Vanity Naming and Nemo Unification
PSARC/2008/002 Clearview UV Updates
6310766 vlan statistics get reset at unplumb time
6320515 dladm commands with "-R" option should not take effect immediately
6433732 Simplify the GLDv3 control path by making its processing asynchronous
6445912 dladm show-link fails to show a specific link in the debug version
6452413 dladm show-link doesn't show VLAN links for GLDv2 drivers
6504433 libwladm's use of wladm_wlresult2status() needs an overhaul
6504507 dladm set-linkprop failure message is unclear
6534289 DR should work with aggregations
6535719 dladm_aggr_port_attr_db_t`lp_devname should be MAXNAMELEN, not MAXNAMELEN + 1
6539634 GLDv3 should DL_ERROR_ACK a DL_UDQOS_REQ with DL_OUTSTATE when the stream is DL_UNATTACHED
6540246 libdladm should not guess zoneid from DLDIOCZIDGET ioctl errno
6544195 dladm show-dev assumes GLDv3 stats.. incompatible with GLDv2
6563295 dladm show-linkprop -P does not work properly for unavailable links
6577618 integrate network vanity naming and nemo unification
6600446 links assigned to a local zone are still aggregatable by global zone
6607572 "boot net - install" can trigger assertion failure in dld_str_attach()
6613956 "svccfg import -" does not work as bfu expects
6637596 invalid assertion in ip_soft_ring_assignment()
6642350 kernel DLPI processing routines are long overdue
6643338 GLDv3 PPA hack VLAN ID checks don't always work
6647203 bfu: smf_delete_manifest() does not work for non-global zones
6649885 DL_IB GLDv3 mactype plugin must fill in its mtr_nativetype
6650395 libuuid should be lint-clean and linted nightly
--HG--
rename : usr/src/cmd/dladm/aggregation.conf => deleted_files/usr/src/cmd/dladm/aggregation.conf
rename : usr/src/cmd/dladm/linkprop.conf => deleted_files/usr/src/cmd/dladm/linkprop.conf
rename : usr/src/lib/libinetcfg/common/inetcfg_nic.c => deleted_files/usr/src/lib/libinetcfg/common/inetcfg_nic.c
rename : usr/src/lib/libinetcfg/common/inetcfg_nic.h => deleted_files/usr/src/lib/libinetcfg/common/inetcfg_nic.h
Diffstat (limited to 'usr/src/uts/common')
84 files changed, 10019 insertions, 2713 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 5beed63ce0..6bb7e481f4 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -561,7 +561,8 @@ CN_OBJS += cons.o DLD_OBJS += dld_drv.o dld_proto.o dld_str.o -DLS_OBJS += dls.o dls_link.o dls_mod.o dls_stat.o dls_vlan.o dls_soft_ring.o +DLS_OBJS += dls.o dls_link.o dls_mod.o dls_stat.o dls_vlan.o \ + dls_soft_ring.o dls_mgmt.o GLD_OBJS += gld.o gldutil.o @@ -578,6 +579,9 @@ AGGR_OBJS += aggr_dev.o aggr_ctl.o aggr_grp.o aggr_port.o \ VNIC_OBJS += vnic_ctl.o vnic_dev.o vnic_bcast.o vnic_cl.o +SOFTMAC_OBJS += softmac_main.o softmac_ctl.o softmac_capab.o \ + softmac_dev.o softmac_stat.o softmac_pkt.o + NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \ net80211_output.o net80211_node.o net80211_crypto.o \ net80211_crypto_none.o net80211_crypto_wep.o net80211_ioctl.o \ @@ -848,7 +852,8 @@ DCFS_OBJS += dc_vnops.o DEVFS_OBJS += devfs_subr.o devfs_vfsops.o devfs_vnops.o DEV_OBJS += sdev_subr.o sdev_vfsops.o sdev_vnops.o \ - sdev_ptsops.o sdev_comm.o sdev_profile.o sdev_ncache.o + sdev_ptsops.o sdev_comm.o sdev_profile.o \ + sdev_ncache.o sdev_netops.o CTFS_OBJS += ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \ ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o @@ -1528,6 +1533,8 @@ CH_COM_OBJS = ch_mac.o ch_subr.o cspi.o espi.o ixf1010.o mc3.o mc4.o mc5.o \ # PCI_STRING_OBJS = pci_strings.o +NET_DACF_OBJS += net_dacf.o + # # Xframe 10G NIC driver module # diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index 4b2282b3ef..208b098b53 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -743,6 +743,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/scsi/adapters/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/softmac/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/scsi/adapters/scsi_vhci/%.c 
$(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -1593,6 +1597,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/scsi/impl/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/scsi/targets/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/softmac/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/sfe/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c new file mode 100644 index 0000000000..b51b155344 --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_netops.c @@ -0,0 +1,397 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * vnode ops for the /dev/net directory + * + * The lookup is based on the internal vanity naming node table. We also + * override readdir in order to delete net nodes no longer in-use. 
+ */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/sunndi.h> +#include <fs/fs_subr.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_impl.h> +#include <sys/policy.h> +#include <sys/zone.h> +#include <sys/dls.h> + +struct vnodeops *devnet_vnodeops; + +/* + * Called by zone_walk_datalink() to see if the given link name belongs to the + * given zone. Returns 0 to continue the walk, -1 if the link name is found. + */ +static int +devnet_validate_name(const char *link, void *arg) +{ + return ((strcmp(link, arg) == 0) ? -1 : 0); +} + +/* + * Check if a net sdev_node is still valid - i.e. it represents a current + * network link. + * This serves two purposes + * - only valid net nodes are returned during lookup() and readdir(). + * - since net sdev_nodes are not actively destroyed when a network link + * goes away, we use the validator to do deferred cleanup i.e. when such + * nodes are encountered during subsequent lookup() and readdir(). + */ +int +devnet_validate(struct sdev_node *dv) +{ + char *nm = dv->sdev_name; + datalink_id_t linkid; + + ASSERT(!(dv->sdev_flags & SDEV_STALE)); + ASSERT(dv->sdev_state == SDEV_READY); + + if (SDEV_IS_GLOBAL(dv)) { + return ((dls_mgmt_get_linkid(nm, &linkid) != 0) ? + SDEV_VTOR_INVALID : SDEV_VTOR_VALID); + } else { + return ((zone_datalink_walk(getzoneid(), devnet_validate_name, + nm) == -1) ? SDEV_VTOR_VALID : SDEV_VTOR_INVALID); + } +} + +/* + * This callback is invoked from devname_lookup_func() to create + * a net entry when the node is not found in the cache. + */ +static int +devnet_create_rvp(const char *nm, struct vattr *vap, dls_dl_handle_t *ddhp) +{ + timestruc_t now; + dev_t dev; + int error; + + if ((error = dls_devnet_open(nm, ddhp, &dev)) != 0) { + sdcmn_err12(("devnet_create_rvp: not a valid vanity name " + "network node: %s\n", nm)); + return (error); + } + + /* + * This is a valid network device (at least at this point in time). 
+ * Create the node by setting the attribute; the rest is taken care + * of by devname_lookup_func(). + */ + *vap = sdev_vattr_chr; + vap->va_mode |= 0666; + vap->va_rdev = dev; + + gethrestime(&now); + vap->va_atime = now; + vap->va_mtime = now; + vap->va_ctime = now; + return (0); +} + +/* + * Lookup for /dev/net directory + * If the entry does not exist, the devnet_create_rvp() callback + * is invoked to create it. Nodes do not persist across reboot. + */ +/*ARGSUSED3*/ +static int +devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, + struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, + caller_context_t *ct, int *direntflags, pathname_t *realpnp) +{ + struct sdev_node *ddv = VTOSDEV(dvp); + struct sdev_node *dv = NULL; + dls_dl_handle_t ddh = NULL; + struct vattr vattr; + int nmlen; + int error = ENOENT; + + if (SDEVTOV(ddv)->v_type != VDIR) + return (ENOTDIR); + + /* + * Empty name or ., return node itself. + */ + nmlen = strlen(nm); + if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { + *vpp = SDEVTOV(ddv); + VN_HOLD(*vpp); + return (0); + } + + /* + * .., return the parent directory + */ + if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { + *vpp = SDEVTOV(ddv->sdev_dotdot); + VN_HOLD(*vpp); + return (0); + } + + rw_enter(&ddv->sdev_contents, RW_WRITER); + + /* + * directory cache lookup: + */ + if ((dv = sdev_cache_lookup(ddv, nm)) != NULL) { + if (dv->sdev_state == SDEV_READY) { + if (!(dv->sdev_flags & SDEV_ATTR_INVALID)) + goto found; + } else { + ASSERT(dv->sdev_state == SDEV_ZOMBIE); + goto failed; + } + } + + /* + * ZOMBIED parent does not allow new node creation, bail out early. 
+ */ + if (ddv->sdev_state == SDEV_ZOMBIE) + goto failed; + + error = devnet_create_rvp(nm, &vattr, &ddh); + if (error != 0) + goto failed; + + error = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, cred, SDEV_READY); + if (error != 0) { + ASSERT(dv == NULL); + dls_devnet_close(ddh); + goto failed; + } + + ASSERT(dv != NULL); + + rw_enter(&dv->sdev_contents, RW_WRITER); + if (dv->sdev_flags & SDEV_ATTR_INVALID) { + /* + * SDEV_ATTR_INVALID means that this device has been + * detached, and its dev_t might've been changed too. + * Therefore, sdev_node's 'vattr' needs to be updated. + */ + SDEVTOV(dv)->v_rdev = vattr.va_rdev; + ASSERT(dv->sdev_attr != NULL); + dv->sdev_attr->va_rdev = vattr.va_rdev; + dv->sdev_flags &= ~SDEV_ATTR_INVALID; + } + ASSERT(dv->sdev_private == NULL); + dv->sdev_private = ddh; + rw_exit(&dv->sdev_contents); + +found: + ASSERT(SDEV_HELD(dv)); + rw_exit(&ddv->sdev_contents); + return (sdev_to_vp(dv, vpp)); + +failed: + rw_exit(&ddv->sdev_contents); + + if (dv != NULL) + SDEV_RELE(dv); + + *vpp = NULL; + return (error); +} + +static int +devnet_filldir_datalink(const char *link, void *arg) +{ + struct sdev_node *ddv = arg; + struct vattr vattr; + struct sdev_node *dv; + dls_dl_handle_t ddh = NULL; + + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + if ((dv = sdev_cache_lookup(ddv, (char *)link)) != NULL) + goto found; + + if (devnet_create_rvp(link, &vattr, &ddh) != 0) + return (0); + + ASSERT(ddh != NULL); + dls_devnet_close(ddh); + + if (sdev_mknode(ddv, (char *)link, &dv, &vattr, NULL, NULL, kcred, + SDEV_READY) != 0) { + return (0); + } + + /* + * As there is no reference holding the network device, it could be + * detached. Set SDEV_ATTR_INVALID so that the 'vattr' will be updated + * later. 
+ */ + rw_enter(&dv->sdev_contents, RW_WRITER); + dv->sdev_flags |= SDEV_ATTR_INVALID; + rw_exit(&dv->sdev_contents); + +found: + SDEV_SIMPLE_RELE(dv); + return (0); +} + +static void +devnet_filldir(struct sdev_node *ddv) +{ + sdev_node_t *dv, *next; + char link[MAXLINKNAMELEN]; + datalink_id_t linkid; + + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (rw_tryupgrade(&ddv->sdev_contents) == NULL) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + + for (dv = ddv->sdev_dot; dv; dv = next) { + next = dv->sdev_next; + + /* skip stale nodes */ + if (dv->sdev_flags & SDEV_STALE) + continue; + + /* validate and prune only ready nodes */ + if (dv->sdev_state != SDEV_READY) + continue; + + switch (devnet_validate(dv)) { + case SDEV_VTOR_VALID: + case SDEV_VTOR_SKIP: + continue; + case SDEV_VTOR_INVALID: + sdcmn_err12(("devnet_filldir: destroy invalid " + "node: %s(%p)\n", dv->sdev_name, (void *)dv)); + break; + } + + if (SDEVTOV(dv)->v_count > 0) + continue; + SDEV_HOLD(dv); + /* remove the cache node */ + (void) sdev_cache_update(ddv, &dv, dv->sdev_name, + SDEV_CACHE_DELETE); + } + + if (((ddv->sdev_flags & SDEV_BUILD) == 0) && !dls_devnet_rebuild()) + goto done; + + if (SDEV_IS_GLOBAL(ddv)) { + linkid = DATALINK_INVALID_LINKID; + do { + linkid = dls_mgmt_get_next(linkid, DATALINK_CLASS_ALL, + DATALINK_ANY_MEDIATYPE, DLMGMT_ACTIVE); + + if ((linkid != DATALINK_INVALID_LINKID) && + (dls_mgmt_get_linkinfo(linkid, link, + NULL, NULL, NULL) == 0)) { + (void) devnet_filldir_datalink(link, ddv); + } + } while (linkid != DATALINK_INVALID_LINKID); + } else { + (void) zone_datalink_walk(getzoneid(), + devnet_filldir_datalink, ddv); + } + + ddv->sdev_flags &= ~SDEV_BUILD; + +done: + rw_downgrade(&ddv->sdev_contents); +} + +/* + * Display all instantiated network datalink device nodes. + * A /dev/net entry will be created only after the first lookup of + * the network datalink device succeeds. 
+ */ +/*ARGSUSED4*/ +static int +devnet_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, + int *eofp, caller_context_t *ct, int flags) +{ + struct sdev_node *sdvp = VTOSDEV(dvp); + + ASSERT(sdvp); + + if (uiop->uio_offset == 0) + devnet_filldir(sdvp); + + return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); +} + +/* + * This callback is invoked from devname_inactive_func() to release + * the net entry which was held in devnet_create_rvp(). + */ +static void +devnet_inactive_callback(struct vnode *dvp) +{ + struct sdev_node *sdvp = VTOSDEV(dvp); + dls_dl_handle_t ddh; + + if (dvp->v_type == VDIR) + return; + + ASSERT(dvp->v_type == VCHR); + rw_enter(&sdvp->sdev_contents, RW_WRITER); + ddh = sdvp->sdev_private; + sdvp->sdev_private = NULL; + sdvp->sdev_flags |= SDEV_ATTR_INVALID; + rw_exit(&sdvp->sdev_contents); + + /* + * "ddh" (sdev_private) could be NULL if devnet_lookup fails. + */ + if (ddh != NULL) + dls_devnet_close(ddh); +} + +/*ARGSUSED*/ +static void +devnet_inactive(struct vnode *dvp, struct cred *cred, caller_context_t *ct) +{ + devname_inactive_func(dvp, cred, devnet_inactive_callback); +} + +/* + * We override lookup and readdir to build entries based on the + * in kernel vanity naming node table. 
+ */ +const fs_operation_def_t devnet_vnodeops_tbl[] = { + VOPNAME_READDIR, { .vop_readdir = devnet_readdir }, + VOPNAME_LOOKUP, { .vop_lookup = devnet_lookup }, + VOPNAME_INACTIVE, { .vop_inactive = devnet_inactive }, + VOPNAME_CREATE, { .error = fs_nosys }, + VOPNAME_REMOVE, { .error = fs_nosys }, + VOPNAME_MKDIR, { .error = fs_nosys }, + VOPNAME_RMDIR, { .error = fs_nosys }, + VOPNAME_SYMLINK, { .error = fs_nosys }, + VOPNAME_SETSECATTR, { .error = fs_nosys }, + NULL, NULL +}; diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c index 1075391d17..0159fc568e 100644 --- a/usr/src/uts/common/fs/dev/sdev_subr.c +++ b/usr/src/uts/common/fs/dev/sdev_subr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -588,6 +588,9 @@ static struct sdev_vop_table vtab[] = { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, + { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, + SDEV_DYNAMIC | SDEV_VTOR }, + { NULL, NULL, NULL, NULL, NULL, 0} }; @@ -3709,3 +3712,70 @@ devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, rw_exit(&parent->sdev_contents); return (0); } + +/* + * a generic inactive() function + */ +void +devname_inactive_func(struct vnode *vp, struct cred *cred, + void (*callback)(struct vnode *)) +{ + int clean; + struct sdev_node *dv = VTOSDEV(vp); + struct sdev_node *ddv = dv->sdev_dotdot; + struct sdev_node *idv; + struct sdev_node *prev = NULL; + int state; + struct devname_nsmap *map = NULL; + struct devname_ops *dirops = NULL; + void (*fn)(devname_handle_t *, struct cred *) = NULL; + + rw_enter(&ddv->sdev_contents, RW_WRITER); + state = dv->sdev_state; + + mutex_enter(&vp->v_lock); + ASSERT(vp->v_count >= 1); + + if (vp->v_count == 1 && callback != NULL) + callback(vp); + + clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); + + /* 
+ * last ref count on the ZOMBIE node is released. + * clean up the sdev_node, and + * release the hold on the backing store node so that + * the ZOMBIE backing stores also cleaned out. + */ + if (clean) { + ASSERT(ddv); + if (SDEV_IS_GLOBAL(dv)) { + map = ddv->sdev_mapinfo; + dirops = map ? map->dir_ops : NULL; + if (dirops && (fn = dirops->devnops_inactive)) + (*fn)(&(dv->sdev_handle), cred); + } + + ddv->sdev_nlink--; + if (vp->v_type == VDIR) { + dv->sdev_nlink--; + } + for (idv = ddv->sdev_dot; idv && idv != dv; + prev = idv, idv = idv->sdev_next) + ; + ASSERT(idv == dv); + if (prev == NULL) + ddv->sdev_dot = dv->sdev_next; + else + prev->sdev_next = dv->sdev_next; + dv->sdev_next = NULL; + dv->sdev_nlink--; + --vp->v_count; + mutex_exit(&vp->v_lock); + sdev_nodedestroy(dv, 0); + } else { + --vp->v_count; + mutex_exit(&vp->v_lock); + } + rw_exit(&ddv->sdev_contents); +} diff --git a/usr/src/uts/common/fs/dev/sdev_vnops.c b/usr/src/uts/common/fs/dev/sdev_vnops.c index ff662afc82..b0b67d2a71 100644 --- a/usr/src/uts/common/fs/dev/sdev_vnops.c +++ b/usr/src/uts/common/fs/dev/sdev_vnops.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1215,61 +1215,7 @@ sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, static void sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) { - int clean; - struct sdev_node *dv = VTOSDEV(vp); - struct sdev_node *ddv = dv->sdev_dotdot; - struct sdev_node *idv; - struct sdev_node *prev = NULL; - int state; - struct devname_nsmap *map = NULL; - struct devname_ops *dirops = NULL; - void (*fn)(devname_handle_t *, struct cred *) = NULL; - - rw_enter(&ddv->sdev_contents, RW_WRITER); - state = dv->sdev_state; - - mutex_enter(&vp->v_lock); - ASSERT(vp->v_count >= 1); - - clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); - - /* - * last ref count on the ZOMBIE node is released. - * clean up the sdev_node, and - * release the hold on the backing store node so that - * the ZOMBIE backing stores also cleaned out. - */ - if (clean) { - ASSERT(ddv); - if (SDEV_IS_GLOBAL(dv)) { - map = ddv->sdev_mapinfo; - dirops = map ? map->dir_ops : NULL; - if (dirops && (fn = dirops->devnops_inactive)) - (*fn)(&(dv->sdev_handle), cred); - } - - ddv->sdev_nlink--; - if (vp->v_type == VDIR) { - dv->sdev_nlink--; - } - for (idv = ddv->sdev_dot; idv && idv != dv; - prev = idv, idv = idv->sdev_next) - ; - ASSERT(idv == dv); - if (prev == NULL) - ddv->sdev_dot = dv->sdev_next; - else - prev->sdev_next = dv->sdev_next; - dv->sdev_next = NULL; - dv->sdev_nlink--; - --vp->v_count; - mutex_exit(&vp->v_lock); - sdev_nodedestroy(dv, 0); - } else { - --vp->v_count; - mutex_exit(&vp->v_lock); - } - rw_exit(&ddv->sdev_contents); + devname_inactive_func(vp, cred, NULL); } /*ARGSUSED2*/ diff --git a/usr/src/uts/common/fs/nfs/nfs_dlinet.c b/usr/src/uts/common/fs/nfs/nfs_dlinet.c index 632a5b29a7..944df0336d 100644 --- a/usr/src/uts/common/fs/nfs/nfs_dlinet.c +++ b/usr/src/uts/common/fs/nfs/nfs_dlinet.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ @@ -221,11 +221,6 @@ static void revarpinput(ldi_handle_t, struct netbuf *); static void init_netbuf(struct netbuf *); static void free_netbuf(struct netbuf *); static int rtioctl(TIUSER *, int, struct rtentry *); -static int dl_info(ldi_handle_t, dl_info_ack_t *); -extern int dl_attach(ldi_handle_t, int); -extern int dl_bind(ldi_handle_t, uint32_t, uint32_t, uint32_t, - uint32_t); -extern int dl_phys_addr(ldi_handle_t, struct ether_addr *); static void init_config(void); static void cacheinit(void); @@ -1553,19 +1548,19 @@ revarp_myaddr(TIUSER *tiptr) return (rc); } - if (rc = dl_attach(lh, ifunit)) { + if (rc = dl_attach(lh, ifunit, NULL)) { nfs_perror(rc, "revarp_myaddr: dl_attach failed: %m\n"); (void) ldi_close(lh, FREAD|FWRITE, CRED()); return (rc); } - if (rc = dl_bind(lh, ETHERTYPE_REVARP, 0, DL_CLDLS, 0)) { + if (rc = dl_bind(lh, ETHERTYPE_REVARP, NULL)) { nfs_perror(rc, "revarp_myaddr: dl_bind failed: %m\n"); (void) ldi_close(lh, FREAD|FWRITE, CRED()); return (rc); } - if (rc = dl_info(lh, &info)) { + if (rc = dl_info(lh, &info, NULL, NULL, NULL)) { nfs_perror(rc, "revarp_myaddr: dl_info failed: %m\n"); (void) ldi_close(lh, FREAD|FWRITE, CRED()); return (rc); @@ -1611,8 +1606,10 @@ revarp_start(ldi_handle_t lh, struct netbuf *myaddr) mblk_t *mp; struct dladdr *dlsap; static int done = 0; + size_t addrlen = ETHERADDRL; - if (dl_phys_addr(lh, &myether) != 0) { + if (dl_phys_addr(lh, (uchar_t *)&myether, &addrlen, NULL) != 0 || + addrlen != ETHERADDRL) { /* Fallback using per-node address */ (void) localetheraddr((struct ether_addr *)NULL, &myether); cmn_err(CE_CONT, "?DLPI failed to get Ethernet address. 
Using " @@ -2092,7 +2089,6 @@ myxdr_pmap(XDR *xdrs, struct pmap *regs) return (FALSE); } - /* * From SunOS callrpc.c */ @@ -2121,64 +2117,6 @@ mycallrpc(struct knetconfig *knconf, struct netbuf *call_addr, return (cl_stat); } -static int -dl_info(ldi_handle_t lh, dl_info_ack_t *info) -{ - dl_info_req_t *info_req; - dl_error_ack_t *error_ack; - union DL_primitives *dl_prim; - mblk_t *mp; - int error; - - if ((mp = allocb(sizeof (dl_info_req_t), BPRI_MED)) == NULL) { - cmn_err(CE_WARN, "dl_info: allocb failed"); - return (ENOSR); - } - mp->b_datap->db_type = M_PROTO; - - info_req = (dl_info_req_t *)mp->b_wptr; - mp->b_wptr += sizeof (dl_info_req_t); - info_req->dl_primitive = DL_INFO_REQ; - - (void) ldi_putmsg(lh, mp); - if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) { - nfs_perror(error, "dl_info: ldi_getmsg failed: %m\n"); - return (error); - } - - dl_prim = (union DL_primitives *)mp->b_rptr; - switch (dl_prim->dl_primitive) { - case DL_INFO_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_info_ack_t)) { - printf("dl_info: DL_INFO_ACK protocol error\n"); - break; - } - *info = *(dl_info_ack_t *)mp->b_rptr; - freemsg(mp); - return (0); - - case DL_ERROR_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_error_ack_t)) { - printf("dl_info: DL_ERROR_ACK protocol error\n"); - break; - } - - error_ack = (dl_error_ack_t *)dl_prim; - printf("dl_info: DLPI error %u\n", error_ack->dl_errno); - break; - - default: - printf("dl_bind: bad ACK header %u\n", dl_prim->dl_primitive); - break; - } - - /* - * Error return only. - */ - freemsg(mp); - return (-1); -} - /* * Configure the 'default' interface based on existing boot properties. */ diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c index b96128c4c9..1fe7942f08 100644 --- a/usr/src/uts/common/inet/arp/arp.c +++ b/usr/src/uts/common/inet/arp/arp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -3443,7 +3443,7 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) freemsg(mp); return; } - err_str = dlpi_prim_str(dlp->error_ack.dl_error_primitive); + err_str = dl_primstr(dlp->error_ack.dl_error_primitive); DTRACE_PROBE2(rput_dl_error, arl_t *, arl, dl_error_ack_t *, &dlp->error_ack); switch (dlp->error_ack.dl_error_primitive) { diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 29b59311ba..6ef38c8c6c 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -3069,8 +3069,6 @@ struct ipsec_out_s; struct mac_header_info_s; extern boolean_t ip_assign_ifindex(uint_t *, ip_stack_t *); -extern const char *dlpi_prim_str(int); -extern const char *dlpi_err_str(int); extern void ill_frag_timer(void *); extern ill_t *ill_first(int, int, ill_walk_context_t *, ip_stack_t *); extern ill_t *ill_next(ill_walk_context_t *, ill_t *); diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 29d96f16ae..77beff2c4c 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -6053,76 +6053,6 @@ ip_dlpi_alloc(size_t len, t_uscalar_t prim) return (mp); } -const char * -dlpi_prim_str(int prim) -{ - switch (prim) { - case DL_INFO_REQ: return ("DL_INFO_REQ"); - case DL_INFO_ACK: return ("DL_INFO_ACK"); - case DL_ATTACH_REQ: return ("DL_ATTACH_REQ"); - case DL_DETACH_REQ: return ("DL_DETACH_REQ"); - case DL_BIND_REQ: return ("DL_BIND_REQ"); - case DL_BIND_ACK: return ("DL_BIND_ACK"); - case DL_UNBIND_REQ: return ("DL_UNBIND_REQ"); - case DL_OK_ACK: return ("DL_OK_ACK"); - case DL_ERROR_ACK: return ("DL_ERROR_ACK"); - case DL_ENABMULTI_REQ: return ("DL_ENABMULTI_REQ"); - case DL_DISABMULTI_REQ: return ("DL_DISABMULTI_REQ"); - case DL_PROMISCON_REQ: return ("DL_PROMISCON_REQ"); - case DL_PROMISCOFF_REQ: return ("DL_PROMISCOFF_REQ"); - case DL_UNITDATA_REQ: 
return ("DL_UNITDATA_REQ"); - case DL_UNITDATA_IND: return ("DL_UNITDATA_IND"); - case DL_UDERROR_IND: return ("DL_UDERROR_IND"); - case DL_PHYS_ADDR_REQ: return ("DL_PHYS_ADDR_REQ"); - case DL_PHYS_ADDR_ACK: return ("DL_PHYS_ADDR_ACK"); - case DL_SET_PHYS_ADDR_REQ: return ("DL_SET_PHYS_ADDR_REQ"); - case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ"); - case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK"); - case DL_NOTIFY_IND: return ("DL_NOTIFY_IND"); - case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ"); - case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK"); - case DL_CONTROL_REQ: return ("DL_CONTROL_REQ"); - case DL_CONTROL_ACK: return ("DL_CONTROL_ACK"); - default: return ("<unknown primitive>"); - } -} - -const char * -dlpi_err_str(int err) -{ - switch (err) { - case DL_ACCESS: return ("DL_ACCESS"); - case DL_BADADDR: return ("DL_BADADDR"); - case DL_BADCORR: return ("DL_BADCORR"); - case DL_BADDATA: return ("DL_BADDATA"); - case DL_BADPPA: return ("DL_BADPPA"); - case DL_BADPRIM: return ("DL_BADPRIM"); - case DL_BADQOSPARAM: return ("DL_BADQOSPARAM"); - case DL_BADQOSTYPE: return ("DL_BADQOSTYPE"); - case DL_BADSAP: return ("DL_BADSAP"); - case DL_BADTOKEN: return ("DL_BADTOKEN"); - case DL_BOUND: return ("DL_BOUND"); - case DL_INITFAILED: return ("DL_INITFAILED"); - case DL_NOADDR: return ("DL_NOADDR"); - case DL_NOTINIT: return ("DL_NOTINIT"); - case DL_OUTSTATE: return ("DL_OUTSTATE"); - case DL_SYSERR: return ("DL_SYSERR"); - case DL_UNSUPPORTED: return ("DL_UNSUPPORTED"); - case DL_UNDELIVERABLE: return ("DL_UNDELIVERABLE"); - case DL_NOTSUPPORTED : return ("DL_NOTSUPPORTED "); - case DL_TOOMANY: return ("DL_TOOMANY"); - case DL_NOTENAB: return ("DL_NOTENAB"); - case DL_BUSY: return ("DL_BUSY"); - case DL_NOAUTO: return ("DL_NOAUTO"); - case DL_NOXIDAUTO: return ("DL_NOXIDAUTO"); - case DL_NOTESTAUTO: return ("DL_NOTESTAUTO"); - case DL_XIDAUTO: return ("DL_XIDAUTO"); - case DL_TESTAUTO: return ("DL_TESTAUTO"); - case DL_PENDING: return ("DL_PENDING"); - default: 
return ("<unknown error>"); - } -} - /* * Debug formatting routine. Returns a character string representation of the * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address @@ -15443,13 +15373,13 @@ ip_dlpi_error(ill_t *ill, t_uscalar_t prim, t_uscalar_t dl_err, if (dl_err == DL_SYSERR) { (void) mi_strlog(ill->ill_rq, 1, SL_CONSOLE|SL_ERROR|SL_TRACE, "%s: %s failed: DL_SYSERR (errno %u)\n", - ill->ill_name, dlpi_prim_str(prim), err); + ill->ill_name, dl_primstr(prim), err); return; } (void) mi_strlog(ill->ill_rq, 1, SL_CONSOLE|SL_ERROR|SL_TRACE, - "%s: %s failed: %s\n", ill->ill_name, dlpi_prim_str(prim), - dlpi_err_str(dl_err)); + "%s: %s failed: %s\n", ill->ill_name, dl_primstr(prim), + dl_errstr(dl_err)); } /* @@ -15470,9 +15400,9 @@ ip_rput_dlpi(queue_t *q, mblk_t *mp) if (dloa->dl_primitive == DL_ERROR_ACK) { ip2dbg(("ip_rput_dlpi(%s): DL_ERROR_ACK %s (0x%x): " "%s (0x%x), unix %u\n", ill->ill_name, - dlpi_prim_str(dlea->dl_error_primitive), + dl_primstr(dlea->dl_error_primitive), dlea->dl_error_primitive, - dlpi_err_str(dlea->dl_errno), + dl_errstr(dlea->dl_errno), dlea->dl_errno, dlea->dl_unix_errno)); } @@ -15532,7 +15462,7 @@ ip_rput_dlpi(queue_t *q, mblk_t *mp) case DL_OK_ACK: ip1dbg(("ip_rput: DL_OK_ACK for %s\n", - dlpi_prim_str((int)dloa->dl_correct_primitive))); + dl_primstr((int)dloa->dl_correct_primitive))); switch (dloa->dl_correct_primitive) { case DL_UNBIND_REQ: mutex_enter(&ill->ill_lock); @@ -15624,7 +15554,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) switch (dloa->dl_primitive) { case DL_ERROR_ACK: ip1dbg(("ip_rput_dlpi_writer: got DL_ERROR_ACK for %s\n", - dlpi_prim_str(dlea->dl_error_primitive))); + dl_primstr(dlea->dl_error_primitive))); switch (dlea->dl_error_primitive) { case DL_PROMISCON_REQ: @@ -16254,7 +16184,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) } case DL_OK_ACK: ip2dbg(("DL_OK_ACK %s (0x%x)\n", - 
dlpi_prim_str((int)dloa->dl_correct_primitive), + dl_primstr((int)dloa->dl_correct_primitive), dloa->dl_correct_primitive)); switch (dloa->dl_correct_primitive) { case DL_PROMISCON_REQ: diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 4e83f1862e..fde1ec4d19 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -18147,7 +18147,7 @@ ill_dl_down(ill_t *ill) ill->ill_unbind_mp = NULL; if (mp != NULL) { ip1dbg(("ill_dl_down: %s (%u) for %s\n", - dlpi_prim_str(*(int *)mp->b_rptr), *(int *)mp->b_rptr, + dl_primstr(*(int *)mp->b_rptr), *(int *)mp->b_rptr, ill->ill_name)); mutex_enter(&ill->ill_lock); ill->ill_state_flags |= ILL_DL_UNBIND_IN_PROGRESS; @@ -18222,7 +18222,7 @@ ill_dlpi_dispatch(ill_t *ill, mblk_t *mp) prim = dlp->dl_primitive; ip1dbg(("ill_dlpi_dispatch: sending %s (%u) to %s\n", - dlpi_prim_str(prim), prim, ill->ill_name)); + dl_primstr(prim), prim, ill->ill_name)); switch (prim) { case DL_PHYS_ADDR_REQ: @@ -18360,11 +18360,11 @@ ill_dlpi_pending(ill_t *ill, t_uscalar_t prim) if (pending == DL_PRIM_INVAL) { (void) mi_strlog(ill->ill_rq, 1, SL_CONSOLE|SL_ERROR|SL_TRACE, "received unsolicited ack for %s on %s\n", - dlpi_prim_str(prim), ill->ill_name); + dl_primstr(prim), ill->ill_name); } else { (void) mi_strlog(ill->ill_rq, 1, SL_CONSOLE|SL_ERROR|SL_TRACE, "received unexpected ack for %s on %s (expecting %s)\n", - dlpi_prim_str(prim), ill->ill_name, dlpi_prim_str(pending)); + dl_primstr(prim), ill->ill_name, dl_primstr(pending)); } return (B_FALSE); } @@ -18385,7 +18385,7 @@ ill_dlpi_done(ill_t *ill, t_uscalar_t prim) ASSERT(ill->ill_dlpi_pending == prim); ip1dbg(("ill_dlpi_done: %s has completed %s (%u)\n", ill->ill_name, - dlpi_prim_str(ill->ill_dlpi_pending), ill->ill_dlpi_pending)); + dl_primstr(ill->ill_dlpi_pending), ill->ill_dlpi_pending)); if ((mp = ill->ill_dlpi_deferred) == NULL) { ill->ill_dlpi_pending = DL_PRIM_INVAL; diff --git a/usr/src/uts/common/inet/ip/ip_squeue.c 
b/usr/src/uts/common/inet/ip/ip_squeue.c index 8df412e468..9bfe536d61 100644 --- a/usr/src/uts/common/inet/ip/ip_squeue.c +++ b/usr/src/uts/common/inet/ip/ip_squeue.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -707,8 +707,6 @@ ip_soft_ring_assignment(ill_t *ill, ill_rx_ring_t *ip_ring, ip_taskq_arg_t *taskq_arg; boolean_t refheld; - ASSERT(servicing_interrupt()); - mutex_enter(&ill->ill_lock); if (!(ill->ill_state_flags & ILL_SOFT_RING_ASSIGN)) { taskq_arg = (ip_taskq_arg_t *) diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 47ae998331..e836fcdab2 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -20878,7 +20878,12 @@ tcp_multisend_data(tcp_t *tcp, ire_t *ire, const ill_t *ill, mblk_t *md_mp_head, ire->ire_last_used_time = lbolt; /* send it down */ - putnext(ire->ire_stq, md_mp_head); + if (ILL_DLS_CAPABLE(ill)) { + ill_dls_capab_t *ill_dls = ill->ill_dls_capab; + ill_dls->ill_tx(ill_dls->ill_tx_handle, md_mp_head); + } else { + putnext(ire->ire_stq, md_mp_head); + } /* we're done for TCP/IPv4 */ if (tcp->tcp_ipversion == IPV4_VERSION) diff --git a/usr/src/uts/common/io/afe/afe.c b/usr/src/uts/common/io/afe/afe.c index 96e32b6019..e734b75333 100644 --- a/usr/src/uts/common/io/afe/afe.c +++ b/usr/src/uts/common/io/afe/afe.c @@ -52,6 +52,7 @@ #include <sys/mac_ether.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <sys/vlan.h> #include "afe.h" #include "afeimpl.h" @@ -542,6 +543,7 @@ afe_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) macp->m_callbacks = &afe_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = ETHERMTU; + macp->m_margin = VLAN_TAGSZ; if (mac_register(macp, &afep->afe_mh) == DDI_SUCCESS) { mac_free(macp); diff --git a/usr/src/uts/common/io/aggr/aggr_ctl.c b/usr/src/uts/common/io/aggr/aggr_ctl.c index 
f0990702cf..1e0fdbc437 100644 --- a/usr/src/uts/common/io/aggr/aggr_ctl.c +++ b/usr/src/uts/common/io/aggr/aggr_ctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,7 +68,8 @@ aggr_ioc_modify(mblk_t *mp, int mode) boolean_t mac_fixed; uchar_t mac_addr[ETHERADDRL]; uint8_t modify_mask_arg, modify_mask = 0; - uint32_t rc, key; + datalink_id_t linkid; + uint32_t rc; aggr_lacp_mode_t lacp_mode; aggr_lacp_timer_t lacp_timer; @@ -76,7 +77,7 @@ aggr_ioc_modify(mblk_t *mp, int mode) if (MBLKL(mp->b_cont) < STRUCT_SIZE(modify_arg)) return (EINVAL); - key = STRUCT_FGET(modify_arg, lu_key); + linkid = STRUCT_FGET(modify_arg, lu_linkid); modify_mask_arg = STRUCT_FGET(modify_arg, lu_modify_mask); if (modify_mask_arg & LAIOC_MODIFY_POLICY) { @@ -100,7 +101,7 @@ aggr_ioc_modify(mblk_t *mp, int mode) lacp_timer = STRUCT_FGET(modify_arg, lu_lacp_timer); } - rc = aggr_grp_modify(key, NULL, modify_mask, policy, mac_fixed, + rc = aggr_grp_modify(linkid, NULL, modify_mask, policy, mac_fixed, mac_addr, lacp_mode, lacp_timer); freemsg(mp->b_cont); @@ -119,6 +120,7 @@ aggr_ioc_create(mblk_t *mp, int mode) laioc_port_t *ports = NULL; uint32_t policy; boolean_t mac_fixed; + boolean_t force; uchar_t mac_addr[ETHERADDRL]; aggr_lacp_mode_t lacp_mode; aggr_lacp_timer_t lacp_timer; @@ -143,9 +145,11 @@ aggr_ioc_create(mblk_t *mp, int mode) bcopy(STRUCT_FGET(create_arg, lc_mac), mac_addr, ETHERADDRL); mac_fixed = STRUCT_FGET(create_arg, lc_mac_fixed); + force = STRUCT_FGET(create_arg, lc_force); - rc = aggr_grp_create(STRUCT_FGET(create_arg, lc_key), - nports, ports, policy, mac_fixed, mac_addr, lacp_mode, lacp_timer); + rc = aggr_grp_create(STRUCT_FGET(create_arg, lc_linkid), + STRUCT_FGET(create_arg, lc_key), nports, ports, policy, + mac_fixed, force, mac_addr, lacp_mode, lacp_timer); freemsg(mp->b_cont); mp->b_cont = NULL; @@ 
-162,7 +166,7 @@ aggr_ioc_delete(mblk_t *mp, int mode) if (STRUCT_SIZE(delete_arg) > MBLKL(mp)) return (EINVAL); - rc = aggr_grp_delete(STRUCT_FGET(delete_arg, ld_key)); + rc = aggr_grp_delete(STRUCT_FGET(delete_arg, ld_linkid)); freemsg(mp->b_cont); mp->b_cont = NULL; @@ -175,9 +179,9 @@ typedef struct aggr_ioc_info_state { } aggr_ioc_info_state_t; static int -aggr_ioc_info_new_grp(void *arg, uint32_t key, uchar_t *mac, - boolean_t mac_fixed, uint32_t policy, uint32_t nports, - aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) +aggr_ioc_info_new_grp(void *arg, datalink_id_t linkid, uint32_t key, + uchar_t *mac, boolean_t mac_fixed, boolean_t force, uint32_t policy, + uint32_t nports, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) { aggr_ioc_info_state_t *state = arg; laioc_info_group_t grp; @@ -185,9 +189,11 @@ aggr_ioc_info_new_grp(void *arg, uint32_t key, uchar_t *mac, if (state->bytes_left < sizeof (grp)) return (ENOSPC); + grp.lg_linkid = linkid; grp.lg_key = key; bcopy(mac, grp.lg_mac, ETHERADDRL); grp.lg_mac_fixed = mac_fixed; + grp.lg_force = force; grp.lg_policy = policy; grp.lg_nports = nports; grp.lg_lacp_mode = lacp_mode; @@ -201,7 +207,7 @@ aggr_ioc_info_new_grp(void *arg, uint32_t key, uchar_t *mac, } static int -aggr_ioc_info_new_port(void *arg, char *devname, uchar_t *mac, +aggr_ioc_info_new_port(void *arg, datalink_id_t linkid, uchar_t *mac, aggr_port_state_t portstate, aggr_lacp_state_t *lacp_state) { aggr_ioc_info_state_t *state = arg; @@ -210,7 +216,7 @@ aggr_ioc_info_new_port(void *arg, char *devname, uchar_t *mac, if (state->bytes_left < sizeof (port)) return (ENOSPC); - bcopy(devname, port.lp_devname, MAXNAMELEN + 1); + port.lp_linkid = linkid; bcopy(mac, port.lp_mac, ETHERADDRL); port.lp_state = portstate; port.lp_lacp_state = *lacp_state; @@ -227,7 +233,7 @@ static int aggr_ioc_info(mblk_t *mp, int mode) { laioc_info_t *info_argp; - uint32_t ngroups, group_key; + datalink_id_t linkid; int rc, len; aggr_ioc_info_state_t 
state; @@ -235,19 +241,18 @@ aggr_ioc_info(mblk_t *mp, int mode) return (EINVAL); info_argp = (laioc_info_t *)mp->b_cont->b_rptr; + /* - * Key of the group to return. If zero, the call returns information - * regarding all groups currently defined. + * linkid of the group to return. Must not be DATALINK_INVALID_LINKID. */ - group_key = info_argp->li_group_key; + if ((linkid = info_argp->li_group_linkid) == DATALINK_INVALID_LINKID) + return (EINVAL); state.bytes_left = len - sizeof (laioc_info_t); state.where = (uchar_t *)(info_argp + 1); - rc = aggr_grp_info(&ngroups, group_key, &state, aggr_ioc_info_new_grp, - aggr_ioc_info_new_port); - if (rc == 0) - info_argp->li_ngroups = ngroups; + rc = aggr_grp_info(linkid, &state, + aggr_ioc_info_new_grp, aggr_ioc_info_new_port); return (rc); } @@ -258,6 +263,7 @@ aggr_ioc_add(mblk_t *mp, int mode) STRUCT_HANDLE(laioc_add_rem, add_arg); uint32_t nports; laioc_port_t *ports = NULL; + boolean_t force; int rc, len; STRUCT_SET_HANDLE(add_arg, mode, (void *)mp->b_cont->b_rptr); @@ -272,9 +278,10 @@ aggr_ioc_add(mblk_t *mp, int mode) return (EINVAL); ports = (laioc_port_t *)(STRUCT_BUF(add_arg) + 1); + force = STRUCT_FGET(add_arg, la_force); - rc = aggr_grp_add_ports(STRUCT_FGET(add_arg, la_key), - nports, ports); + rc = aggr_grp_add_ports(STRUCT_FGET(add_arg, la_linkid), + nports, force, ports); freemsg(mp->b_cont); mp->b_cont = NULL; @@ -302,7 +309,7 @@ aggr_ioc_remove(mblk_t *mp, int mode) ports = (laioc_port_t *)(STRUCT_BUF(rem_arg) + 1); - rc = aggr_grp_rem_ports(STRUCT_FGET(rem_arg, la_key), + rc = aggr_grp_rem_ports(STRUCT_FGET(rem_arg, la_linkid), nports, ports); freemsg(mp->b_cont); diff --git a/usr/src/uts/common/io/aggr/aggr_dev.c b/usr/src/uts/common/io/aggr/aggr_dev.c index f56b8f4f2f..2bdd97fc3f 100644 --- a/usr/src/uts/common/io/aggr/aggr_dev.c +++ b/usr/src/uts/common/io/aggr/aggr_dev.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -160,6 +160,12 @@ aggr_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (ENOSR); /* + * The ioctl handling callback to process control ioctl + * messages; see comments above dld_ioctl() for details. + */ + dsp->ds_ioctl = aggr_ioctl; + + /* * The aggr control node uses its own set of entry points. */ WR(q)->q_qinfo = &aggr_w_ctl_qinit; @@ -177,6 +183,8 @@ aggr_close(queue_t *q) if (dsp->ds_type == DLD_CONTROL) { qprocsoff(q); + dld_finish_pending_task(dsp); + dsp->ds_ioctl = NULL; dld_str_destroy(dsp); return (0); } @@ -187,7 +195,7 @@ static void aggr_wput(queue_t *q, mblk_t *mp) { if (DB_TYPE(mp) == M_IOCTL) - aggr_ioctl(q, mp); + dld_ioctl(q, mp); else freemsg(mp); } diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index b89e4496e4..65105e298e 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,9 +31,8 @@ * An instance of the structure aggr_grp_t is allocated for each * link aggregation group. When created, aggr_grp_t objects are * entered into the aggr_grp_hash hash table maintained by the modhash - * module. The hash key is the port number associated with the link - * aggregation group. The port number associated with a group corresponds - * the key associated with the group. + * module. The hash key is the linkid associated with the link + * aggregation group. * * A set of MAC ports are associated with each association group. 
*/ @@ -52,9 +51,11 @@ #include <sys/atomic.h> #include <sys/stat.h> #include <sys/modhash.h> +#include <sys/id_space.h> #include <sys/strsun.h> #include <sys/dlpi.h> - +#include <sys/dls.h> +#include <sys/vlan.h> #include <sys/aggr.h> #include <sys/aggr_impl.h> @@ -67,35 +68,28 @@ static int aggr_m_stat(void *, uint_t, uint64_t *); static void aggr_m_resources(void *); static void aggr_m_ioctl(void *, queue_t *, mblk_t *); static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); - -static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *); +static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, boolean_t *); + static void aggr_grp_capab_set(aggr_grp_t *); static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); static uint_t aggr_grp_max_sdu(aggr_grp_t *); +static uint32_t aggr_grp_max_margin(aggr_grp_t *); static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *); +static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *); static kmem_cache_t *aggr_grp_cache; static mod_hash_t *aggr_grp_hash; static krwlock_t aggr_grp_lock; static uint_t aggr_grp_cnt; +static id_space_t *key_ids; #define GRP_HASHSZ 64 -#define GRP_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)key) +#define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid) static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; -/* used by grp_info_walker */ -typedef struct aggr_grp_info_state { - uint32_t ls_group_key; - boolean_t ls_group_found; - aggr_grp_info_new_grp_fn_t ls_new_grp_fn; - aggr_grp_info_new_port_fn_t ls_new_port_fn; - void *ls_fn_arg; - int ls_rc; -} aggr_grp_info_state_t; - #define AGGR_M_CALLBACK_FLAGS (MC_RESOURCES | MC_IOCTL | MC_GETCAPAB) static mac_callbacks_t aggr_m_callbacks = { @@ -153,11 +147,21 @@ aggr_grp_init(void) GRP_HASHSZ, mod_hash_null_valdtor); rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); aggr_grp_cnt = 0; + + /* + * Allocate an id space to 
manage key values (when key is not + * specified). The range of the id space will be from + * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol + * uses a 16-bit key. + */ + key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX); + ASSERT(key_ids != NULL); } void aggr_grp_fini(void) { + id_space_destroy(key_ids); rw_destroy(&aggr_grp_lock); mod_hash_destroy_idhash(aggr_grp_hash); kmem_cache_destroy(aggr_grp_cache); @@ -409,7 +413,8 @@ aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port, * Add a port to a link aggregation group. */ static int -aggr_grp_add_port(aggr_grp_t *grp, const char *name, aggr_port_t **pp) +aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t linkid, boolean_t force, + aggr_port_t **pp) { aggr_port_t *port, **cport; int err; @@ -418,7 +423,7 @@ aggr_grp_add_port(aggr_grp_t *grp, const char *name, aggr_port_t **pp) ASSERT(RW_WRITE_HELD(&grp->lg_lock)); /* create new port */ - err = aggr_port_create(name, &port); + err = aggr_port_create(linkid, force, &port); if (err != 0) return (err); @@ -459,16 +464,17 @@ aggr_grp_add_port(aggr_grp_t *grp, const char *name, aggr_port_t **pp) * Add one or more ports to an existing link aggregation group. 
*/ int -aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) +aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, + laioc_port_t *ports) { int rc, i, nadded = 0; aggr_grp_t *grp = NULL; aggr_port_t *port; boolean_t link_state_changed = B_FALSE; - /* get group corresponding to key */ + /* get group corresponding to linkid */ rw_enter(&aggr_grp_lock, RW_READER); - if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), + if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), (mod_hash_val_t *)&grp) != 0) { rw_exit(&aggr_grp_lock); return (ENOENT); @@ -482,8 +488,8 @@ aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) /* add the specified ports to group */ for (i = 0; i < nports; i++) { /* add port to group */ - if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, &port)) != - 0) { + if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid, + force, &port)) != 0) { goto bail; } ASSERT(port != NULL); @@ -491,7 +497,8 @@ aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) /* check capabilities */ if (!aggr_grp_capab_check(grp, port) || - !aggr_grp_sdu_check(grp, port)) { + !aggr_grp_sdu_check(grp, port) || + !aggr_grp_margin_check(grp, port)) { rc = ENOTSUP; goto bail; } @@ -532,7 +539,7 @@ bail: if (rc != 0) { /* stop and remove ports that have been added */ for (i = 0; i < nadded && !grp->lg_closing; i++) { - port = aggr_grp_port_lookup(grp, ports[i].lp_devname); + port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); ASSERT(port != NULL); if (grp->lg_started) { rw_enter(&port->lp_lock, RW_WRITER); @@ -555,7 +562,7 @@ bail: * Update properties of an existing link aggregation group. 
*/ int -aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, +aggr_grp_modify(datalink_id_t linkid, aggr_grp_t *grp_arg, uint8_t update_mask, uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) { @@ -565,9 +572,9 @@ aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, boolean_t link_state_changed = B_FALSE; if (grp_arg == NULL) { - /* get group corresponding to key */ + /* get group corresponding to linkid */ rw_enter(&aggr_grp_lock, RW_READER); - if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), + if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), (mod_hash_val_t *)&grp) != 0) { rc = ENOENT; goto bail; @@ -660,9 +667,9 @@ bail: * Returns 0 on success, an errno on failure. */ int -aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, - uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr, - aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) +aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, + laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, + uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) { aggr_grp_t *grp = NULL; aggr_port_t *port; @@ -677,8 +684,8 @@ aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, rw_enter(&aggr_grp_lock, RW_WRITER); - /* does a group with the same key already exist? */ - err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), + /* does a group with the same linkid already exist? 
*/ + err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), (mod_hash_val_t *)&grp); if (err == 0) { rw_exit(&aggr_grp_lock); @@ -692,8 +699,8 @@ aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, grp->lg_refs = 1; grp->lg_closing = B_FALSE; - grp->lg_key = key; - + grp->lg_force = force; + grp->lg_linkid = linkid; grp->lg_ifspeed = 0; grp->lg_link_state = LINK_STATE_UNKNOWN; grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; @@ -707,8 +714,17 @@ aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, grp->lg_nattached_ports = 0; grp->lg_ntx_ports = 0; + /* + * If key is not specified by the user, allocate the key. + */ + if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) { + err = ENOMEM; + goto bail; + } + grp->lg_key = key; + for (i = 0; i < nports; i++) { - err = aggr_grp_add_port(grp, ports[i].lp_devname, NULL); + err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); if (err != 0) goto bail; } @@ -744,21 +760,29 @@ aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, /* set the initial group capabilities */ aggr_grp_capab_set(grp); - if ((mac = mac_alloc(MAC_VERSION)) == NULL) + if ((mac = mac_alloc(MAC_VERSION)) == NULL) { + err = ENOMEM; goto bail; + } mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; mac->m_driver = grp; mac->m_dip = aggr_dip; - mac->m_instance = key; + mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? 
(uint_t)-1 : grp->lg_key; mac->m_src_addr = grp->lg_addr; mac->m_callbacks = &aggr_m_callbacks; mac->m_min_sdu = 0; mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp); + mac->m_margin = aggr_grp_max_margin(grp); err = mac_register(mac, &grp->lg_mh); mac_free(mac); if (err != 0) goto bail; + if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) { + (void) mac_unregister(grp->lg_mh); + goto bail; + } + /* set LACP mode */ aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); @@ -774,7 +798,7 @@ aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, mac_link_update(grp->lg_mh, grp->lg_link_state); /* add new group to hash table */ - err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key), + err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid), (mod_hash_val_t)grp); ASSERT(err == 0); aggr_grp_cnt++; @@ -800,7 +824,7 @@ bail: rw_exit(&grp->lg_lock); AGGR_LACP_UNLOCK(grp); - kmem_cache_free(aggr_grp_cache, grp); + AGGR_GRP_REFRELE(grp); } rw_exit(&aggr_grp_lock); @@ -808,18 +832,17 @@ bail: } /* - * Return a pointer to the member of a group with specified device name - * and port number. + * Return a pointer to the member of a group with specified linkid. */ static aggr_port_t * -aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname) +aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid) { aggr_port_t *port; ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); for (port = grp->lg_ports; port != NULL; port = port->lp_next) { - if (strcmp(port->lp_devname, devname) == 0) + if (port->lp_linkid == linkid) break; } @@ -909,7 +932,7 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, /* * If the group MAC address has changed, update the MAC address of - * the remaining consistuent ports according to the new MAC + * the remaining constituent ports according to the new MAC * address of the group. 
*/ if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) @@ -928,7 +951,7 @@ done: * Remove one or more ports from an existing link aggregation group. */ int -aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) +aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) { int rc = 0, i; aggr_grp_t *grp = NULL; @@ -936,9 +959,9 @@ aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) boolean_t mac_addr_update = B_FALSE, mac_addr_changed; boolean_t link_state_update = B_FALSE, link_state_changed; - /* get group corresponding to key */ + /* get group corresponding to linkid */ rw_enter(&aggr_grp_lock, RW_READER); - if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), + if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), (mod_hash_val_t *)&grp) != 0) { rw_exit(&aggr_grp_lock); return (ENOENT); @@ -957,7 +980,7 @@ aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) /* first verify that all the groups are valid */ for (i = 0; i < nports; i++) { - if (aggr_grp_port_lookup(grp, ports[i].lp_devname) == NULL) { + if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) { /* port not found */ rc = ENOENT; goto bail; @@ -967,7 +990,7 @@ aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) /* remove the specified ports from group */ for (i = 0; i < nports && !grp->lg_closing; i++) { /* lookup port */ - port = aggr_grp_port_lookup(grp, ports[i].lp_devname); + port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); ASSERT(port != NULL); /* stop port if group has already been started */ @@ -1002,35 +1025,49 @@ bail: } int -aggr_grp_delete(uint32_t key) +aggr_grp_delete(datalink_id_t linkid) { aggr_grp_t *grp = NULL; aggr_port_t *port, *cport; + datalink_id_t tmpid; mod_hash_val_t val; int err; rw_enter(&aggr_grp_lock, RW_WRITER); - if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), + if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), (mod_hash_val_t *)&grp) != 0) { rw_exit(&aggr_grp_lock); 
return (ENOENT); } + /* + * Note that dls_devnet_destroy() must be called before lg_lock is + * held. Otherwise, it will deadlock if another thread is in + * aggr_m_stat() and thus has a kstat_hold() on the kstats that + * dls_devnet_destroy() needs to delete. + */ + if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid)) != 0) { + rw_exit(&aggr_grp_lock); + return (err); + } + ASSERT(linkid == tmpid); + AGGR_LACP_LOCK(grp); rw_enter(&grp->lg_lock, RW_WRITER); - grp->lg_closing = B_TRUE; - /* * Unregister from the MAC service module. Since this can * fail if a client hasn't closed the MAC port, we gracefully * fail the operation. */ + grp->lg_closing = B_TRUE; if ((err = mac_disable(grp->lg_mh)) != 0) { grp->lg_closing = B_FALSE; rw_exit(&grp->lg_lock); AGGR_LACP_UNLOCK(grp); + + (void) dls_devnet_create(grp->lg_mh, linkid); rw_exit(&aggr_grp_lock); return (err); } @@ -1053,7 +1090,7 @@ aggr_grp_delete(uint32_t key) rw_exit(&grp->lg_lock); AGGR_LACP_UNLOCK(grp); - (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val); + (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val); ASSERT(grp == (aggr_grp_t *)val); ASSERT(aggr_grp_cnt > 0); @@ -1069,84 +1106,52 @@ void aggr_grp_free(aggr_grp_t *grp) { ASSERT(grp->lg_refs == 0); + if (grp->lg_key > AGGR_MAX_KEY) { + id_free(key_ids, grp->lg_key); + grp->lg_key = 0; + } kmem_cache_free(aggr_grp_cache, grp); } -/* - * Walker invoked when building the list of configured groups and - * their ports that must be passed up to user-space. 
- */ - -/*ARGSUSED*/ -static uint_t -aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +int +aggr_grp_info(datalink_id_t linkid, void *fn_arg, + aggr_grp_info_new_grp_fn_t new_grp_fn, + aggr_grp_info_new_port_fn_t new_port_fn) { - aggr_grp_t *grp; - aggr_port_t *port; - aggr_grp_info_state_t *state = arg; + aggr_grp_t *grp; + aggr_port_t *port; + int rc = 0; - if (state->ls_rc != 0) - return (MH_WALK_TERMINATE); /* terminate walk */ + rw_enter(&aggr_grp_lock, RW_READER); - grp = (aggr_grp_t *)val; + if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), + (mod_hash_val_t *)&grp) != 0) { + rw_exit(&aggr_grp_lock); + return (ENOENT); + } rw_enter(&grp->lg_lock, RW_READER); - if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key) - goto bail; - - state->ls_group_found = B_TRUE; - - state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key, - grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy, + rc = new_grp_fn(fn_arg, grp->lg_linkid, + (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr, + grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy, grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); - if (state->ls_rc != 0) + if (rc != 0) goto bail; for (port = grp->lg_ports; port != NULL; port = port->lp_next) { - rw_enter(&port->lp_lock, RW_READER); - - state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg, - port->lp_devname, port->lp_addr, port->lp_state, - &port->lp_lacp.ActorOperPortState); - + rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr, + port->lp_state, &port->lp_lacp.ActorOperPortState); rw_exit(&port->lp_lock); - if (state->ls_rc != 0) + if (rc != 0) goto bail; } bail: rw_exit(&grp->lg_lock); - return ((state->ls_rc == 0) ? 
MH_WALK_CONTINUE : MH_WALK_TERMINATE); -} - -int -aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg, - aggr_grp_info_new_grp_fn_t new_grp_fn, - aggr_grp_info_new_port_fn_t new_port_fn) -{ - aggr_grp_info_state_t state; - int rc = 0; - - rw_enter(&aggr_grp_lock, RW_READER); - - *ngroups = aggr_grp_cnt; - - bzero(&state, sizeof (state)); - state.ls_group_key = group_key; - state.ls_new_grp_fn = new_grp_fn; - state.ls_new_port_fn = new_port_fn; - state.ls_fn_arg = fn_arg; - - mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state); - - if ((rc = state.ls_rc) == 0 && group_key != 0 && - !state.ls_group_found) - rc = ENOENT; - rw_exit(&aggr_grp_lock); return (rc); } @@ -1193,7 +1198,7 @@ aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) *val += aggr_port_stat(port, stat); /* * minus the port stat when it was added, plus any residual - * ammount for the group. + * amount for the group. */ if (IS_MAC_STAT(stat)) { stat_index = stat - MAC_STAT_MIN; @@ -1366,6 +1371,10 @@ aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) * status for this capability. */ return (grp->lg_gldv3_polling); + case MAC_CAPAB_NO_NATIVEVLAN: + return (!grp->lg_vlan); + case MAC_CAPAB_NO_ZCOPY: + return (!grp->lg_zcopy); default: return (B_FALSE); } @@ -1442,18 +1451,25 @@ aggr_grp_capab_set(aggr_grp_t *grp) grp->lg_hcksum_txflags = (uint32_t)-1; grp->lg_gldv3_polling = B_TRUE; + grp->lg_zcopy = B_TRUE; + grp->lg_vlan = B_TRUE; for (port = grp->lg_ports; port != NULL; port = port->lp_next) { if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum)) cksum = 0; grp->lg_hcksum_txflags &= cksum; + grp->lg_vlan &= + !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL); + + grp->lg_zcopy &= + !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL); + grp->lg_gldv3_polling &= mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL); } } - /* * Checks whether the capabilities of the port being added are compatible * with the current capabilities of the aggregation. 
@@ -1461,10 +1477,20 @@ aggr_grp_capab_set(aggr_grp_t *grp) static boolean_t aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) { - uint32_t hcksum_txflags; + uint32_t hcksum_txflags; ASSERT(grp->lg_ports != NULL); + if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) & + grp->lg_vlan) != grp->lg_vlan) { + return (B_FALSE); + } + + if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) & + grp->lg_zcopy) != grp->lg_zcopy) { + return (B_FALSE); + } + if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) { if (grp->lg_hcksum_txflags != 0) return (B_FALSE); @@ -1514,3 +1540,46 @@ aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port) return (port_mi->mi_sdu_max >= grp->lg_max_sdu); } + +/* + * Returns the maximum margin according to the margin of the constituent ports. + */ +static uint32_t +aggr_grp_max_margin(aggr_grp_t *grp) +{ + uint32_t margin = UINT32_MAX; + aggr_port_t *port; + + ASSERT(RW_WRITE_HELD(&grp->lg_lock)); + ASSERT(grp->lg_ports != NULL); + + for (port = grp->lg_ports; port != NULL; port = port->lp_next) { + if (margin > port->lp_margin) + margin = port->lp_margin; + } + + grp->lg_margin = margin; + return (margin); +} + +/* + * Checks if the maximum margin of the specified port is compatible + * with the maximum margin of the specified aggregation group, returns + * B_TRUE if it is, B_FALSE otherwise. + */ +static boolean_t +aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port) +{ + if (port->lp_margin >= grp->lg_margin) + return (B_TRUE); + + /* + * See whether the current margin value is allowed to be changed to + * the new value. 
+ */ + if (!mac_margin_update(grp->lg_mh, port->lp_margin)) + return (B_FALSE); + + grp->lg_margin = port->lp_margin; + return (B_TRUE); +} diff --git a/usr/src/uts/common/io/aggr/aggr_lacp.c b/usr/src/uts/common/io/aggr/aggr_lacp.c index d881b8f1b3..8b8c296b09 100644 --- a/usr/src/uts/common/io/aggr/aggr_lacp.c +++ b/usr/src/uts/common/io/aggr/aggr_lacp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -88,10 +88,10 @@ static uint16_t lacp_system_priority = 0x1000; * is used to detect misconfiguration. */ typedef struct lacp_sel_ports { - uint16_t sp_key; + datalink_id_t sp_grp_linkid; + datalink_id_t sp_linkid; /* Note: sp_partner_system must be 2-byte aligned */ struct ether_addr sp_partner_system; - char sp_devname[MAXNAMELEN + 1]; uint32_t sp_partner_key; struct lacp_sel_ports *sp_next; } lacp_sel_ports_t; @@ -188,8 +188,8 @@ aggr_lacp_init_port(aggr_port_t *portp) /* actor port # */ pl->ActorPortNumber = portp->lp_portid; - AGGR_LACP_DBG(("aggr_lacp_init_port(%s): " - "ActorPortNumber = 0x%x\n", portp->lp_devname, + AGGR_LACP_DBG(("aggr_lacp_init_port(%d): " + "ActorPortNumber = 0x%x\n", portp->lp_linkid, pl->ActorPortNumber)); pl->ActorPortPriority = (uint16_t)lacp_port_priority; @@ -198,9 +198,9 @@ aggr_lacp_init_port(aggr_port_t *portp) pl->ActorAdminPortKey = aggrp->lg_key; pl->ActorOperPortKey = pl->ActorAdminPortKey; - AGGR_LACP_DBG(("aggr_lacp_init_port(%s) " + AGGR_LACP_DBG(("aggr_lacp_init_port(%d) " "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n", - portp->lp_devname, pl->ActorAdminPortKey, pl->ActorOperPortKey)); + portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey)); /* Actor admin. 
port state */ pl->ActorAdminPortState.bit.activity = B_FALSE; @@ -427,8 +427,8 @@ lacp_periodic_sm(aggr_port_t *portp) stop_periodic_timer(portp); pl->sm.periodic_state = LACP_NO_PERIODIC; pl->NTT = B_FALSE; - AGGR_LACP_DBG(("lacp_periodic_sm(%s):NO LACP " - "%s--->%s\n", portp->lp_devname, + AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP " + "%s--->%s\n", portp->lp_linkid, lacp_periodic_str[oldstate], lacp_periodic_str[pl->sm.periodic_state])); return; @@ -443,8 +443,8 @@ lacp_periodic_sm(aggr_port_t *portp) stop_periodic_timer(portp); pl->sm.periodic_state = LACP_NO_PERIODIC; pl->NTT = B_FALSE; - AGGR_LACP_DBG(("lacp_periodic_sm(%s):STOP %s--->%s\n", - portp->lp_devname, lacp_periodic_str[oldstate], + AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n", + portp->lp_linkid, lacp_periodic_str[oldstate], lacp_periodic_str[pl->sm.periodic_state])); return; } @@ -644,9 +644,9 @@ lacp_mux_sm(aggr_port_t *portp) if (pl->ActorOperPortState.bit.collecting || pl->ActorOperPortState.bit.distributing) { - AGGR_LACP_DBG(("trunk link: (%s): " + AGGR_LACP_DBG(("trunk link: (%d): " "Collector_Distributor Disabled.\n", - portp->lp_devname)); + portp->lp_linkid)); } pl->ActorOperPortState.bit.collecting = @@ -708,8 +708,8 @@ again: return; } - AGGR_LACP_DBG(("lacp_mux_sm(%s):%s--->%s\n", - portp->lp_devname, lacp_mux_str[oldstate], + AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n", + portp->lp_linkid, lacp_mux_str[oldstate], lacp_mux_str[pl->sm.mux_state])); /* perform actions on entering a new state */ @@ -717,9 +717,9 @@ again: case LACP_DETACHED: if (pl->ActorOperPortState.bit.collecting || pl->ActorOperPortState.bit.distributing) { - AGGR_LACP_DBG(("trunk link: (%s): " + AGGR_LACP_DBG(("trunk link: (%d): " "Collector_Distributor Disabled.\n", - portp->lp_devname)); + portp->lp_linkid)); } pl->ActorOperPortState.bit.sync = @@ -739,9 +739,9 @@ again: case LACP_ATTACHED: if (pl->ActorOperPortState.bit.collecting || pl->ActorOperPortState.bit.distributing) { - AGGR_LACP_DBG(("trunk 
link: (%s): " + AGGR_LACP_DBG(("trunk link: (%d): " "Collector_Distributor Disabled.\n", - portp->lp_devname)); + portp->lp_linkid)); } pl->ActorOperPortState.bit.sync = B_TRUE; @@ -765,9 +765,9 @@ again: case LACP_COLLECTING_DISTRIBUTING: if (!pl->ActorOperPortState.bit.collecting && !pl->ActorOperPortState.bit.distributing) { - AGGR_LACP_DBG(("trunk link: (%s): " + AGGR_LACP_DBG(("trunk link: (%d): " "Collector_Distributor Enabled.\n", - portp->lp_devname)); + portp->lp_linkid)); } pl->ActorOperPortState.bit.distributing = B_TRUE; @@ -798,8 +798,8 @@ receive_marker_pdu(aggr_port_t *portp, mblk_t *mp) AGGR_LACP_LOCK(portp->lp_grp); - AGGR_LACP_DBG(("trunk link: (%s): MARKER PDU received:\n", - portp->lp_devname)); + AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n", + portp->lp_linkid)); /* LACP_OFF state not in specification so check here. */ if (!portp->lp_lacp.sm.lacp_on) @@ -809,47 +809,47 @@ receive_marker_pdu(aggr_port_t *portp, mblk_t *mp) goto bail; if (markerp->version != MARKER_VERSION) { - AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: " + AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " "version = %d does not match s/w version %d\n", - portp->lp_devname, markerp->version, MARKER_VERSION)); + portp->lp_linkid, markerp->version, MARKER_VERSION)); goto bail; } if (markerp->tlv_marker == MARKER_RESPONSE_TLV) { /* We do not yet send out MARKER info PDUs */ - AGGR_LACP_DBG(("trunk link (%s): MARKER RESPONSE PDU: " + AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: " " MARKER TLV = %d - We don't send out info type!\n", - portp->lp_devname, markerp->tlv_marker)); + portp->lp_linkid, markerp->tlv_marker)); goto bail; } if (markerp->tlv_marker != MARKER_INFO_TLV) { - AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: " - " MARKER TLV = %d \n", portp->lp_devname, + AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " + " MARKER TLV = %d \n", portp->lp_linkid, markerp->tlv_marker)); goto bail; } if (markerp->marker_len != 
MARKER_INFO_RESPONSE_LENGTH) { - AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: " - " MARKER length = %d \n", portp->lp_devname, + AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " + " MARKER length = %d \n", portp->lp_linkid, markerp->marker_len)); goto bail; } if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) { - AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: " + AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " " MARKER Port %d not equal to Partner port %d\n", - portp->lp_devname, markerp->requestor_port, + portp->lp_linkid, markerp->requestor_port, portp->lp_lacp.PartnerOperPortNum)); goto bail; } if (ether_cmp(&markerp->system_id, &portp->lp_lacp.PartnerOperSystem) != 0) { - AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: " + AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " " MARKER MAC not equal to Partner MAC\n", - portp->lp_devname)); + portp->lp_linkid)); goto bail; } @@ -1013,7 +1013,7 @@ lacp_misconfig_check(aggr_port_t *portp) for (cport = sel_ports; cport != NULL; cport = cport->sp_next) { /* skip entries of the group of the port being checked */ - if (cport->sp_key == grp->lg_key) + if (cport->sp_grp_linkid == grp->lg_linkid) continue; if ((ether_cmp(&cport->sp_partner_system, @@ -1035,12 +1035,12 @@ lacp_misconfig_check(aggr_port_t *portp) mac->ether_addr_octet[4], mac->ether_addr_octet[5]); portp->lp_lacp.sm.selected = AGGR_UNSELECTED; - cmn_err(CE_NOTE, "aggr key %d port %s: Port Partner " - "MAC %s and key %d in use on aggregation " - "key %d port %s\n", grp->lg_key, - portp->lp_devname, mac_str, - portp->lp_lacp.PartnerOperKey, cport->sp_key, - cport->sp_devname); + + cmn_err(CE_NOTE, "aggr %d port %d: Port Partner " + "MAC %s and key %d in use on aggregation %d " + "port %d\n", grp->lg_linkid, portp->lp_linkid, + mac_str, portp->lp_lacp.PartnerOperKey, + cport->sp_grp_linkid, cport->sp_linkid); break; } } @@ -1062,10 +1062,8 @@ lacp_sel_ports_del(aggr_port_t *portp) prev = &sel_ports; for (cport = sel_ports; cport != NULL; 
prev = &cport->sp_next, cport = cport->sp_next) { - if (bcmp(portp->lp_devname, cport->sp_devname, - MAXNAMELEN + 1) == 0) { + if (portp->lp_linkid == cport->sp_linkid) break; - } } if (cport == NULL) { @@ -1096,8 +1094,7 @@ lacp_sel_ports_add(aggr_port_t *portp) last = &sel_ports; for (cport = sel_ports; cport != NULL; last = &cport->sp_next, cport = cport->sp_next) { - if (bcmp(portp->lp_devname, cport->sp_devname, - MAXNAMELEN + 1) == 0) { + if (portp->lp_linkid == cport->sp_linkid) { ASSERT(cport->sp_partner_key == portp->lp_lacp.PartnerOperKey); ASSERT(ether_cmp(&cport->sp_partner_system, @@ -1115,11 +1112,11 @@ lacp_sel_ports_add(aggr_port_t *portp) return (B_FALSE); } - new_port->sp_key = portp->lp_grp->lg_key; + new_port->sp_grp_linkid = portp->lp_grp->lg_linkid; bcopy(&portp->lp_lacp.PartnerOperSystem, &new_port->sp_partner_system, sizeof (new_port->sp_partner_system)); new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey; - bcopy(portp->lp_devname, new_port->sp_devname, MAXNAMELEN + 1); + new_port->sp_linkid = portp->lp_linkid; *last = new_port; @@ -1165,9 +1162,9 @@ lacp_selection_logic(aggr_port_t *portp) if (pl->sm.begin || !pl->sm.lacp_enabled || (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) { - AGGR_LACP_DBG(("lacp_selection_logic:(%s): " + AGGR_LACP_DBG(("lacp_selection_logic:(%d): " "selected %d-->%d (begin=%d, lacp_enabled = %d, " - "lp_state=%d)\n", portp->lp_devname, pl->sm.selected, + "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled, portp->lp_state)); @@ -1181,8 +1178,8 @@ lacp_selection_logic(aggr_port_t *portp) * If LACP is not enabled then selected is never set. 
*/ if (!pl->sm.lacp_enabled) { - AGGR_LACP_DBG(("lacp_selection_logic:(%s): selected %d-->%d\n", - portp->lp_devname, pl->sm.selected, AGGR_UNSELECTED)); + AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n", + portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED)); lacp_port_unselect(portp); lacp_mux_sm(portp); @@ -1250,8 +1247,8 @@ lacp_selection_logic(aggr_port_t *portp) */ if (ether_cmp(&pl->PartnerOperSystem, (struct ether_addr *)&aggrp->lg_addr) == 0) { - cmn_err(CE_NOTE, "trunk link: (%s): Loopback condition.\n", - portp->lp_devname); + cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n", + portp->lp_linkid); lacp_port_unselect(portp); lacp_mux_sm(portp); @@ -1306,10 +1303,9 @@ lacp_selection_logic(aggr_port_t *portp) */ lacp_port_unselect(portp); - cmn_err(CE_NOTE, "trunk link: (%s): Port Partner MAC or" - " key (%d) incompatible with Aggregation Partner " - "MAC or key (%d)\n", - portp->lp_devname, pl->PartnerOperKey, + cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC " + "or key (%d) incompatible with Aggregation Partner " + "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey, aggrp->aggr.PartnerOperAggrKey); lacp_mux_sm(portp); @@ -1318,8 +1314,8 @@ lacp_selection_logic(aggr_port_t *portp) /* If we get to here, automatically set selected */ if (pl->sm.selected != AGGR_SELECTED) { - AGGR_LACP_DBG(("lacp_selection_logic:(%s): " - "selected %d-->%d\n", portp->lp_devname, + AGGR_LACP_DBG(("lacp_selection_logic:(%d): " + "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected, AGGR_SELECTED)); if (!lacp_port_select(portp)) return; @@ -1360,12 +1356,12 @@ lacp_selection_logic(aggr_port_t *portp) } if (aggrp->aggr.ready) { - AGGR_LACP_DBG(("lacp_selection_logic:(%s): " - "aggr.ready already set\n", portp->lp_devname)); + AGGR_LACP_DBG(("lacp_selection_logic:(%d): " + "aggr.ready already set\n", portp->lp_linkid)); lacp_mux_sm(portp); } else { - AGGR_LACP_DBG(("lacp_selection_logic:(%s): Ready %d-->%d\n", - portp->lp_devname, 
aggrp->aggr.ready, B_TRUE)); + AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n", + portp->lp_linkid, aggrp->aggr.ready, B_TRUE)); aggrp->aggr.ready = B_TRUE; for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) @@ -1388,8 +1384,8 @@ wait_while_timer_pop(void *data) AGGR_LACP_LOCK(portp->lp_grp); - AGGR_LACP_DBG(("trunk link:(%s): wait_while_timer pop \n", - portp->lp_devname)); + AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n", + portp->lp_linkid)); portp->lp_lacp.wait_while_timer.id = 0; portp->lp_lacp.sm.ready_n = B_TRUE; @@ -1441,8 +1437,8 @@ aggr_lacp_port_attached(aggr_port_t *portp) ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED); ASSERT(RW_WRITE_HELD(&portp->lp_lock)); - AGGR_LACP_DBG(("aggr_lacp_port_attached: port %s\n", - portp->lp_devname)); + AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n", + portp->lp_linkid)); portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */ @@ -1497,8 +1493,8 @@ aggr_lacp_port_detached(aggr_port_t *portp) ASSERT(AGGR_LACP_LOCK_HELD(grp)); ASSERT(RW_WRITE_HELD(&portp->lp_lock)); - AGGR_LACP_DBG(("aggr_lacp_port_detached: port %s\n", - portp->lp_devname)); + AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n", + portp->lp_linkid)); portp->lp_lacp.sm.port_enabled = B_FALSE; @@ -1542,7 +1538,7 @@ lacp_on(aggr_port_t *portp) lacp_reset_port(portp); portp->lp_lacp.sm.lacp_on = B_TRUE; - AGGR_LACP_DBG(("lacp_on:(%s): \n", portp->lp_devname)); + AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid)); lacp_receive_sm(portp, NULL); lacp_mux_sm(portp); @@ -1570,12 +1566,12 @@ lacp_off(aggr_port_t *portp) portp->lp_lacp.sm.lacp_on = B_FALSE; - AGGR_LACP_DBG(("lacp_off:(%s): \n", portp->lp_devname)); + AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid)); /* - * Disable Slow Protocol Timers. We must temporarely release - * the group and port locks in order to avod deadlocks. Make - * sure that the port nor the group are closing after re-acquiring + * Disable Slow Protocol Timers. 
We must temporarily release + * the group and port locks to avoid deadlocks. Make sure that + * neither the port nor group are closing after re-acquiring * their locks. */ rw_exit(&portp->lp_lock); @@ -1619,8 +1615,8 @@ valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) (lacp->partner_info.information_len != sizeof (link_info_t)) || (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) || (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) { - AGGR_LACP_DBG(("trunk link (%s): Malformed LACPDU: " - " Terminator Length = %d \n", portp->lp_devname, + AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: " + " Terminator Length = %d \n", portp->lp_linkid, lacp->terminator_len)); return (B_FALSE); } @@ -1677,8 +1673,8 @@ current_while_timer_pop(void *data) AGGR_LACP_LOCK(portp->lp_grp); - AGGR_LACP_DBG(("trunk link:(%s): current_while_timer " - "pop id=%p\n", portp->lp_devname, + AGGR_LACP_DBG(("trunk link:(%d): current_while_timer " + "pop id=%p\n", portp->lp_linkid, portp->lp_lacp.current_while_timer.id)); portp->lp_lacp.current_while_timer.id = 0; @@ -1765,8 +1761,8 @@ record_PDU(aggr_port_t *portp, lacp_t *lacp) } if (save_sync != pl->PartnerOperPortState.bit.sync) { - AGGR_LACP_DBG(("record_PDU:(%s): partner sync " - "%d -->%d\n", portp->lp_devname, save_sync, + AGGR_LACP_DBG(("record_PDU:(%d): partner sync " + "%d -->%d\n", portp->lp_linkid, save_sync, pl->PartnerOperPortState.bit.sync)); return (B_TRUE); } else { @@ -1797,8 +1793,8 @@ update_selected(aggr_port_t *portp, lacp_t *lacp) (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) || (pl->PartnerOperPortState.bit.aggregation != lacp->actor_info.state.bit.aggregation)) { - AGGR_LACP_DBG(("update_selected:(%s): " - "selected %d-->%d\n", portp->lp_devname, pl->sm.selected, + AGGR_LACP_DBG(("update_selected:(%d): " + "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED)); lacp_port_unselect(portp); @@ -1829,8 +1825,8 @@ update_default_selected(aggr_port_t *portp) 
(pl->PartnerOperPortState.bit.aggregation != pl->PartnerAdminPortState.bit.aggregation)) { - AGGR_LACP_DBG(("update_default_selected:(%s): " - "selected %d-->%d\n", portp->lp_devname, + AGGR_LACP_DBG(("update_default_selected:(%d): " + "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED)); lacp_port_unselect(portp); @@ -1868,8 +1864,8 @@ update_NTT(aggr_port_t *portp, lacp_t *lacp) (pl->ActorOperPortState.bit.aggregation != lacp->partner_info.state.bit.aggregation)) { - AGGR_LACP_DBG(("update_NTT:(%s): NTT %d-->%d\n", - portp->lp_devname, pl->NTT, B_TRUE)); + AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n", + portp->lp_linkid, pl->NTT, B_TRUE)); pl->NTT = B_TRUE; } @@ -1926,8 +1922,8 @@ lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp) if (!((lacp && (oldstate == LACP_CURRENT) && (pl->sm.receive_state == LACP_CURRENT)))) { - AGGR_LACP_DBG(("lacp_receive_sm(%s):%s--->%s\n", - portp->lp_devname, lacp_receive_str[oldstate], + AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n", + portp->lp_linkid, lacp_receive_str[oldstate], lacp_receive_str[pl->sm.receive_state])); } @@ -2015,8 +2011,8 @@ lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp) if (!lacp) /* no LACPDU so current_while_timer popped */ break; - AGGR_LACP_DBG(("lacp_receive_sm: (%s): LACPDU received:\n", - portp->lp_devname)); + AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n", + portp->lp_linkid)); /* * Validate Actor_Information_Length, @@ -2024,9 +2020,9 @@ lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp) * and Terminator_Length fields. */ if (!valid_lacp_pdu(portp, lacp)) { - AGGR_LACP_DBG(("lacp_receive_sm (%s): " + AGGR_LACP_DBG(("lacp_receive_sm (%d): " "Invalid LACPDU received\n", - portp->lp_devname)); + portp->lp_linkid)); break; } @@ -2083,8 +2079,8 @@ aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable) { ASSERT(RW_WRITE_HELD(&portp->lp_lock)); - AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%s) %s\n", - portp->lp_devname, enable ? 
"ENABLED" : "DISABLED")); + AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n", + portp->lp_linkid, enable ? "ENABLED" : "DISABLED")); if (!enable) { /* @@ -2126,8 +2122,8 @@ aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp) switch (lacp->subtype) { case LACP_SUBTYPE: - AGGR_LACP_DBG(("aggr_lacp_rx:(%s): LACPDU received.\n", - portp->lp_devname)); + AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n", + portp->lp_linkid)); AGGR_LACP_LOCK(portp->lp_grp); if (!portp->lp_lacp.sm.lacp_on) { @@ -2139,16 +2135,16 @@ aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp) break; case MARKER_SUBTYPE: - AGGR_LACP_DBG(("aggr_lacp_rx:(%s): Marker Packet received.\n", - portp->lp_devname)); + AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n", + portp->lp_linkid)); (void) receive_marker_pdu(portp, dmp); break; default: - AGGR_LACP_DBG(("aggr_lacp_rx: (%s): " + AGGR_LACP_DBG(("aggr_lacp_rx: (%d): " "Unknown Slow Protocol type %d\n", - portp->lp_devname, lacp->subtype)); + portp->lp_linkid, lacp->subtype)); break; } diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 0beb8f364e..bc08874d25 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,7 +48,6 @@ #include <sys/stat.h> #include <sys/sdt.h> #include <sys/dlpi.h> - #include <sys/aggr.h> #include <sys/aggr_impl.h> @@ -88,7 +87,7 @@ aggr_port_init(void) /* * Allocate a id space to manage port identification. The range of * the arena will be from 1 to UINT16_MAX, because the LACP protocol - * uses it to be a 16 bits unique identfication. + * specifies 16-bit unique identification. 
*/ aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX); ASSERT(aggr_portids != NULL); @@ -127,35 +126,67 @@ aggr_port_init_callbacks(aggr_port_t *port) } int -aggr_port_create(const char *name, aggr_port_t **pp) +aggr_port_create(const datalink_id_t linkid, boolean_t force, aggr_port_t **pp) { int err; mac_handle_t mh; aggr_port_t *port; uint16_t portid; uint_t i; + boolean_t no_link_update = B_FALSE; const mac_info_t *mip; + uint32_t note; + uint32_t margin; *pp = NULL; - if ((err = mac_open(name, &mh)) != 0) + if ((err = mac_open_by_linkid(linkid, &mh)) != 0) return (err); mip = mac_info(mh); if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) { - mac_close(mh); - return (EINVAL); + err = EINVAL; + goto fail; + } + + /* + * If the underlying MAC does not support link update notification, it + * can only be aggregated if `force' is set. This is because aggr + * depends on link notifications to attach ports whose link is up. + */ + note = mac_no_notification(mh); + if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) { + no_link_update = B_TRUE; + if (!force) { + /* + * We borrow this error code to indicate that link + * notification is not supported. + */ + err = ENETDOWN; + goto fail; + } } if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) { - mac_close(mh); - return (ENOMEM); + err = ENOMEM; + goto fail; + } + + /* + * As the underlying mac's current margin size is used to determine + * the margin size of the aggregation itself, request the underlying + * mac not to change to a smaller size. 
+ */ + if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) { + id_free(aggr_portids, portid); + goto fail; } if (!mac_active_set(mh)) { + VERIFY(mac_margin_remove(mh, margin) == 0); id_free(aggr_portids, portid); - mac_close(mh); - return (EBUSY); + err = EBUSY; + goto fail; } port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP); @@ -164,7 +195,7 @@ aggr_port_create(const char *name, aggr_port_t **pp) port->lp_next = NULL; port->lp_mh = mh; port->lp_mip = mip; - (void) strlcpy(port->lp_devname, name, sizeof (port->lp_devname)); + port->lp_linkid = linkid; port->lp_closing = 0; /* get the port's original MAC address */ @@ -181,12 +212,14 @@ aggr_port_create(const char *name, aggr_port_t **pp) port->lp_started = B_FALSE; port->lp_tx_enabled = B_FALSE; port->lp_promisc_on = B_FALSE; + port->lp_no_link_update = no_link_update; port->lp_portid = portid; + port->lp_margin = margin; /* * Save the current statistics of the port. They will be used - * later by aggr_m_stats() when aggregating the stastics of - * the consistituent ports. + * later by aggr_m_stats() when aggregating the statistics of + * the constituent ports. */ for (i = 0; i < MAC_NSTAT; i++) { port->lp_stat[i] = @@ -202,11 +235,16 @@ aggr_port_create(const char *name, aggr_port_t **pp) *pp = port; return (0); + +fail: + mac_close(mh); + return (err); } void aggr_port_delete(aggr_port_t *port) { + VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0); mac_rx_remove_wait(port->lp_mh); mac_resource_set(port->lp_mh, NULL, NULL); mac_notify_remove(port->lp_mh, port->lp_mnh); @@ -237,7 +275,7 @@ aggr_port_free(aggr_port_t *port) /* * Invoked upon receiving a MAC_NOTE_LINK notification for - * one of the consistuent ports. + * one of the constituent ports. */ boolean_t aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock) @@ -259,8 +297,12 @@ aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock) rw_enter(&port->lp_lock, RW_WRITER); - /* link state change? 
*/ - link_state = mac_link_get(port->lp_mh); + /* + * link state change? For links that do not support link state + * notification, always assume the link is up. + */ + link_state = port->lp_no_link_update ? LINK_STATE_UP : + mac_link_get(port->lp_mh); if (port->lp_link_state != link_state) { if (link_state == LINK_STATE_UP) do_attach = (port->lp_link_state != LINK_STATE_UP); @@ -303,7 +345,6 @@ aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock) rw_exit(&grp->lg_lock); AGGR_LACP_UNLOCK(grp); } - return (link_state_changed); } @@ -321,7 +362,6 @@ aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port, ASSERT(mac_addr_changedp != NULL); ASSERT(link_state_changedp != NULL); - AGGR_LACP_LOCK(grp); rw_enter(&grp->lg_lock, RW_WRITER); diff --git a/usr/src/uts/common/io/aggr/aggr_recv.c b/usr/src/uts/common/io/aggr/aggr_recv.c index 6e409e4b89..bf98e65ee3 100644 --- a/usr/src/uts/common/io/aggr/aggr_recv.c +++ b/usr/src/uts/common/io/aggr/aggr_recv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -46,7 +46,7 @@ aggr_recv_lacp(aggr_port_t *port, mblk_t *mp) { aggr_grp_t *grp = port->lp_grp; - /* in promiscous mode, send copy of packet up */ + /* in promiscuous mode, send copy of packet up */ if (grp->lg_promisc) { mblk_t *nmp = copymsg(mp); @@ -67,6 +67,17 @@ aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) aggr_port_t *port = (aggr_port_t *)arg; aggr_grp_t *grp = port->lp_grp; + /* + * If this message is looped back from the legacy devices, drop + * it as the Nemo framework will be responsible for looping it + * back by the mac_txloop() function. 
+ */ + if (mp->b_flag & MSGNOLOOP) { + ASSERT(mp->b_next == NULL); + freemsg(mp); + return; + } + if (grp->lg_lacp_mode == AGGR_LACP_OFF) { mac_rx(grp->lg_mh, mrh, mp); } else { diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c index 088d0a5aaf..f80a750652 100644 --- a/usr/src/uts/common/io/bge/bge_main2.c +++ b/usr/src/uts/common/io/bge/bge_main2.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2860,6 +2860,7 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_callbacks = &bge_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = cidp->ethmax_size - sizeof (struct ether_header); + macp->m_margin = VLAN_TAGSZ; /* * Finally, we're ready to register ourselves with the MAC layer * interface; if this succeeds, we're all ready to start() diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index ad3440e2d8..2b394c051d 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -34,9 +34,11 @@ #include <sys/modctl.h> #include <sys/stat.h> #include <sys/strsun.h> +#include <sys/vlan.h> #include <sys/dld.h> #include <sys/dld_impl.h> #include <sys/dls_impl.h> +#include <sys/softmac.h> #include <sys/vlan.h> #include <inet/common.h> @@ -83,6 +85,10 @@ dev_info_t *dld_dip; /* dev_info_t for the driver */ uint32_t dld_opt = 0; /* Global options */ static vmem_t *dld_ctl_vmem; /* for control minor numbers */ +#define NAUTOPUSH 32 +static mod_hash_t *dld_ap_hashp; +static krwlock_t dld_ap_hash_lock; + static struct module_info drv_info = { 0, /* mi_idnum */ DLD_DRIVER_NAME, /* mi_idname */ @@ -185,18 +191,46 @@ drv_init(void) NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER); drv_secobj_init(); dld_str_init(); + /* + * Create a hash table for autopush configuration. + */ + dld_ap_hashp = mod_hash_create_idhash("dld_autopush_hash", + NAUTOPUSH, mod_hash_null_valdtor); + + ASSERT(dld_ap_hashp != NULL); + rw_init(&dld_ap_hash_lock, NULL, RW_DRIVER, NULL); +} + +/* ARGSUSED */ +static uint_t +drv_ap_exist(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + boolean_t *pexist = arg; + + *pexist = B_TRUE; + return (MH_WALK_TERMINATE); } static int drv_fini(void) { - int err; + int err; + boolean_t exist = B_FALSE; + + rw_enter(&dld_ap_hash_lock, RW_READER); + mod_hash_walk(dld_ap_hashp, drv_ap_exist, &exist); + rw_exit(&dld_ap_hash_lock); + + if (exist) + return (EBUSY); if ((err = dld_str_fini()) != 0) return (err); drv_secobj_fini(); vmem_destroy(dld_ctl_vmem); + mod_hash_destroy_idhash(dld_ap_hashp); + rw_destroy(&dld_ap_hash_lock); return (0); } @@ -373,241 +407,472 @@ drv_close(queue_t *rq) } /* - * DLDIOCATTR + * DLDIOC_ATTR */ static void drv_ioc_attr(dld_ctl_str_t *ctls, mblk_t *mp) { - dld_ioc_attr_t *diap; - dls_vlan_t *dvp = NULL; - dls_link_t *dlp = NULL; - int err; - queue_t *q = ctls->cs_wq; + dld_ioc_attr_t *diap; + dls_dl_handle_t dlh; + dls_vlan_t *dvp; + int err; + queue_t *q = ctls->cs_wq; if ((err = miocpullup(mp, sizeof 
(dld_ioc_attr_t))) != 0) goto failed; diap = (dld_ioc_attr_t *)mp->b_cont->b_rptr; - diap->dia_name[IFNAMSIZ - 1] = '\0'; - if (dls_vlan_hold(diap->dia_name, &dvp, B_FALSE) != 0) { - err = ENOENT; + if ((err = dls_devnet_hold_tmp(diap->dia_linkid, &dlh)) != 0) goto failed; - } - dlp = dvp->dv_dlp; - (void) strlcpy(diap->dia_dev, dlp->dl_name, sizeof (diap->dia_dev)); - diap->dia_vid = dvp->dv_id; - diap->dia_max_sdu = dlp->dl_mip->mi_sdu_max; + if ((err = dls_vlan_hold(dls_devnet_mac(dlh), + dls_devnet_vid(dlh), &dvp, B_FALSE, B_FALSE)) != 0) { + dls_devnet_rele_tmp(dlh); + goto failed; + } + diap->dia_max_sdu = dvp->dv_dlp->dl_mip->mi_sdu_max; dls_vlan_rele(dvp); + dls_devnet_rele_tmp(dlh); + miocack(q, mp, sizeof (dld_ioc_attr_t), 0); return; failed: ASSERT(err != 0); - if (err == ENOENT) { - char devname[MAXNAMELEN]; - uint_t instance; - major_t major; + miocnak(q, mp, 0, err); +} + +/* + * DLDIOC_PHYS_ATTR + */ +static void +drv_ioc_phys_attr(dld_ctl_str_t *ctls, mblk_t *mp) +{ + dld_ioc_phys_attr_t *dipp; + int err; + dls_dl_handle_t dlh; + dls_dev_handle_t ddh; + dev_t phydev; + queue_t *q = ctls->cs_wq; + + if ((err = miocpullup(mp, sizeof (dld_ioc_phys_attr_t))) != 0) + goto failed; + + dipp = (dld_ioc_phys_attr_t *)mp->b_cont->b_rptr; + /* + * Every physical link should have its physical dev_t kept in the + * daemon. If not, it is not a valid physical link. + */ + if (dls_mgmt_get_phydev(dipp->dip_linkid, &phydev) != 0) { + err = EINVAL; + goto failed; + } + + /* + * Although this is a valid physical link, it might already be removed + * by DR or during system shutdown. softmac_hold_device() would return + * ENOENT in this case. + */ + if ((err = softmac_hold_device(phydev, &ddh)) != 0) + goto failed; + + if (dls_devnet_hold_tmp(dipp->dip_linkid, &dlh) != 0) { /* - * Try to detect if the specified device is gldv3 - * and return ENODEV if it is not. 
+ * Although this is an active physical link, its link type is + * not supported by GLDv3, and therefore it does not have + * vanity naming support. */ - if (ddi_parse(diap->dia_name, devname, &instance) == 0 && - (major = ddi_name_to_major(devname)) != (major_t)-1 && - !GLDV3_DRV(major)) - err = ENODEV; + dipp->dip_novanity = B_TRUE; + } else { + dipp->dip_novanity = B_FALSE; + dls_devnet_rele_tmp(dlh); } + /* + * Get the physical device name from the major number and the instance + * number derived from phydev. + */ + (void) snprintf(dipp->dip_dev, MAXLINKNAMELEN, "%s%d", + ddi_major_to_name(getmajor(phydev)), getminor(phydev) - 1); + + softmac_rele_device(ddh); + + miocack(q, mp, sizeof (dld_ioc_phys_attr_t), 0); + return; + +failed: miocnak(q, mp, 0, err); } - /* - * DLDIOCVLAN + * DLDIOC_CREATE_VLAN */ -typedef struct dld_ioc_vlan_state { - uint_t bytes_left; - dld_ioc_vlan_t *divp; - dld_vlan_info_t *vlanp; -} dld_ioc_vlan_state_t; - -static int -drv_ioc_vlan_info(dls_vlan_t *dvp, void *arg) +static void +drv_ioc_create_vlan(dld_ctl_str_t *ctls, mblk_t *mp) { - dld_ioc_vlan_state_t *statep = arg; + dld_ioc_create_vlan_t *dicp; + int err; + queue_t *q = ctls->cs_wq; - /* - * passed buffer space is limited to 65536 bytes. So - * copy only the vlans associated with the passed link. 
- */ - if (strcmp(dvp->dv_dlp->dl_name, statep->divp->div_name) == 0 && - dvp->dv_id != 0) { - if (statep->bytes_left < sizeof (dld_vlan_info_t)) - return (ENOSPC); - - (void) strlcpy(statep->vlanp->dvi_name, - dvp->dv_name, IFNAMSIZ); - statep->divp->div_count++; - statep->bytes_left -= sizeof (dld_vlan_info_t); - statep->vlanp += 1; + if ((err = miocpullup(mp, sizeof (dld_ioc_create_vlan_t))) != 0) + goto failed; + + dicp = (dld_ioc_create_vlan_t *)mp->b_cont->b_rptr; + + if ((err = dls_devnet_create_vlan(dicp->dic_vlanid, + dicp->dic_linkid, dicp->dic_vid, dicp->dic_force)) != 0) { + goto failed; } - return (0); + + miocack(q, mp, 0, 0); + return; + +failed: + miocnak(q, mp, 0, err); } +/* + * DLDIOC_DELETE_VLAN + */ static void -drv_ioc_vlan(dld_ctl_str_t *ctls, mblk_t *mp) +drv_ioc_delete_vlan(dld_ctl_str_t *ctls, mblk_t *mp) { - dld_ioc_vlan_t *divp; - dld_ioc_vlan_state_t state; - int err = EINVAL; + dld_ioc_delete_vlan_t *didp; + int err; queue_t *q = ctls->cs_wq; - mblk_t *bp; - if ((err = miocpullup(mp, sizeof (dld_ioc_vlan_t))) != 0) - goto failed; + if ((err = miocpullup(mp, sizeof (dld_ioc_delete_vlan_t))) != 0) + goto done; - if ((bp = msgpullup(mp->b_cont, -1)) == NULL) + didp = (dld_ioc_delete_vlan_t *)mp->b_cont->b_rptr; + err = dls_devnet_destroy_vlan(didp->did_linkid); + +done: + if (err == 0) + miocack(q, mp, 0, 0); + else + miocnak(q, mp, 0, err); +} + +/* + * DLDIOC_VLAN_ATTR + */ +static void +drv_ioc_vlan_attr(dld_ctl_str_t *ctls, mblk_t *mp) +{ + dld_ioc_vlan_attr_t *divp; + dls_dl_handle_t dlh; + uint16_t vid; + dls_vlan_t *dvp; + int err; + queue_t *q = ctls->cs_wq; + + if ((err = miocpullup(mp, sizeof (dld_ioc_vlan_attr_t))) != 0) goto failed; - freemsg(mp->b_cont); - mp->b_cont = bp; - divp = (dld_ioc_vlan_t *)bp->b_rptr; - divp->div_count = 0; - state.bytes_left = MBLKL(bp) - sizeof (dld_ioc_vlan_t); - state.divp = divp; - state.vlanp = (dld_vlan_info_t *)(divp + 1); + divp = (dld_ioc_vlan_attr_t *)mp->b_cont->b_rptr; - err = 
dls_vlan_walk(drv_ioc_vlan_info, &state); + /* + * Hold this link to prevent it from being deleted. + */ + err = dls_devnet_hold_tmp(divp->div_vlanid, &dlh); if (err != 0) goto failed; - miocack(q, mp, sizeof (dld_ioc_vlan_t) + - state.divp->div_count * sizeof (dld_vlan_info_t), 0); + if ((vid = dls_devnet_vid(dlh)) == VLAN_ID_NONE) { + dls_devnet_rele_tmp(dlh); + err = EINVAL; + goto failed; + } + + err = dls_vlan_hold(dls_devnet_mac(dlh), vid, &dvp, B_FALSE, B_FALSE); + if (err != 0) { + dls_devnet_rele_tmp(dlh); + err = EINVAL; + goto failed; + } + + divp->div_linkid = dls_devnet_linkid(dlh); + divp->div_implicit = !dls_devnet_is_explicit(dlh); + divp->div_vid = vid; + divp->div_force = dvp->dv_force; + + dls_vlan_rele(dvp); + dls_devnet_rele_tmp(dlh); + miocack(q, mp, sizeof (dld_ioc_vlan_attr_t), 0); return; failed: - ASSERT(err != 0); miocnak(q, mp, 0, err); } /* - * DLDIOCHOLDVLAN + * DLDIOC_RENAME. + * + * This function handles two cases of link renaming. See more in comments above + * dls_datalink_rename(). + */ +static void +drv_ioc_rename(dld_ctl_str_t *ctls, mblk_t *mp) +{ + dld_ioc_rename_t *dir; + mod_hash_key_t key; + mod_hash_val_t val; + int err; + queue_t *q = ctls->cs_wq; + + if ((err = miocpullup(mp, sizeof (dld_ioc_rename_t))) != 0) + goto done; + + dir = (dld_ioc_rename_t *)mp->b_cont->b_rptr; + if ((err = dls_devnet_rename(dir->dir_linkid1, dir->dir_linkid2, + dir->dir_link)) != 0) { + goto done; + } + + if (dir->dir_linkid2 == DATALINK_INVALID_LINKID) + goto done; + + /* + * if dir_linkid2 is not DATALINK_INVALID_LINKID, it means this + * renaming request is to rename a valid physical link (dir_linkid1) + * to a "removed" physical link (dir_linkid2, which is removed by DR + * or during system shutdown). In this case, the link (specified by + * dir_linkid1) would inherit all the configuration of dir_linkid2, + * and dir_linkid1 and its configuration would be lost. + * + * Remove per-link autopush configuration of dir_linkid1 in this case. 
+ */ + key = (mod_hash_key_t)(uintptr_t)dir->dir_linkid1; + rw_enter(&dld_ap_hash_lock, RW_WRITER); + if (mod_hash_find(dld_ap_hashp, key, &val) != 0) { + rw_exit(&dld_ap_hash_lock); + goto done; + } + + VERIFY(mod_hash_remove(dld_ap_hashp, key, &val) == 0); + kmem_free(val, sizeof (dld_ap_t)); + rw_exit(&dld_ap_hash_lock); + +done: + if (err == 0) + miocack(q, mp, 0, 0); + else + miocnak(q, mp, 0, err); +} + +/* + * DLDIOC_SETAUTOPUSH */ static void -drv_hold_vlan(dld_ctl_str_t *ctls, mblk_t *mp) +drv_ioc_setap(dld_ctl_str_t *ctls, mblk_t *mp) { + dld_ioc_ap_t *diap; + dld_ap_t *dap; + int i, err; queue_t *q = ctls->cs_wq; - dld_hold_vlan_t *dhv; - mblk_t *nmp; - int err = EINVAL; - dls_vlan_t *dvp; - char mac[MAXNAMELEN]; - dev_info_t *dip = NULL; - major_t major; - uint_t index; - - nmp = mp->b_cont; - if (nmp == NULL || MBLKL(nmp) < sizeof (dld_hold_vlan_t)) + mod_hash_key_t key; + + if ((err = miocpullup(mp, sizeof (dld_ioc_ap_t))) != 0) goto failed; - dhv = (dld_hold_vlan_t *)nmp->b_rptr; + diap = (dld_ioc_ap_t *)mp->b_cont->b_rptr; + if (diap->dia_npush == 0 || diap->dia_npush > MAXAPUSH) { + err = EINVAL; + goto failed; + } /* - * When a device instance without opens is detached, its - * dls_vlan_t will be destroyed. A subsequent DLDIOCHOLDVLAN - * invoked on this device instance will fail because - * dls_vlan_hold() does not create non-tagged vlans on demand. - * To handle this problem, we must force the creation of the - * dls_vlan_t (if it doesn't already exist) by calling - * ddi_hold_devi_by_instance() before calling dls_vlan_hold(). + * Validate that the specified list of modules exist. 
*/ - if (ddi_parse(dhv->dhv_name, mac, &index) != DDI_SUCCESS) - goto failed; + for (i = 0; i < diap->dia_npush; i++) { + if (fmodsw_find(diap->dia_aplist[i], FMODSW_LOAD) == NULL) { + err = EINVAL; + goto failed; + } + } - if (DLS_PPA2VID(index) == VLAN_ID_NONE && strcmp(mac, "aggr") != 0) { - if ((major = ddi_name_to_major(mac)) == (major_t)-1 || - (dip = ddi_hold_devi_by_instance(major, - DLS_PPA2INST(index), 0)) == NULL) + key = (mod_hash_key_t)(uintptr_t)diap->dia_linkid; + + rw_enter(&dld_ap_hash_lock, RW_WRITER); + if (mod_hash_find(dld_ap_hashp, key, (mod_hash_val_t *)&dap) != 0) { + dap = kmem_zalloc(sizeof (dld_ap_t), KM_NOSLEEP); + if (dap == NULL) { + rw_exit(&dld_ap_hash_lock); + err = ENOMEM; goto failed; + } + + dap->da_linkid = diap->dia_linkid; + err = mod_hash_insert(dld_ap_hashp, key, (mod_hash_val_t)dap); + ASSERT(err == 0); } - err = dls_vlan_hold(dhv->dhv_name, &dvp, B_TRUE); - if (dip != NULL) - ddi_release_devi(dip); + /* + * Update the configuration. + */ + dap->da_anchor = diap->dia_anchor; + dap->da_npush = diap->dia_npush; + for (i = 0; i < diap->dia_npush; i++) { + (void) strlcpy(dap->da_aplist[i], diap->dia_aplist[i], + FMNAMESZ + 1); + } + rw_exit(&dld_ap_hash_lock); - if (err != 0) + miocack(q, mp, 0, 0); + return; + +failed: + miocnak(q, mp, 0, err); +} + +/* + * DLDIOC_GETAUTOPUSH + */ +static void +drv_ioc_getap(dld_ctl_str_t *ctls, mblk_t *mp) +{ + dld_ioc_ap_t *diap; + dld_ap_t *dap; + int i, err; + queue_t *q = ctls->cs_wq; + + if ((err = miocpullup(mp, sizeof (dld_ioc_ap_t))) != 0) goto failed; - if ((err = dls_vlan_setzoneid(dhv->dhv_name, dhv->dhv_zid, - dhv->dhv_docheck)) != 0) { - dls_vlan_rele(dvp); + diap = (dld_ioc_ap_t *)mp->b_cont->b_rptr; + + rw_enter(&dld_ap_hash_lock, RW_READER); + if (mod_hash_find(dld_ap_hashp, + (mod_hash_key_t)(uintptr_t)diap->dia_linkid, + (mod_hash_val_t *)&dap) != 0) { + err = ENOENT; + rw_exit(&dld_ap_hash_lock); goto failed; - } else { - miocack(q, mp, 0, 0); - return; } + + /* + * 
Retrieve the configuration. + */ + diap->dia_anchor = dap->da_anchor; + diap->dia_npush = dap->da_npush; + for (i = 0; i < dap->da_npush; i++) { + (void) strlcpy(diap->dia_aplist[i], dap->da_aplist[i], + FMNAMESZ + 1); + } + rw_exit(&dld_ap_hash_lock); + + miocack(q, mp, sizeof (dld_ioc_ap_t), 0); + return; + failed: miocnak(q, mp, 0, err); } /* - * DLDIOCRELEVLAN + * DLDIOC_CLRAUTOPUSH */ static void -drv_rele_vlan(dld_ctl_str_t *ctls, mblk_t *mp) +drv_ioc_clrap(dld_ctl_str_t *ctls, mblk_t *mp) { - queue_t *q = ctls->cs_wq; - dld_hold_vlan_t *dhv; - mblk_t *nmp; + dld_ioc_ap_t *diap; + mod_hash_val_t val; + mod_hash_key_t key; int err; + queue_t *q = ctls->cs_wq; - nmp = mp->b_cont; - if (nmp == NULL || MBLKL(nmp) < sizeof (dld_hold_vlan_t)) { - err = EINVAL; - miocnak(q, mp, 0, err); - return; - } - dhv = (dld_hold_vlan_t *)nmp->b_rptr; + if ((err = miocpullup(mp, sizeof (dld_ioc_ap_t))) != 0) + goto done; - if ((err = dls_vlan_setzoneid(dhv->dhv_name, dhv->dhv_zid, - dhv->dhv_docheck)) != 0) { - miocnak(q, mp, 0, err); - return; - } + diap = (dld_ioc_ap_t *)mp->b_cont->b_rptr; + key = (mod_hash_key_t)(uintptr_t)diap->dia_linkid; - if ((err = dls_vlan_rele_by_name(dhv->dhv_name)) != 0) { - miocnak(q, mp, 0, err); - return; + rw_enter(&dld_ap_hash_lock, RW_WRITER); + if (mod_hash_find(dld_ap_hashp, key, &val) != 0) { + rw_exit(&dld_ap_hash_lock); + goto done; } - miocack(q, mp, 0, 0); + VERIFY(mod_hash_remove(dld_ap_hashp, key, &val) == 0); + kmem_free(val, sizeof (dld_ap_t)); + rw_exit(&dld_ap_hash_lock); + +done: + if (err == 0) + miocack(q, mp, 0, 0); + else + miocnak(q, mp, 0, err); } /* - * DLDIOCZIDGET + * DLDIOC_DOORSERVER */ static void -drv_ioc_zid_get(dld_ctl_str_t *ctls, mblk_t *mp) +drv_ioc_doorserver(dld_ctl_str_t *ctls, mblk_t *mp) { queue_t *q = ctls->cs_wq; - dld_hold_vlan_t *dhv; - mblk_t *nmp; + dld_ioc_door_t *did; int err; - nmp = mp->b_cont; - if (nmp == NULL || MBLKL(nmp) < sizeof (dld_hold_vlan_t)) { - err = EINVAL; + if ((err = 
miocpullup(mp, sizeof (dld_ioc_door_t))) != 0) + goto done; + + did = (dld_ioc_door_t *)mp->b_cont->b_rptr; + err = dls_mgmt_door_set(did->did_start_door); + +done: + if (err == 0) + miocack(q, mp, 0, 0); + else miocnak(q, mp, 0, err); - return; - } - dhv = (dld_hold_vlan_t *)nmp->b_rptr; +} + +/* + * DLDIOC_SETZID + */ +static void +drv_ioc_setzid(dld_ctl_str_t *ctls, mblk_t *mp) +{ + queue_t *q = ctls->cs_wq; + dld_ioc_setzid_t *dis; + int err; + + if ((err = miocpullup(mp, sizeof (dld_ioc_setzid_t))) != 0) + goto done; - if ((err = dls_vlan_getzoneid(dhv->dhv_name, &dhv->dhv_zid)) != 0) + dis = (dld_ioc_setzid_t *)mp->b_cont->b_rptr; + err = dls_devnet_setzid(dis->dis_link, dis->dis_zid); + +done: + if (err == 0) + miocack(q, mp, 0, 0); + else miocnak(q, mp, 0, err); +} + +/* + * DLDIOC_GETZID + */ +static void +drv_ioc_getzid(dld_ctl_str_t *ctls, mblk_t *mp) +{ + queue_t *q = ctls->cs_wq; + dld_ioc_getzid_t *dig; + int err; + + if ((err = miocpullup(mp, sizeof (dld_ioc_getzid_t))) != 0) + goto done; + + dig = (dld_ioc_getzid_t *)mp->b_cont->b_rptr; + err = dls_devnet_getzid(dig->dig_linkid, &dig->dig_zid); + +done: + if (err == 0) + miocack(q, mp, sizeof (dld_ioc_getzid_t), 0); else - miocack(q, mp, sizeof (dld_hold_vlan_t), 0); + miocnak(q, mp, 0, err); } /* @@ -620,29 +885,50 @@ drv_ioc(dld_ctl_str_t *ctls, mblk_t *mp) cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; switch (cmd) { - case DLDIOCATTR: + case DLDIOC_ATTR: drv_ioc_attr(ctls, mp); return; - case DLDIOCVLAN: - drv_ioc_vlan(ctls, mp); + case DLDIOC_PHYS_ATTR: + drv_ioc_phys_attr(ctls, mp); return; - case DLDIOCSECOBJSET: + case DLDIOC_SECOBJ_SET: drv_ioc_secobj_set(ctls, mp); return; - case DLDIOCSECOBJGET: + case DLDIOC_SECOBJ_GET: drv_ioc_secobj_get(ctls, mp); return; - case DLDIOCSECOBJUNSET: + case DLDIOC_SECOBJ_UNSET: drv_ioc_secobj_unset(ctls, mp); return; - case DLDIOCHOLDVLAN: - drv_hold_vlan(ctls, mp); + case DLDIOC_CREATE_VLAN: + drv_ioc_create_vlan(ctls, mp); + return; + case 
DLDIOC_DELETE_VLAN: + drv_ioc_delete_vlan(ctls, mp); + return; + case DLDIOC_VLAN_ATTR: + drv_ioc_vlan_attr(ctls, mp); + return; + case DLDIOC_SETAUTOPUSH: + drv_ioc_setap(ctls, mp); + return; + case DLDIOC_GETAUTOPUSH: + drv_ioc_getap(ctls, mp); + return; + case DLDIOC_CLRAUTOPUSH: + drv_ioc_clrap(ctls, mp); return; - case DLDIOCRELEVLAN: - drv_rele_vlan(ctls, mp); + case DLDIOC_DOORSERVER: + drv_ioc_doorserver(ctls, mp); return; - case DLDIOCZIDGET: - drv_ioc_zid_get(ctls, mp); + case DLDIOC_SETZID: + drv_ioc_setzid(ctls, mp); + return; + case DLDIOC_GETZID: + drv_ioc_getzid(ctls, mp); + return; + case DLDIOC_RENAME: + drv_ioc_rename(ctls, mp); return; default: miocnak(ctls->cs_wq, mp, 0, ENOTSUP); @@ -681,6 +967,55 @@ drv_uw_srv(queue_t *q) } /* + * Check for GLDv3 autopush information. There are three cases: + * + * 1. If devp points to a GLDv3 datalink and it has autopush configuration, + * fill dlap in with that information and return 0. + * + * 2. If devp points to a GLDv3 datalink but it doesn't have autopush + * configuration, then replace devp with the physical device (if one + * exists) and return 1. This allows stropen() to find the old-school + * per-driver autopush configuration. (For softmac, the result is that + * the softmac dev_t is replaced with the legacy device's dev_t). + * + * 3. If neither of the above apply, don't touch the args and return -1. + */ +int +dld_autopush(dev_t *devp, struct dlautopush *dlap) +{ + dld_ap_t *dap; + datalink_id_t linkid; + dev_t phydev; + + if (!GLDV3_DRV(getmajor(*devp))) + return (-1); + + /* + * Find the linkid by the link's dev_t. + */ + if (dls_devnet_dev2linkid(*devp, &linkid) != 0) + return (-1); + + /* + * Find the autopush configuration associated with the linkid. 
+ */ + rw_enter(&dld_ap_hash_lock, RW_READER); + if (mod_hash_find(dld_ap_hashp, (mod_hash_key_t)(uintptr_t)linkid, + (mod_hash_val_t *)&dap) == 0) { + *dlap = dap->da_ap; + rw_exit(&dld_ap_hash_lock); + return (0); + } + rw_exit(&dld_ap_hash_lock); + + if (dls_devnet_phydev(linkid, &phydev) != 0) + return (-1); + + *devp = phydev; + return (1); +} + +/* * Secure objects implementation */ diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c index 3eb892ac71..78543294d4 100644 --- a/usr/src/uts/common/io/dld/dld_proto.c +++ b/usr/src/uts/common/io/dld/dld_proto.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,13 +54,10 @@ static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req, proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req, proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req, proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req, - proto_notify_req, proto_unitdata_req, proto_passive_req; + proto_notify_req, proto_passive_req; static void proto_poll_disable(dld_str_t *); static boolean_t proto_poll_enable(dld_str_t *, dl_capab_dls_t *); -static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *); - -static task_func_t proto_process_unbind_req, proto_process_detach_req; static void proto_soft_ring_disable(dld_str_t *); static boolean_t proto_soft_ring_enable(dld_str_t *, dl_capab_dls_t *); @@ -82,15 +79,12 @@ static void proto_change_soft_ring_fanout(dld_str_t *, int); * by the above primitives. 
*/ void -dld_proto(dld_str_t *dsp, mblk_t *mp) +dld_wput_proto_nondata(dld_str_t *dsp, mblk_t *mp) { union DL_primitives *udlp; t_uscalar_t prim; - if (MBLKL(mp) < sizeof (t_uscalar_t)) { - freemsg(mp); - return; - } + ASSERT(MBLKL(mp) >= sizeof (t_uscalar_t)); udlp = (union DL_primitives *)mp->b_rptr; prim = udlp->dl_primitive; @@ -105,9 +99,6 @@ dld_proto(dld_str_t *dsp, mblk_t *mp) case DL_UNBIND_REQ: (void) proto_unbind_req(dsp, udlp, mp); break; - case DL_UNITDATA_REQ: - (void) proto_unitdata_req(dsp, udlp, mp); - break; case DL_UDQOS_REQ: (void) proto_udqos_req(dsp, udlp, mp); break; @@ -150,28 +141,6 @@ dld_proto(dld_str_t *dsp, mblk_t *mp) } } -/* - * Finish any pending operations. - * Requests that need to be processed asynchronously will be handled - * by a separate thread. After this function returns, other threads - * will be allowed to enter dld; they will not be able to do anything - * until ds_dlstate transitions to a non-pending state. - */ -void -dld_finish_pending_ops(dld_str_t *dsp) -{ - task_func_t *op = NULL; - - ASSERT(MUTEX_HELD(&dsp->ds_thr_lock)); - ASSERT(dsp->ds_thr == 0); - - op = dsp->ds_pending_op; - dsp->ds_pending_op = NULL; - mutex_exit(&dsp->ds_thr_lock); - if (op != NULL) - (void) taskq_dispatch(system_taskq, op, dsp, TQ_SLEEP); -} - #define NEG(x) -(x) typedef struct dl_info_ack_wrapper { @@ -411,30 +380,6 @@ failed: return (B_FALSE); } -/* - * DL_DETACH_REQ - */ -static void -proto_process_detach_req(void *arg) -{ - dld_str_t *dsp = arg; - mblk_t *mp; - - /* - * We don't need to hold locks because no other thread - * would manipulate dsp while it is in a PENDING state. 
- */ - ASSERT(dsp->ds_pending_req != NULL); - ASSERT(dsp->ds_dlstate == DL_DETACH_PENDING); - - mp = dsp->ds_pending_req; - dsp->ds_pending_req = NULL; - dld_str_detach(dsp); - dlokack(dsp->ds_wq, mp, DL_DETACH_REQ); - - DLD_WAKEUP(dsp); -} - /*ARGSUSED*/ static boolean_t proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) @@ -460,18 +405,10 @@ proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) } dsp->ds_dlstate = DL_DETACH_PENDING; + dld_str_detach(dsp); - /* - * Complete the detach when the driver is single-threaded. - */ - mutex_enter(&dsp->ds_thr_lock); - ASSERT(dsp->ds_pending_req == NULL); - dsp->ds_pending_req = mp; - dsp->ds_pending_op = proto_process_detach_req; - dsp->ds_pending_cnt++; - mutex_exit(&dsp->ds_thr_lock); rw_exit(&dsp->ds_lock); - + dlokack(dsp->ds_wq, mp, DL_DETACH_REQ); return (B_TRUE); failed: rw_exit(&dsp->ds_lock); @@ -493,8 +430,11 @@ proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) t_scalar_t sap; queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_WRITER); - + /* + * Because control message processing is serialized, we don't need + * to hold any locks to read any fields of dsp; we only need ds_lock + * to update the ds_dlstate, ds_sap and ds_passivestate fields. + */ if (MBLKL(mp) < sizeof (dl_bind_req_t)) { dl_err = DL_BADPRIM; goto failed; @@ -522,7 +462,6 @@ proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) goto failed; } - dsp->ds_dlstate = DL_BIND_PENDING; /* * Set the receive callback. */ @@ -532,8 +471,8 @@ proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) /* * Bind the channel such that it can receive packets. 
*/ - sap = dsp->ds_sap = dlp->dl_sap; - err = dls_bind(dsp->ds_dc, dlp->dl_sap); + sap = dlp->dl_sap; + err = dls_bind(dsp->ds_dc, sap); if (err != 0) { switch (err) { case EINVAL: @@ -544,7 +483,7 @@ proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) dl_err = DL_SYSERR; break; } - dsp->ds_dlstate = DL_UNBOUND; + if (dsp->ds_passivestate == DLD_UNINITIALIZED) dls_active_clear(dsp->ds_dc); @@ -560,19 +499,27 @@ proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) /* * Copy in the SAP. */ - *(uint16_t *)(dlsap_addr + dlsap_addr_length) = dsp->ds_sap; + *(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap; dlsap_addr_length += sizeof (uint16_t); + rw_enter(&dsp->ds_lock, RW_WRITER); + dsp->ds_dlstate = DL_IDLE; if (dsp->ds_passivestate == DLD_UNINITIALIZED) dsp->ds_passivestate = DLD_ACTIVE; + dsp->ds_sap = sap; + + if (dsp->ds_mode == DLD_FASTPATH) + dsp->ds_tx = str_mdata_fastpath_put; + else if (dsp->ds_mode == DLD_RAW) + dsp->ds_tx = str_mdata_raw_put; + dsp->ds_unitdata_tx = dld_wput_proto_data; rw_exit(&dsp->ds_lock); dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err); return (B_FALSE); } @@ -581,18 +528,21 @@ failed: * DL_UNBIND_REQ */ /*ARGSUSED*/ -static void -proto_process_unbind_req(void *arg) +static boolean_t +proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) { - dld_str_t *dsp = arg; - mblk_t *mp; + queue_t *q = dsp->ds_wq; + t_uscalar_t dl_err; - /* - * We don't need to hold locks because no other thread - * would manipulate dsp while it is in a PENDING state. - */ - ASSERT(dsp->ds_pending_req != NULL); - ASSERT(dsp->ds_dlstate == DL_UNBIND_PENDING); + if (MBLKL(mp) < sizeof (dl_unbind_req_t)) { + dl_err = DL_BADPRIM; + goto failed; + } + + if (dsp->ds_dlstate != DL_IDLE) { + dl_err = DL_OUTSTATE; + goto failed; + } /* * Flush any remaining packets scheduled for transmission. 
@@ -605,76 +555,40 @@ proto_process_unbind_req(void *arg) dls_unbind(dsp->ds_dc); /* + * Clear the receive callback. + */ + dls_rx_set(dsp->ds_dc, NULL, NULL); + + rw_enter(&dsp->ds_lock, RW_WRITER); + + /* * Disable polling mode, if it is enabled. */ proto_poll_disable(dsp); /* - * Clear LSO flags. + * If soft rings were enabled, the workers should be quiesced. */ - dsp->ds_lso = B_FALSE; - dsp->ds_lso_max = 0; + dls_soft_ring_disable(dsp->ds_dc); /* - * Clear the receive callback. + * Clear LSO flags. */ - dls_rx_set(dsp->ds_dc, NULL, NULL); + dsp->ds_lso = B_FALSE; + dsp->ds_lso_max = 0; /* * Set the mode back to the default (unitdata). */ dsp->ds_mode = DLD_UNITDATA; - - /* - * If soft rings were enabled, the workers - * should be quiesced. We cannot check for - * ds_soft_ring flag because - * proto_soft_ring_disable() called from - * proto_capability_req() would have reset it. - */ - if (dls_soft_ring_workers(dsp->ds_dc)) - dls_soft_ring_disable(dsp->ds_dc); - - mp = dsp->ds_pending_req; - dsp->ds_pending_req = NULL; dsp->ds_dlstate = DL_UNBOUND; - dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ); - - DLD_WAKEUP(dsp); -} - -/*ARGSUSED*/ -static boolean_t -proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) -{ - queue_t *q = dsp->ds_wq; - t_uscalar_t dl_err; - - rw_enter(&dsp->ds_lock, RW_WRITER); - - if (MBLKL(mp) < sizeof (dl_unbind_req_t)) { - dl_err = DL_BADPRIM; - goto failed; - } - - if (dsp->ds_dlstate != DL_IDLE) { - dl_err = DL_OUTSTATE; - goto failed; - } - - dsp->ds_dlstate = DL_UNBIND_PENDING; - - mutex_enter(&dsp->ds_thr_lock); - ASSERT(dsp->ds_pending_req == NULL); - dsp->ds_pending_req = mp; - dsp->ds_pending_op = proto_process_unbind_req; - dsp->ds_pending_cnt++; - mutex_exit(&dsp->ds_thr_lock); + DLD_TX_QUIESCE(dsp); rw_exit(&dsp->ds_lock); + dlokack(q, mp, DL_UNBIND_REQ); + return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0); return (B_FALSE); } @@ -688,11 +602,14 @@ 
proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)udlp; int err = 0; t_uscalar_t dl_err; - uint32_t promisc_saved; + uint32_t promisc; queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_WRITER); - + /* + * Because control message processing is serialized, we don't need + * to hold any locks to read any fields of dsp; we only need ds_lock + * to update the ds_promisc and ds_passivestate fields. + */ if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) { dl_err = DL_BADPRIM; goto failed; @@ -704,20 +621,16 @@ proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) goto failed; } - promisc_saved = dsp->ds_promisc; switch (dlp->dl_level) { case DL_PROMISC_SAP: - dsp->ds_promisc |= DLS_PROMISC_SAP; + promisc = DLS_PROMISC_SAP; break; - case DL_PROMISC_MULTI: - dsp->ds_promisc |= DLS_PROMISC_MULTI; + promisc = DLS_PROMISC_MULTI; break; - case DL_PROMISC_PHYS: - dsp->ds_promisc |= DLS_PROMISC_PHYS; + promisc = DLS_PROMISC_PHYS; break; - default: dl_err = DL_NOTSUPPORTED; goto failed; @@ -725,7 +638,6 @@ proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) if (dsp->ds_passivestate == DLD_UNINITIALIZED && !dls_active_set(dsp->ds_dc)) { - dsp->ds_promisc = promisc_saved; dl_err = DL_SYSERR; err = EBUSY; goto failed; @@ -734,24 +646,24 @@ proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) /* * Adjust channel promiscuity. 
*/ - err = dls_promisc(dsp->ds_dc, dsp->ds_promisc); + promisc = (dsp->ds_promisc | promisc); + err = dls_promisc(dsp->ds_dc, promisc); if (err != 0) { dl_err = DL_SYSERR; - dsp->ds_promisc = promisc_saved; if (dsp->ds_passivestate == DLD_UNINITIALIZED) dls_active_clear(dsp->ds_dc); - goto failed; } + rw_enter(&dsp->ds_lock, RW_WRITER); if (dsp->ds_passivestate == DLD_UNINITIALIZED) dsp->ds_passivestate = DLD_ACTIVE; - + dsp->ds_promisc = promisc; rw_exit(&dsp->ds_lock); + dlokack(q, mp, DL_PROMISCON_REQ); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err); return (B_FALSE); } @@ -765,11 +677,14 @@ proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)udlp; int err = 0; t_uscalar_t dl_err; - uint32_t promisc_saved; + uint32_t promisc; queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_WRITER); - + /* + * Because control message processing is serialized, we don't need + * to hold any lock to read any field of dsp; we hold ds_lock to + * update the ds_promisc field. 
+ */ if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) { dl_err = DL_BADPRIM; goto failed; @@ -781,52 +696,40 @@ proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) goto failed; } - promisc_saved = dsp->ds_promisc; switch (dlp->dl_level) { case DL_PROMISC_SAP: - if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) { - dl_err = DL_NOTENAB; - goto failed; - } - dsp->ds_promisc &= ~DLS_PROMISC_SAP; + promisc = DLS_PROMISC_SAP; break; - case DL_PROMISC_MULTI: - if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) { - dl_err = DL_NOTENAB; - goto failed; - } - dsp->ds_promisc &= ~DLS_PROMISC_MULTI; + promisc = DLS_PROMISC_MULTI; break; - case DL_PROMISC_PHYS: - if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) { - dl_err = DL_NOTENAB; - goto failed; - } - dsp->ds_promisc &= ~DLS_PROMISC_PHYS; + promisc = DLS_PROMISC_PHYS; break; - default: dl_err = DL_NOTSUPPORTED; goto failed; } - /* - * Adjust channel promiscuity. - */ - err = dls_promisc(dsp->ds_dc, dsp->ds_promisc); + if (!(dsp->ds_promisc & promisc)) { + dl_err = DL_NOTENAB; + goto failed; + } + + promisc = (dsp->ds_promisc & ~promisc); + err = dls_promisc(dsp->ds_dc, promisc); if (err != 0) { - dsp->ds_promisc = promisc_saved; dl_err = DL_SYSERR; goto failed; } + rw_enter(&dsp->ds_lock, RW_WRITER); + dsp->ds_promisc = promisc; rw_exit(&dsp->ds_lock); + dlokack(q, mp, DL_PROMISCOFF_REQ); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err); return (B_FALSE); } @@ -842,8 +745,11 @@ proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) t_uscalar_t dl_err; queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_WRITER); - + /* + * Because control message processing is serialized, we don't need + * to hold any lock to read any field of dsp; we hold ds_lock to + * update the ds_passivestate field. 
+ */ if (dsp->ds_dlstate == DL_UNATTACHED || DL_ACK_PENDING(dsp->ds_dlstate)) { dl_err = DL_OUTSTATE; @@ -879,20 +785,21 @@ proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) dl_err = DL_SYSERR; break; } + if (dsp->ds_passivestate == DLD_UNINITIALIZED) dls_active_clear(dsp->ds_dc); goto failed; } + rw_enter(&dsp->ds_lock, RW_WRITER); if (dsp->ds_passivestate == DLD_UNINITIALIZED) dsp->ds_passivestate = DLD_ACTIVE; - rw_exit(&dsp->ds_lock); + dlokack(q, mp, DL_ENABMULTI_REQ); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err); return (B_FALSE); } @@ -908,8 +815,10 @@ proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) t_uscalar_t dl_err; queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_READER); - + /* + * Because control message processing is serialized, we don't need + * to hold any lock to read any field of dsp. + */ if (dsp->ds_dlstate == DL_UNATTACHED || DL_ACK_PENDING(dsp->ds_dlstate)) { dl_err = DL_OUTSTATE; @@ -925,17 +834,15 @@ proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) err = dls_multicst_remove(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset); if (err != 0) { - switch (err) { + switch (err) { case EINVAL: dl_err = DL_BADADDR; err = 0; break; - case ENOENT: dl_err = DL_NOTENAB; err = 0; break; - default: dl_err = DL_SYSERR; break; @@ -943,11 +850,9 @@ proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) goto failed; } - rw_exit(&dsp->ds_lock); dlokack(q, mp, DL_DISABMULTI_REQ); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err); return (B_FALSE); } @@ -1019,8 +924,11 @@ proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) t_uscalar_t dl_err; queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_WRITER); - + /* + * Because control message processing is serialized, we don't need + * to hold any locks to read 
any fields of dsp; we only need ds_lock + * to update the ds_passivestate field. + */ if (dsp->ds_dlstate == DL_UNATTACHED || DL_ACK_PENDING(dsp->ds_dlstate)) { dl_err = DL_OUTSTATE; @@ -1053,19 +961,21 @@ proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) dl_err = DL_SYSERR; break; } + if (dsp->ds_passivestate == DLD_UNINITIALIZED) dls_active_clear(dsp->ds_dc); goto failed; } + + rw_enter(&dsp->ds_lock, RW_WRITER); if (dsp->ds_passivestate == DLD_UNINITIALIZED) dsp->ds_passivestate = DLD_ACTIVE; - rw_exit(&dsp->ds_lock); + dlokack(q, mp, DL_SET_PHYS_ADDR_REQ); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err); return (B_FALSE); } @@ -1085,8 +995,6 @@ proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) off = dlp->dl_qos_offset; len = dlp->dl_qos_length; - rw_enter(&dsp->ds_lock, RW_WRITER); - if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) { dl_err = DL_BADPRIM; goto failed; @@ -1104,13 +1012,19 @@ proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) goto failed; } - dsp->ds_pri = selp->dl_priority; + if (dsp->ds_dlstate == DL_UNATTACHED || + DL_ACK_PENDING(dsp->ds_dlstate)) { + dl_err = DL_OUTSTATE; + goto failed; + } + rw_enter(&dsp->ds_lock, RW_WRITER); + dsp->ds_pri = selp->dl_priority; rw_exit(&dsp->ds_lock); + dlokack(q, mp, DL_UDQOS_REQ); return (B_TRUE); failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0); return (B_FALSE); } @@ -1142,9 +1056,8 @@ proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) offset_t off, end; t_uscalar_t dl_err; queue_t *q = dsp->ds_wq; - boolean_t upgraded; - rw_enter(&dsp->ds_lock, RW_READER); + rw_enter(&dsp->ds_lock, RW_WRITER); if (MBLKL(mp) < sizeof (dl_capability_req_t)) { dl_err = DL_BADPRIM; @@ -1180,7 +1093,6 @@ proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) /* * Walk the list of capabilities to be 
enabled. */ - upgraded = B_FALSE; for (end = off + len; off < end; ) { sp = (dl_capability_sub_t *)(mp->b_rptr + off); size = sizeof (dl_capability_sub_t) + sp->dl_length; @@ -1239,24 +1151,6 @@ proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) */ bcopy(pollp, &poll, sizeof (dl_capab_dls_t)); - /* - * We need to become writer before enabling and/or - * disabling the polling interface. If we couldn' - * upgrade, check state again after re-acquiring the - * lock to make sure we can proceed. - */ - if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) { - rw_exit(&dsp->ds_lock); - rw_enter(&dsp->ds_lock, RW_WRITER); - - if (dsp->ds_dlstate == DL_UNATTACHED || - DL_ACK_PENDING(dsp->ds_dlstate)) { - dl_err = DL_OUTSTATE; - goto failed; - } - } - upgraded = B_TRUE; - switch (poll.dls_flags) { default: /*FALLTHRU*/ @@ -1273,12 +1167,15 @@ proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) proto_poll_disable(dsp); /* - * Now attempt enable it. + * Note that only IP should enable POLL. */ if (check_ip_above(dsp->ds_rq) && proto_poll_enable(dsp, &poll)) { bzero(&poll, sizeof (dl_capab_dls_t)); poll.dls_flags = POLL_ENABLE; + } else { + bzero(&poll, sizeof (dl_capab_dls_t)); + poll.dls_flags = POLL_DISABLE; } break; } @@ -1298,24 +1195,6 @@ proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) bcopy(soft_ringp, &soft_ring, sizeof (dl_capab_dls_t)); - /* - * We need to become writer before enabling and/or - * disabling the soft_ring interface. If we couldn' - * upgrade, check state again after re-acquiring the - * lock to make sure we can proceed. 
- */ - if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) { - rw_exit(&dsp->ds_lock); - rw_enter(&dsp->ds_lock, RW_WRITER); - - if (dsp->ds_dlstate == DL_UNATTACHED || - DL_ACK_PENDING(dsp->ds_dlstate)) { - dl_err = DL_OUTSTATE; - goto failed; - } - } - upgraded = B_TRUE; - switch (soft_ring.dls_flags) { default: /*FALLTHRU*/ @@ -1331,19 +1210,17 @@ proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) proto_soft_ring_disable(dsp); /* - * Now attempt enable it. + * Note that only IP can enable soft ring. */ if (check_ip_above(dsp->ds_rq) && proto_soft_ring_enable(dsp, &soft_ring)) { bzero(&soft_ring, sizeof (dl_capab_dls_t)); - soft_ring.dls_flags = - SOFT_RING_ENABLE; + soft_ring.dls_flags = SOFT_RING_ENABLE; } else { bzero(&soft_ring, sizeof (dl_capab_dls_t)); - soft_ring.dls_flags = - SOFT_RING_DISABLE; + soft_ring.dls_flags = SOFT_RING_DISABLE; } break; } @@ -1399,6 +1276,8 @@ proto_notify_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) goto failed; } + note &= ~(mac_no_notification(dsp->ds_mh)); + /* * Cache the notifications that are being enabled. 
*/ @@ -1428,13 +1307,13 @@ failed: } /* - * DL_UINTDATA_REQ + * DL_UNITDATA_REQ */ -static boolean_t -proto_unitdata_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) +void +dld_wput_proto_data(dld_str_t *dsp, mblk_t *mp) { queue_t *q = dsp->ds_wq; - dl_unitdata_req_t *dlp = (dl_unitdata_req_t *)udlp; + dl_unitdata_req_t *dlp = (dl_unitdata_req_t *)mp->b_rptr; off_t off; size_t len, size; const uint8_t *addr; @@ -1444,17 +1323,11 @@ proto_unitdata_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) uint32_t start, stuff, end, value, flags; t_uscalar_t dl_err; - rw_enter(&dsp->ds_lock, RW_READER); - if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) { dl_err = DL_BADPRIM; goto failed; } - if (dsp->ds_dlstate != DL_IDLE) { - dl_err = DL_OUTSTATE; - goto failed; - } addr_length = dsp->ds_mip->mi_addr_length; off = dlp->dl_dest_addr_offset; @@ -1514,25 +1387,14 @@ proto_unitdata_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) */ ASSERT(bp->b_cont == NULL); bp->b_cont = payload; - - /* - * No lock can be held across putnext, which can be called - * from here in dld_tx_single(). The config is held constant - * by the DLD_ENTER done in dld_wput()/dld_wsrv until all - * sending threads are done. - */ - rw_exit(&dsp->ds_lock); dld_tx_single(dsp, bp); - return (B_TRUE); + return; failed: - rw_exit(&dsp->ds_lock); dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0); - return (B_FALSE); + return; baddata: - rw_exit(&dsp->ds_lock); dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0); - return (B_FALSE); } /* @@ -1544,7 +1406,12 @@ proto_passive_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp) { t_uscalar_t dl_err; - rw_enter(&dsp->ds_lock, RW_WRITER); + /* + * READER lock is enough because ds_passivestate can only be changed + * as the result of non-data message processing. + */ + rw_enter(&dsp->ds_lock, RW_READER); + /* * If we've already become active by issuing an active primitive, * then it's too late to try to become passive. 
@@ -1569,7 +1436,6 @@ failed: return (B_FALSE); } - /* * Catch-all handler. */ @@ -1585,7 +1451,7 @@ proto_poll_disable(dld_str_t *dsp) { mac_handle_t mh; - ASSERT(dsp->ds_pending_req != NULL || RW_WRITE_HELD(&dsp->ds_lock)); + ASSERT(RW_WRITE_HELD(&dsp->ds_lock)); if (!dsp->ds_polling) return; @@ -1606,7 +1472,7 @@ proto_poll_disable(dld_str_t *dsp) * Set receive function back to default. */ dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_FASTPATH) ? - dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp); + dld_str_rx_fastpath : dld_str_rx_unitdata, dsp); /* * Note that polling is disabled. @@ -1636,10 +1502,11 @@ proto_poll_enable(dld_str_t *dsp, dl_capab_dls_t *pollp) */ mac_resource_set(mh, (mac_resource_add_t)pollp->dls_ring_add, (void *)pollp->dls_rx_handle); + mac_resources(mh); /* - * Set the receive function. + * Set the upstream receive function. */ dls_rx_set(dsp->ds_dc, (dls_rx_t)pollp->dls_rx, (void *)pollp->dls_rx_handle); @@ -1694,15 +1561,14 @@ proto_soft_ring_enable(dld_str_t *dsp, dl_capab_dls_t *soft_ringp) static void proto_change_soft_ring_fanout(dld_str_t *dsp, int type) { - dls_rx_t rx; + dls_channel_t dc = dsp->ds_dc; if (type == SOFT_RING_NONE) { - rx = (dsp->ds_mode == DLD_FASTPATH) ? - dld_str_rx_fastpath : dld_str_rx_unitdata; - } else { - rx = (dls_rx_t)dls_soft_ring_fanout; + dls_rx_set(dc, (dsp->ds_mode == DLD_FASTPATH) ? 
+ dld_str_rx_fastpath : dld_str_rx_unitdata, dsp); + } else if (type != SOFT_RING_NONE) { + dls_rx_set(dc, (dls_rx_t)dls_soft_ring_fanout, dc); } - dls_soft_ring_rx_set(dsp->ds_dc, rx, dsp, type); } /* @@ -1720,14 +1586,17 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) dl_capab_lso_t lso; dl_capab_zerocopy_t zcopy; uint8_t *ptr; - boolean_t cksum_cap; - boolean_t poll_cap; - boolean_t lso_cap; - mac_capab_lso_t mac_lso; queue_t *q = dsp->ds_wq; mblk_t *mp1; + boolean_t is_vlan = (dsp->ds_vid != VLAN_ID_NONE); + boolean_t poll_capable = B_FALSE; + boolean_t soft_ring_capable = B_FALSE; + boolean_t hcksum_capable = B_FALSE; + boolean_t zcopy_capable = B_FALSE; + boolean_t lso_capable = B_FALSE; + mac_capab_lso_t mac_lso; - ASSERT(RW_READ_HELD(&dsp->ds_lock)); + ASSERT(RW_WRITE_HELD(&dsp->ds_lock)); /* * Initially assume no capabilities. @@ -1735,10 +1604,17 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) subsize = 0; /* - * Advertize soft ring capability unless it has been explicitly - * disabled. + * Check if soft ring can be enabled on this interface. Note that we + * do not enable softring on any legacy drivers, because doing that + * would hurt the performance if the legacy driver has its own taskq + * implementation. Further, most high-performance legacy drivers do + * have their own taskq implementation. + * + * If advertising DL_CAPAB_SOFT_RING has not been explicitly disabled, + * reserve space for that capability. */ - if (!(dld_opt & DLD_OPT_NO_SOFTRING)) { + if (!mac_is_legacy(dsp->ds_mh) && !(dld_opt & DLD_OPT_NO_SOFTRING)) { + soft_ring_capable = B_TRUE; subsize += sizeof (dl_capability_sub_t) + sizeof (dl_capab_dls_t); } @@ -1748,37 +1624,48 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) * If advertising DL_CAPAB_POLL has not been explicitly disabled * then reserve space for that capability. 
*/ - poll_cap = (mac_capab_get(dsp->ds_mh, MAC_CAPAB_POLL, NULL) && - !(dld_opt & DLD_OPT_NO_POLL) && (dsp->ds_vid == VLAN_ID_NONE)); - if (poll_cap) { + if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_POLL, NULL) && + !(dld_opt & DLD_OPT_NO_POLL) && !is_vlan) { + poll_capable = B_TRUE; subsize += sizeof (dl_capability_sub_t) + sizeof (dl_capab_dls_t); } /* - * If the MAC interface supports checksum offload then reserve - * space for the DL_CAPAB_HCKSUM capability. + * Check if checksum offload is supported on this MAC. Don't + * advertise DL_CAPAB_HCKSUM if the underlying MAC is VLAN incapable, + * since it might not be able to do the hardware checksum offload + * with the correct offset. */ - if (cksum_cap = mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM, + bzero(&hcksum, sizeof (dl_capab_hcksum_t)); + if ((!is_vlan || (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_NATIVEVLAN, + NULL))) && mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM, &hcksum.hcksum_txflags)) { - subsize += sizeof (dl_capability_sub_t) + - sizeof (dl_capab_hcksum_t); + if (hcksum.hcksum_txflags != 0) { + hcksum_capable = B_TRUE; + subsize += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_hcksum_t); + } } /* - * If LSO is usable for MAC, reserve space for the DL_CAPAB_LSO - * capability. + * Check if LSO is supported on this MAC, then reserve space for + * the DL_CAPAB_LSO capability. */ - if (lso_cap = mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) { + if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) { + lso_capable = B_TRUE; subsize += sizeof (dl_capability_sub_t) + sizeof (dl_capab_lso_t); } /* - * If DL_CAPAB_ZEROCOPY has not be explicitly disabled then - * reserve space for it. + * Check if zerocopy is supported on this interface. + * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled + * then reserve space for that capability. 
*/ - if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) { + if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) && + !(dld_opt & DLD_OPT_NO_ZEROCOPY)) { + zcopy_capable = B_TRUE; subsize += sizeof (dl_capability_sub_t) + sizeof (dl_capab_zerocopy_t); } @@ -1807,60 +1694,32 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) /* * IP polling interface. */ - if (poll_cap) { + if (poll_capable) { /* * Attempt to disable just in case this is a re-negotiation; - * we need to become writer before doing so. - */ - if (!rw_tryupgrade(&dsp->ds_lock)) { - rw_exit(&dsp->ds_lock); - rw_enter(&dsp->ds_lock, RW_WRITER); - } - - /* - * Check if polling state has changed after we re-acquired - * the lock above, so that we don't mis-advertise it. + * READER lock is enough because ds_polling can only be + * changed as the result of non-data message processing. */ - poll_cap = !(dld_opt & DLD_OPT_NO_POLL) && - (dsp->ds_vid == VLAN_ID_NONE); - - if (!poll_cap) { - int poll_capab_size; - - rw_downgrade(&dsp->ds_lock); - - poll_capab_size = sizeof (dl_capability_sub_t) + - sizeof (dl_capab_dls_t); - - mp->b_wptr -= poll_capab_size; - subsize -= poll_capab_size; - dlap->dl_sub_length = subsize; - } else { - proto_poll_disable(dsp); - - rw_downgrade(&dsp->ds_lock); - - dlsp = (dl_capability_sub_t *)ptr; + proto_poll_disable(dsp); - dlsp->dl_cap = DL_CAPAB_POLL; - dlsp->dl_length = sizeof (dl_capab_dls_t); - ptr += sizeof (dl_capability_sub_t); + dlsp = (dl_capability_sub_t *)ptr; - bzero(&poll, sizeof (dl_capab_dls_t)); - poll.dls_version = POLL_VERSION_1; - poll.dls_flags = POLL_CAPABLE; - poll.dls_tx_handle = (uintptr_t)dsp; - poll.dls_tx = (uintptr_t)str_mdata_fastpath_put; + dlsp->dl_cap = DL_CAPAB_POLL; + dlsp->dl_length = sizeof (dl_capab_dls_t); + ptr += sizeof (dl_capability_sub_t); - dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq); - bcopy(&poll, ptr, sizeof (dl_capab_dls_t)); - ptr += sizeof (dl_capab_dls_t); - } + bzero(&poll, sizeof (dl_capab_dls_t)); + poll.dls_version = 
POLL_VERSION_1; + poll.dls_flags = POLL_CAPABLE; + poll.dls_tx_handle = (uintptr_t)dsp; + poll.dls_tx = (uintptr_t)str_mdata_fastpath_put; + dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq); + bcopy(&poll, ptr, sizeof (dl_capab_dls_t)); + ptr += sizeof (dl_capab_dls_t); } - ASSERT(RW_READ_HELD(&dsp->ds_lock)); - if (!(dld_opt & DLD_OPT_NO_SOFTRING)) { + if (soft_ring_capable) { dlsp = (dl_capability_sub_t *)ptr; dlsp->dl_cap = DL_CAPAB_SOFT_RING; @@ -1885,7 +1744,7 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) /* * TCP/IP checksum offload. */ - if (cksum_cap) { + if (hcksum_capable) { dlsp = (dl_capability_sub_t *)ptr; dlsp->dl_cap = DL_CAPAB_HCKSUM; @@ -1901,7 +1760,7 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) /* * Large segment offload. (LSO) */ - if (lso_cap) { + if (lso_capable) { dlsp = (dl_capability_sub_t *)ptr; dlsp->dl_cap = DL_CAPAB_LSO; @@ -1927,7 +1786,7 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp) /* * Zero copy */ - if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) { + if (zcopy_capable) { dlsp = (dl_capability_sub_t *)ptr; dlsp->dl_cap = DL_CAPAB_ZEROCOPY; diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c index 75d0d6e08c..f89e4a5f94 100644 --- a/usr/src/uts/common/io/dld/dld_str.c +++ b/usr/src/uts/common/io/dld/dld_str.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -33,7 +33,8 @@ #include <sys/strsun.h> #include <sys/strsubr.h> #include <sys/atomic.h> -#include <sys/mkdev.h> +#include <sys/disp.h> +#include <sys/callb.h> #include <sys/vlan.h> #include <sys/dld.h> #include <sys/dld_impl.h> @@ -53,22 +54,35 @@ static void str_notify_speed(dld_str_t *, uint32_t); static void str_notify(void *, mac_notify_type_t); static void ioc_native(dld_str_t *, mblk_t *); +static void ioc_margin(dld_str_t *, mblk_t *); static void ioc_raw(dld_str_t *, mblk_t *); static void ioc_fast(dld_str_t *, mblk_t *); static void ioc(dld_str_t *, mblk_t *); -static void dld_ioc(dld_str_t *, mblk_t *); -static void str_mdata_raw_put(dld_str_t *, mblk_t *); +static void dld_tx_enqueue(dld_str_t *, mblk_t *, mblk_t *, boolean_t, + uint_t, uint_t); +static void dld_wput_nondata(dld_str_t *, mblk_t *); +static void dld_wput_nondata_task(void *); +static void dld_flush_nondata(dld_str_t *); static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t); static mblk_t *i_dld_ether_header_strip_tag(mblk_t *); static uint32_t str_count; static kmem_cache_t *str_cachep; -static uint32_t minor_count; +static taskq_t *dld_disp_taskq = NULL; static mod_hash_t *str_hashp; #define STR_HASHSZ 64 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) +static inline uint_t mp_getsize(mblk_t *); + +/* + * Interval to count the TX queued depth. Default is 1s (1000000us). + * Count the queue depth immediately (not by timeout) if this is set to 0. + * See more details above dld_tx_enqueue(). + */ +uint_t tx_qdepth_interval = 1000000; + /* * Some notes on entry points, flow-control, queueing and locking: * @@ -162,33 +176,19 @@ i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) ASSERT(statep->ds_minor != 0); /* - * Access to ds_ppa and ds_mh need to be protected by ds_lock. + * Access to ds_mh needs to be protected by ds_lock. 
*/ rw_enter(&dsp->ds_lock, RW_READER); - if (statep->ds_minor <= DLD_MAX_MINOR) { - /* - * Style 1: minor can be derived from the ppa. we - * continue to walk until we find a matching stream - * in attached state. - */ - if (statep->ds_minor == DLS_PPA2MINOR(dsp->ds_ppa) && - dsp->ds_mh != NULL) { - statep->ds_dip = mac_devinfo_get(dsp->ds_mh); - rw_exit(&dsp->ds_lock); - return (MH_WALK_TERMINATE); - } - } else { + if (statep->ds_minor == dsp->ds_minor) { /* * Clone: a clone minor is unique. we can terminate the * walk if we find a matching stream -- even if we fail * to obtain the devinfo. */ - if (statep->ds_minor == dsp->ds_minor) { - if (dsp->ds_mh != NULL) - statep->ds_dip = mac_devinfo_get(dsp->ds_mh); - rw_exit(&dsp->ds_lock); - return (MH_WALK_TERMINATE); - } + if (dsp->ds_mh != NULL) + statep->ds_dip = mac_devinfo_get(dsp->ds_mh); + rw_exit(&dsp->ds_lock); + return (MH_WALK_TERMINATE); } rw_exit(&dsp->ds_lock); return (MH_WALK_CONTINUE); @@ -197,21 +197,24 @@ i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) static dev_info_t * dld_finddevinfo(dev_t dev) { + dev_info_t *dip; i_dld_str_state_t state; + if (getminor(dev) == 0) + return (NULL); + + /* + * See if it's a minor node of a link + */ + if ((dip = dls_finddevinfo(dev)) != NULL) + return (dip); + state.ds_minor = getminor(dev); state.ds_major = getmajor(dev); state.ds_dip = NULL; - if (state.ds_minor == 0) - return (NULL); - mod_hash_walk(str_hashp, i_dld_str_walker, &state); - if (state.ds_dip != NULL || state.ds_minor <= DLD_MAX_MINOR) - return (state.ds_dip); - - /* See if it's a minor node of a VLAN */ - return (dls_finddevinfo(dev)); + return (state.ds_dip); } /* @@ -233,10 +236,10 @@ dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) } break; case DDI_INFO_DEVT2INSTANCE: - if (minor > 0 && minor <= DLD_MAX_MINOR) { + if (minor > 0 && minor <= DLS_MAX_MINOR) { *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); rc = DDI_SUCCESS; - } else if (minor > 
DLD_MAX_MINOR && + } else if (minor > DLS_MAX_MINOR && (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); rc = DDI_SUCCESS; @@ -286,12 +289,7 @@ dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) /* * Style 1 open */ - t_uscalar_t ppa; - - if ((err = dls_ppa_from_minor(minor, &ppa)) != 0) - goto failed; - - if ((err = dld_str_attach(dsp, ppa)) != 0) + if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) goto failed; ASSERT(dsp->ds_dlstate == DL_UNBOUND); } else { @@ -323,28 +321,11 @@ dld_close(queue_t *rq) dld_str_t *dsp = rq->q_ptr; /* - * Wait until pending requests are processed. - */ - mutex_enter(&dsp->ds_thr_lock); - while (dsp->ds_pending_cnt > 0) - cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock); - mutex_exit(&dsp->ds_thr_lock); - - /* * Disable the queue srv(9e) routine. */ qprocsoff(rq); - /* - * At this point we can not be entered by any threads via STREAMS - * or the direct call interface, which is available only to IP. - * After the interface is unplumbed, IP wouldn't have any reference - * to this instance, and therefore we are now effectively single - * threaded and don't require any lock protection. Flush all - * pending packets which are sitting in the transmit queue. - */ - ASSERT(dsp->ds_thr == 0); - dld_tx_flush(dsp); + dld_finish_pending_task(dsp); /* * This stream was open to a provider node. Check to see @@ -369,41 +350,57 @@ dld_close(queue_t *rq) void dld_wput(queue_t *wq, mblk_t *mp) { - dld_str_t *dsp = (dld_str_t *)wq->q_ptr; - - DLD_ENTER(dsp); + dld_str_t *dsp = wq->q_ptr; switch (DB_TYPE(mp)) { - case M_DATA: - /* - * State is held constant by the DLD_ENTER done above - * until all sending threads are done. Mode can change - * due to ioctl, however locks must not be held across - * calls to putnext(), which can be called from here - * via dld_tx_single(). 
- */ - rw_enter(&dsp->ds_lock, RW_READER); - if (dsp->ds_dlstate != DL_IDLE || - dsp->ds_mode == DLD_UNITDATA) { - rw_exit(&dsp->ds_lock); + case M_DATA: { + dld_tx_t tx; + + DLD_TX_ENTER(dsp); + if ((tx = dsp->ds_tx) != NULL) + tx(dsp, mp); + else freemsg(mp); - } else if (dsp->ds_mode == DLD_FASTPATH) { - rw_exit(&dsp->ds_lock); - str_mdata_fastpath_put(dsp, mp); - } else if (dsp->ds_mode == DLD_RAW) { - rw_exit(&dsp->ds_lock); - str_mdata_raw_put(dsp, mp); - } + DLD_TX_EXIT(dsp); break; + } case M_PROTO: - case M_PCPROTO: - dld_proto(dsp, mp); + case M_PCPROTO: { + t_uscalar_t prim; + dld_tx_t tx; + + if (MBLKL(mp) < sizeof (t_uscalar_t)) { + freemsg(mp); + return; + } + + prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; + if (prim != DL_UNITDATA_REQ) { + /* Control path */ + dld_wput_nondata(dsp, mp); + break; + } + + /* Data path */ + DLD_TX_ENTER(dsp); + if ((tx = dsp->ds_unitdata_tx) != NULL) + tx(dsp, mp); + else + dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0); + DLD_TX_EXIT(dsp); break; + } case M_IOCTL: - dld_ioc(dsp, mp); + case M_IOCDATA: + /* Control path */ + dld_wput_nondata(dsp, mp); break; case M_FLUSH: + /* + * Flush both the data messages and the control messages. + */ if (*mp->b_rptr & FLUSHW) { + dld_flush_nondata(dsp); dld_tx_flush(dsp); *mp->b_rptr &= ~FLUSHW; } @@ -418,8 +415,17 @@ dld_wput(queue_t *wq, mblk_t *mp) freemsg(mp); break; } +} - DLD_EXIT(dsp); +/* + * Called by GLDv3 control node to process the ioctls. It will start + * a taskq to allow the ioctl processing to block. This is a temporary + * solution, and will be replaced by a more graceful approach afterwards. 
+ */ +void +dld_ioctl(queue_t *wq, mblk_t *mp) +{ + dld_wput_nondata(wq->q_ptr, mp); } /* @@ -428,10 +434,11 @@ dld_wput(queue_t *wq, mblk_t *mp) void dld_wsrv(queue_t *wq) { - mblk_t *mp; + mblk_t *mp, *head, *tail; dld_str_t *dsp = wq->q_ptr; + uint_t cnt, msgcnt; + timeout_id_t tid = 0; - DLD_ENTER(dsp); rw_enter(&dsp->ds_lock, RW_READER); /* * Grab all packets (chained via b_next) off our transmit queue @@ -453,10 +460,13 @@ dld_wsrv(queue_t *wq) ASSERT(dsp->ds_tx_msgcnt == 0); mutex_exit(&dsp->ds_tx_list_lock); rw_exit(&dsp->ds_lock); - DLD_EXIT(dsp); return; } + head = mp; + tail = dsp->ds_tx_list_tail; dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; + cnt = dsp->ds_tx_cnt; + msgcnt = dsp->ds_tx_msgcnt; dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; mutex_exit(&dsp->ds_tx_list_lock); @@ -466,7 +476,7 @@ dld_wsrv(queue_t *wq) * because regardless of the mode all transmit will end up in * dld_tx_single() where the packets may be queued. */ - ASSERT(DB_TYPE(mp) == M_DATA); + ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_MULTIDATA)); if (dsp->ds_dlstate != DL_IDLE) { freemsgchain(mp); goto done; @@ -477,8 +487,27 @@ dld_wsrv(queue_t *wq) * send them all, re-queue the packet(s) at the beginning of * the transmit queue to avoid any re-ordering. */ - if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) - dld_tx_enqueue(dsp, mp, B_TRUE); + mp = dls_tx(dsp->ds_dc, mp); + if (mp == head) { + /* + * No message was sent out. Take the saved queue depth + * as the input, so that dld_tx_enqueue() need not + * calculate it again. + */ + dld_tx_enqueue(dsp, mp, tail, B_TRUE, msgcnt, cnt); + } else if (mp != NULL) { + /* + * Some but not all messages were sent out. dld_tx_enqueue() + * needs to start the timer to calculate the queue depth if + * the timer has not been started. 
+ * + * Note that a timer is used to calculate the queue depth + * to improve network performance, especially for TCP, in + * which case packets are sent without canput() being checked, + * and mostly end up in dld_tx_enqueue() under heavy load. + */ + dld_tx_enqueue(dsp, mp, tail, B_TRUE, 0, 0); + } done: /* @@ -492,11 +521,19 @@ done: dsp->ds_tx_flow_mp = getq(wq); ASSERT(dsp->ds_tx_flow_mp != NULL); dsp->ds_tx_qbusy = B_FALSE; + if ((tid = dsp->ds_tx_qdepth_tid) != 0) + dsp->ds_tx_qdepth_tid = 0; } mutex_exit(&dsp->ds_tx_list_lock); + /* + * Note that ds_tx_list_lock (which is acquired by the timeout + * callback routine) cannot be held across the call to untimeout(). + */ + if (tid != 0) + (void) untimeout(tid); + rw_exit(&dsp->ds_lock); - DLD_EXIT(dsp); } void @@ -566,6 +603,12 @@ dld_str_init(void) ASSERT(str_cachep != NULL); /* + * Create taskq to process DLPI requests. + */ + dld_disp_taskq = taskq_create("dld_disp_taskq", 1024, MINCLSYSPRI, 2, + INT_MAX, TASKQ_DYNAMIC | TASKQ_PREPOPULATE); + + /* * Create a hash table for maintaining dld_str_t's. * The ds_minor field (the clone minor number) of a dld_str_t * is used as a key for this hash table because this number is @@ -587,11 +630,9 @@ dld_str_fini(void) if (str_count != 0) return (EBUSY); - /* - * Check to see if there are any minor numbers still in use. - */ - if (minor_count != 0) - return (EBUSY); + ASSERT(dld_disp_taskq != NULL); + taskq_destroy(dld_disp_taskq); + dld_disp_taskq = NULL; /* * Destroy object cache. @@ -628,6 +669,7 @@ dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) dsp->ds_type = type; dsp->ds_major = major; dsp->ds_style = style; + dsp->ds_tx = dsp->ds_unitdata_tx = NULL; /* * Initialize the queue pointers. @@ -649,6 +691,20 @@ dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) return (dsp); } +void +dld_finish_pending_task(dld_str_t *dsp) +{ + /* + * Wait until the pending requests are processed by the worker thread. 
+ */ + mutex_enter(&dsp->ds_disp_lock); + dsp->ds_closing = B_TRUE; + while (dsp->ds_tid != NULL) + cv_wait(&dsp->ds_disp_cv, &dsp->ds_disp_lock); + dsp->ds_closing = B_FALSE; + mutex_exit(&dsp->ds_disp_lock); +} + /* * Destroy a dld_str_t object. */ @@ -674,11 +730,14 @@ dld_str_destroy(dld_str_t *dsp) ASSERT(dsp->ds_tx_list_tail == NULL); ASSERT(dsp->ds_tx_cnt == 0); ASSERT(dsp->ds_tx_msgcnt == 0); + ASSERT(dsp->ds_tx_qdepth_tid == 0); ASSERT(!dsp->ds_tx_qbusy); - ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); - ASSERT(dsp->ds_thr == 0); - ASSERT(dsp->ds_pending_req == NULL); + ASSERT(MUTEX_NOT_HELD(&dsp->ds_disp_lock)); + ASSERT(dsp->ds_pending_head == NULL); + ASSERT(dsp->ds_pending_tail == NULL); + ASSERT(dsp->ds_tx == NULL); + ASSERT(dsp->ds_unitdata_tx == NULL); /* * Reinitialize all the flags. @@ -720,22 +779,20 @@ str_constructor(void *buf, void *cdrarg, int kmflags) /* * Allocate a new minor number. */ - atomic_add_32(&minor_count, 1); - if ((dsp->ds_minor = dls_minor_hold(kmflags == KM_SLEEP)) == 0) { - atomic_add_32(&minor_count, -1); + if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0) return (-1); - } /* * Initialize the DLPI state machine. 
*/ dsp->ds_dlstate = DL_UNATTACHED; - dsp->ds_ppa = (t_uscalar_t)-1; - mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL); + mutex_init(&dsp->ds_disp_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&dsp->ds_disp_cv, NULL, CV_DRIVER, NULL); + mutex_init(&dsp->ds_tx_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&dsp->ds_tx_cv, NULL, CV_DRIVER, NULL); return (0); } @@ -759,6 +816,10 @@ str_destructor(void *buf, void *cdrarg) */ ASSERT(dsp->ds_mh == NULL); ASSERT(dsp->ds_dc == NULL); + ASSERT(dsp->ds_tx == NULL); + ASSERT(dsp->ds_unitdata_tx == NULL); + ASSERT(dsp->ds_intx_cnt == 0); + ASSERT(dsp->ds_detaching == B_FALSE); /* * Make sure enabled notifications are cleared. @@ -773,8 +834,7 @@ str_destructor(void *buf, void *cdrarg) /* * Release the minor number. */ - dls_minor_rele(dsp->ds_minor); - atomic_add_32(&minor_count, -1); + mac_minor_rele(dsp->ds_minor); ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); rw_destroy(&dsp->ds_lock); @@ -782,27 +842,26 @@ str_destructor(void *buf, void *cdrarg) ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); mutex_destroy(&dsp->ds_tx_list_lock); ASSERT(dsp->ds_tx_flow_mp == NULL); + ASSERT(dsp->ds_pending_head == NULL); + ASSERT(dsp->ds_pending_tail == NULL); + ASSERT(!dsp->ds_closing); + + ASSERT(MUTEX_NOT_HELD(&dsp->ds_disp_lock)); + mutex_destroy(&dsp->ds_disp_lock); + cv_destroy(&dsp->ds_disp_cv); - ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); - mutex_destroy(&dsp->ds_thr_lock); - ASSERT(dsp->ds_pending_req == NULL); - ASSERT(dsp->ds_pending_op == NULL); - ASSERT(dsp->ds_pending_cnt == 0); - cv_destroy(&dsp->ds_pending_cv); + ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_lock)); + mutex_destroy(&dsp->ds_tx_lock); + cv_destroy(&dsp->ds_tx_cv); } -/* - * M_DATA put. Note that mp is a single message, not a chained message. 
- */ void dld_tx_single(dld_str_t *dsp, mblk_t *mp) { /* - * This function can be called from within dld or from an upper - * layer protocol (currently only tcp). If we are in the busy - * mode enqueue the packet(s) and return. Otherwise hand them - * over to the MAC driver for transmission; any remaining one(s) - * which didn't get sent will be queued. + * If we are busy enqueue the packet and return. + * Otherwise hand them over to the MAC driver for transmission. + * If the message didn't get sent it will be queued. * * Note here that we don't grab the list lock prior to checking * the busy flag. This is okay, because a missed transition @@ -812,13 +871,14 @@ dld_tx_single(dld_str_t *dsp, mblk_t *mp) * thread to run; the flag is only cleared by the service thread * when there is no more packet to be transmitted. */ - if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) - dld_tx_enqueue(dsp, mp, B_FALSE); + + if (dsp->ds_tx_qbusy || ((mp = dls_tx(dsp->ds_dc, mp)) != NULL)) + dld_tx_enqueue(dsp, mp, mp, B_FALSE, 1, mp_getsize(mp)); } /* * Update the priority bits and VID (may need to insert tag if mp points - * to an untagged packet. + * to an untagged packet). * If vid is VLAN_ID_NONE, use the VID encoded in the packet. */ static mblk_t * @@ -881,7 +941,7 @@ i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid) /* * Free the original message if it's now empty. Link the - * rest of messages to the header message. + * rest of the messages to the header message. */ if (MBLKL(mp) == 0) { hmp->b_cont = mp->b_cont; @@ -901,7 +961,11 @@ i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid) } /* - * M_DATA put (IP fast-path mode) + * M_DATA put + * + * The poll callback function for DLS clients which are not in the per-stream + * mode. This function is called from an upper layer protocol (currently only + * tcp and udp). 
*/ void str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) @@ -934,9 +998,9 @@ discard: } /* - * M_DATA put (DLIOCRAW mode) + * M_DATA put (DLIOCRAW mode). */ -static void +void str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) { boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); @@ -1032,29 +1096,50 @@ discard: int dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) { - int err; - const char *drvname; - char name[MAXNAMELEN]; - dls_channel_t dc; - uint_t addr_length; + dev_t dev; + int err; + const char *drvname; + dls_channel_t dc; + uint_t addr_length; + boolean_t qassociated = B_FALSE; ASSERT(dsp->ds_dc == NULL); if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) return (EINVAL); - (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); - - if (strcmp(drvname, "aggr") != 0 && strcmp(drvname, "vnic") != 0 && - qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) - return (EINVAL); + /* + * /dev node access. This will still be supported for backward + * compatibility reason. + */ + if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) && + (strcmp(drvname, "vnic") != 0)) { + if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) + return (EINVAL); + qassociated = B_TRUE; + } /* * Open a channel. */ - if ((err = dls_open(name, &dc)) != 0) { - (void) qassociate(dsp->ds_wq, -1); - return (err); + if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA) { + /* + * style-2 VLAN open, this is a /dev VLAN ppa open + * which might result in a newly created dls_vlan_t. 
+ */ + err = dls_open_style2_vlan(dsp->ds_major, ppa, &dc); + if (err != 0) { + if (qassociated) + (void) qassociate(dsp->ds_wq, -1); + return (err); + } + } else { + dev = makedevice(dsp->ds_major, (minor_t)ppa + 1); + if ((err = dls_open_by_dev(dev, &dc)) != 0) { + if (qassociated) + (void) qassociate(dsp->ds_wq, -1); + return (err); + } } /* @@ -1085,7 +1170,6 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) */ dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); - dsp->ds_ppa = ppa; dsp->ds_dc = dc; dsp->ds_dlstate = DL_UNBOUND; @@ -1099,8 +1183,6 @@ dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) void dld_str_detach(dld_str_t *dsp) { - ASSERT(dsp->ds_thr == 0); - /* * Remove the notify function. */ @@ -1114,21 +1196,25 @@ dld_str_detach(dld_str_t *dsp) dld_capabilities_disable(dsp); dsp->ds_promisc = 0; + DLD_TX_QUIESCE(dsp); + + /* + * Flush all pending packets which are sitting in the transmit queue. + */ + dld_tx_flush(dsp); + /* * Clear LSO flags. */ dsp->ds_lso = B_FALSE; dsp->ds_lso_max = 0; - /* - * Close the channel. - */ dls_close(dsp->ds_dc); - dsp->ds_ppa = (t_uscalar_t)-1; dsp->ds_dc = NULL; dsp->ds_mh = NULL; - (void) qassociate(dsp->ds_wq, -1); + if (dsp->ds_style == DL_STYLE2) + (void) qassociate(dsp->ds_wq, -1); /* * Re-initialize the DLPI state machine. @@ -1775,53 +1861,127 @@ str_notify(void *arg, mac_notify_type_t type) str_notify_fastpath_flush(dsp); break; + case MAC_NOTE_MARGIN: + break; default: ASSERT(B_FALSE); break; } } +static inline uint_t +mp_getsize(mblk_t *mp) +{ + ASSERT(DB_TYPE(mp) == M_DATA); + return ((mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); +} + /* - * Enqueue one or more messages to the transmit queue. - * Caller specifies the insertion position (head/tail). + * Calculate the dld queue depth, free the messages that exceed the threshold. 
*/ -void -dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) +static void +dld_tx_qdepth_timer(void *arg) { - mblk_t *tail; - queue_t *q = dsp->ds_wq; - uint_t cnt, msgcnt; - uint_t tot_cnt, tot_msgcnt; + dld_str_t *dsp = (dld_str_t *)arg; + mblk_t *prev, *mp; + uint_t cnt, msgcnt, size; + + mutex_enter(&dsp->ds_tx_list_lock); - ASSERT(DB_TYPE(mp) == M_DATA); /* Calculate total size and count of the packet(s) */ - for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; - tail->b_next != NULL; tail = tail->b_next) { - ASSERT(DB_TYPE(tail->b_next) == M_DATA); - cnt += msgdsize(tail->b_next); + cnt = msgcnt = 0; + for (prev = NULL, mp = dsp->ds_tx_list_head; mp != NULL; + prev = mp, mp = mp->b_next) { + size = mp_getsize(mp); + cnt += size; msgcnt++; + if (cnt >= dld_max_q_count || msgcnt >= dld_max_q_count) { + ASSERT(dsp->ds_tx_qbusy); + dsp->ds_tx_list_tail = prev; + if (prev == NULL) + dsp->ds_tx_list_head = NULL; + else + prev->b_next = NULL; + freemsgchain(mp); + cnt -= size; + msgcnt--; + break; + } } + dsp->ds_tx_cnt = cnt; + dsp->ds_tx_msgcnt = msgcnt; + dsp->ds_tx_qdepth_tid = 0; + mutex_exit(&dsp->ds_tx_list_lock); +} + +/* + * Enqueue one or more messages on the transmit queue. Caller specifies: + * - the insertion position (head/tail). + * - the message count and the total message size of messages to be queued + * if they are known to the caller; or 0 if they are not known. + * + * If the caller does not know the message size information, this usually + * means that dld_wsrv() managed to send some but not all of the queued + * messages. For performance reasons, we do not calculate the queue depth + * every time. Instead, a timer is started to calculate the queue depth + * every 1 second (can be changed by tx_qdepth_interval). 
+ */ +static void +dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, mblk_t *tail, boolean_t head_insert, + uint_t msgcnt, uint_t cnt) +{ + queue_t *q = dsp->ds_wq; + uint_t tot_cnt, tot_msgcnt; + mblk_t *next; mutex_enter(&dsp->ds_tx_list_lock); + + /* + * Simply enqueue the message and calculate the queue depth via + * timer if: + * + * - the current queue depth is incorrect, and the timer is already + * started; or + * + * - the given message size is unknown and it is allowed to start the + * timer; + */ + if ((dsp->ds_tx_qdepth_tid != 0) || + (msgcnt == 0 && tx_qdepth_interval != 0)) { + goto enqueue; + } + /* + * The timer is not allowed, so calculate the message size now. + */ + if (msgcnt == 0) { + for (next = mp; next != NULL; next = next->b_next) { + cnt += mp_getsize(next); + msgcnt++; + } + } + + /* + * Grow the queue depth using the input message size. + * * If the queue depth would exceed the allowed threshold, drop * new packet(s) and drain those already in the queue. */ tot_cnt = dsp->ds_tx_cnt + cnt; tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; - if (!head_insert && - (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { + if (!head_insert && (tot_cnt >= dld_max_q_count || + tot_msgcnt >= dld_max_q_count)) { ASSERT(dsp->ds_tx_qbusy); mutex_exit(&dsp->ds_tx_list_lock); freemsgchain(mp); goto done; } - /* Update the queue size parameters */ dsp->ds_tx_cnt = tot_cnt; dsp->ds_tx_msgcnt = tot_msgcnt; +enqueue: /* * If the transmit queue is currently empty and we are * about to deposit the packet(s) there, switch mode to @@ -1848,6 +2008,17 @@ dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) dsp->ds_tx_list_tail = tail; dsp->ds_tx_list_head = mp; } + + if (msgcnt == 0 && dsp->ds_tx_qdepth_tid == 0 && + tx_qdepth_interval != 0) { + /* + * The message size is not given, so we need to start + * the timer to calculate the queue depth. 
+ */ + dsp->ds_tx_qdepth_tid = timeout(dld_tx_qdepth_timer, dsp, + drv_usectohz(tx_qdepth_interval)); + ASSERT(dsp->ds_tx_qdepth_tid != NULL); + } mutex_exit(&dsp->ds_tx_list_lock); done: /* Schedule service thread to drain the transmit queue */ @@ -1858,6 +2029,8 @@ done: void dld_tx_flush(dld_str_t *dsp) { + timeout_id_t tid = 0; + mutex_enter(&dsp->ds_tx_list_lock); if (dsp->ds_tx_list_head != NULL) { freemsgchain(dsp->ds_tx_list_head); @@ -1868,34 +2041,156 @@ dld_tx_flush(dld_str_t *dsp) ASSERT(dsp->ds_tx_flow_mp != NULL); dsp->ds_tx_qbusy = B_FALSE; } + if ((tid = dsp->ds_tx_qdepth_tid) != 0) + dsp->ds_tx_qdepth_tid = 0; } mutex_exit(&dsp->ds_tx_list_lock); + + /* + * Note that ds_tx_list_lock (which is acquired by the timeout + * callback routine) cannot be held across the call to untimeout(). + */ + if (tid != 0) + (void) untimeout(tid); } /* - * Process an M_IOCTL message. + * Process a non-data message. */ static void -dld_ioc(dld_str_t *dsp, mblk_t *mp) +dld_wput_nondata(dld_str_t *dsp, mblk_t *mp) { - uint_t cmd; + ASSERT((dsp->ds_type == DLD_DLPI && dsp->ds_ioctl == NULL) || + (dsp->ds_type == DLD_CONTROL && dsp->ds_ioctl != NULL)); - cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; - ASSERT(dsp->ds_type == DLD_DLPI); + mutex_enter(&dsp->ds_disp_lock); - switch (cmd) { - case DLIOCNATIVE: - ioc_native(dsp, mp); - break; - case DLIOCRAW: - ioc_raw(dsp, mp); + /* + * The processing of the message might block. Enqueue the + * message for later processing. + */ + if (dsp->ds_pending_head == NULL) { + dsp->ds_pending_head = dsp->ds_pending_tail = mp; + } else { + dsp->ds_pending_tail->b_next = mp; + dsp->ds_pending_tail = mp; + } + + /* + * If there is no task pending, kick off the task. + */ + if (dsp->ds_tid == NULL) { + dsp->ds_tid = taskq_dispatch(dld_disp_taskq, + dld_wput_nondata_task, dsp, TQ_SLEEP); + ASSERT(dsp->ds_tid != NULL); + } + mutex_exit(&dsp->ds_disp_lock); +} + +/* + * The worker thread which processes non-data messages. 
Note we only process + * one message at one time in order to be able to "flush" the queued message + * and serialize the processing. + */ +static void +dld_wput_nondata_task(void *arg) +{ + dld_str_t *dsp = (dld_str_t *)arg; + mblk_t *mp; + + mutex_enter(&dsp->ds_disp_lock); + ASSERT(dsp->ds_pending_head != NULL); + ASSERT(dsp->ds_tid != NULL); + + if (dsp->ds_closing) + goto closing; + + mp = dsp->ds_pending_head; + if ((dsp->ds_pending_head = mp->b_next) == NULL) + dsp->ds_pending_tail = NULL; + mp->b_next = NULL; + + mutex_exit(&dsp->ds_disp_lock); + + switch (DB_TYPE(mp)) { + case M_PROTO: + case M_PCPROTO: + ASSERT(dsp->ds_type == DLD_DLPI); + dld_wput_proto_nondata(dsp, mp); break; - case DLIOCHDRINFO: - ioc_fast(dsp, mp); + case M_IOCTL: { + uint_t cmd; + + if (dsp->ds_type == DLD_CONTROL) { + ASSERT(dsp->ds_ioctl != NULL); + dsp->ds_ioctl(dsp->ds_wq, mp); + break; + } + + cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; + + switch (cmd) { + case DLIOCNATIVE: + ioc_native(dsp, mp); + break; + case DLIOCMARGININFO: + ioc_margin(dsp, mp); + break; + case DLIOCRAW: + ioc_raw(dsp, mp); + break; + case DLIOCHDRINFO: + ioc_fast(dsp, mp); + break; + default: + ioc(dsp, mp); + break; + } break; - default: + } + case M_IOCDATA: + ASSERT(dsp->ds_type == DLD_DLPI); ioc(dsp, mp); + break; } + + mutex_enter(&dsp->ds_disp_lock); + + if (dsp->ds_closing) + goto closing; + + if (dsp->ds_pending_head != NULL) { + dsp->ds_tid = taskq_dispatch(dld_disp_taskq, + dld_wput_nondata_task, dsp, TQ_SLEEP); + ASSERT(dsp->ds_tid != NULL); + } else { + dsp->ds_tid = NULL; + } + mutex_exit(&dsp->ds_disp_lock); + return; + + /* + * If the stream is closing, flush all queued messages and inform + * the stream once it is done. + */ +closing: + freemsgchain(dsp->ds_pending_head); + dsp->ds_pending_head = dsp->ds_pending_tail = NULL; + dsp->ds_tid = NULL; + cv_signal(&dsp->ds_disp_cv); + mutex_exit(&dsp->ds_disp_lock); +} + +/* + * Flush queued non-data messages. 
+ */ +static void +dld_flush_nondata(dld_str_t *dsp) +{ + mutex_enter(&dsp->ds_disp_lock); + freemsgchain(dsp->ds_pending_head); + dsp->ds_pending_head = dsp->ds_pending_tail = NULL; + mutex_exit(&dsp->ds_disp_lock); } /* @@ -1925,6 +2220,32 @@ ioc_native(dld_str_t *dsp, mblk_t *mp) } /* + * DLIOCMARGININFO + */ +static void +ioc_margin(dld_str_t *dsp, mblk_t *mp) +{ + queue_t *q = dsp->ds_wq; + uint32_t margin; + int err; + + if (dsp->ds_dlstate == DL_UNATTACHED) { + err = EINVAL; + goto failed; + } + if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) + goto failed; + + mac_margin_get(dsp->ds_mh, &margin); + *((uint32_t *)mp->b_cont->b_rptr) = margin; + miocack(q, mp, sizeof (uint32_t), 0); + return; + +failed: + miocnak(q, mp, 0, err); +} + +/* * DLIOCRAW */ static void @@ -1932,25 +2253,20 @@ ioc_raw(dld_str_t *dsp, mblk_t *mp) { queue_t *q = dsp->ds_wq; - rw_enter(&dsp->ds_lock, RW_WRITER); if (dsp->ds_polling || dsp->ds_soft_ring) { - rw_exit(&dsp->ds_lock); miocnak(q, mp, 0, EPROTO); return; } - if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { + rw_enter(&dsp->ds_lock, RW_WRITER); + if ((dsp->ds_mode != DLD_RAW) && (dsp->ds_dlstate == DL_IDLE)) { /* * Set the receive callback. */ - dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); + dls_rx_set(dsp->ds_dc, dld_str_rx_raw, dsp); + dsp->ds_tx = str_mdata_raw_put; } - - /* - * Note that raw mode is enabled. 
- */ dsp->ds_mode = DLD_RAW; - rw_exit(&dsp->ds_lock); miocack(q, mp, 0, 0); } @@ -1971,7 +2287,6 @@ ioc_fast(dld_str_t *dsp, mblk_t *mp) uint_t addr_length; queue_t *q = dsp->ds_wq; int err; - dls_channel_t dc; if (dld_opt & DLD_OPT_NO_FASTPATH) { err = ENOTSUP; @@ -2003,62 +2318,41 @@ ioc_fast(dld_str_t *dsp, mblk_t *mp) goto failed; } - rw_enter(&dsp->ds_lock, RW_READER); + /* + * We don't need to hold any locks to access ds_dlstate, because + * control message processing (which updates this field) is + * serialized. 
+ */ if (dsp->ds_dlstate != DL_IDLE) { - rw_exit(&dsp->ds_lock); err = ENOTSUP; goto failed; } addr_length = dsp->ds_mip->mi_addr_length; if (len != addr_length + sizeof (uint16_t)) { - rw_exit(&dsp->ds_lock); err = EINVAL; goto failed; } addr = nmp->b_rptr + off; sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); - dc = dsp->ds_dc; - if ((hmp = dls_header(dc, addr, sap, 0, NULL)) == NULL) { - rw_exit(&dsp->ds_lock); + if ((hmp = dls_header(dsp->ds_dc, addr, sap, 0, NULL)) == NULL) { err = ENOMEM; goto failed; } - /* - * This is a performance optimization. We originally entered - * as reader and only become writer upon transitioning into - * the DLD_FASTPATH mode for the first time. Otherwise we - * stay as reader and return the fast-path header to IP. - */ + rw_enter(&dsp->ds_lock, RW_WRITER); + ASSERT(dsp->ds_dlstate == DL_IDLE); if (dsp->ds_mode != DLD_FASTPATH) { - if (!rw_tryupgrade(&dsp->ds_lock)) { - rw_exit(&dsp->ds_lock); - rw_enter(&dsp->ds_lock, RW_WRITER); - - /* - * State may have changed before we re-acquired - * the writer lock in case the upgrade failed. - */ - if (dsp->ds_dlstate != DL_IDLE) { - rw_exit(&dsp->ds_lock); - err = ENOTSUP; - goto failed; - } - } - - /* - * Set the receive callback (unless polling is enabled). - */ - if (!dsp->ds_polling && !dsp->ds_soft_ring) - dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); - /* - * Note that fast-path mode is enabled. + * Set the receive callback (unless polling or + * soft-ring is enabled). */ dsp->ds_mode = DLD_FASTPATH; + if (!dsp->ds_polling && !dsp->ds_soft_ring) + dls_rx_set(dsp->ds_dc, dld_str_rx_fastpath, dsp); + dsp->ds_tx = str_mdata_fastpath_put; } rw_exit(&dsp->ds_lock); @@ -2071,23 +2365,17 @@ failed: miocnak(q, mp, 0, err); } -/* - * Catch-all handler. 
- */ static void ioc(dld_str_t *dsp, mblk_t *mp) { queue_t *q = dsp->ds_wq; mac_handle_t mh; - rw_enter(&dsp->ds_lock, RW_READER); if (dsp->ds_dlstate == DL_UNATTACHED) { - rw_exit(&dsp->ds_lock); miocnak(q, mp, 0, EINVAL); return; } mh = dsp->ds_mh; ASSERT(mh != NULL); - rw_exit(&dsp->ds_lock); mac_ioctl(mh, q, mp); } diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c index 0e9089dff0..2002e994bf 100644 --- a/usr/src/uts/common/io/dls/dls.c +++ b/usr/src/uts/common/io/dls/dls.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -34,6 +34,7 @@ #include <sys/strsun.h> #include <sys/sysmacros.h> #include <sys/atomic.h> +#include <sys/stat.h> #include <sys/dlpi.h> #include <sys/vlan.h> #include <sys/ethernet.h> @@ -53,6 +54,8 @@ struct dls_kstats dls_kstat = { "soft_ring_pkt_drop", KSTAT_DATA_UINT32 }, }; +static int dls_open(dls_vlan_t *, dls_dl_handle_t ddh, dls_channel_t *); + /* * Private functions. */ @@ -78,6 +81,7 @@ i_dls_destructor(void *buf, void *arg) ASSERT(dip->di_dvp == NULL); ASSERT(dip->di_mnh == NULL); ASSERT(dip->di_dmap == NULL); + ASSERT(!dip->di_local); ASSERT(!dip->di_bound); ASSERT(dip->di_rx == NULL); ASSERT(dip->di_txinfo == NULL); @@ -164,47 +168,109 @@ dls_fini(void) } /* - * Client function. + * Client functions. */ +/* + * /dev node style-2 VLAN PPA access. This might result in a newly created + * dls_vlan_t. Note that this dls_vlan_t is different from others, in that + * this VLAN might not have a link name that is managed by dlmgmtd (we cannot + * use its VLAN ppa hack name as it might conflict with a vanity name). 
+ */ int -dls_create(const char *linkname, const char *macname) +dls_open_style2_vlan(major_t major, uint_t ppa, dls_channel_t *dcp) { - return (dls_vlan_create(linkname, macname, 0)); + dev_t dev = makedevice(major, DLS_PPA2INST(ppa) + 1); + uint_t vid = DLS_PPA2VID(ppa); + dls_vlan_t *lndvp, *dvp; + int err; + + /* + * First find the dls_vlan_t this VLAN is created on. This must be + * a GLDv3 driver based device. + */ + if ((err = dls_vlan_hold_by_dev(dev, &lndvp)) != 0) + return (err); + + if (vid > VLAN_ID_MAX) { + /* + * Bail out through the common exit so that the hold on + * lndvp taken above is released; returning directly + * would leak that reference. + */ + err = ENOENT; + goto done; + } + + err = dls_vlan_hold(lndvp->dv_dlp->dl_name, vid, &dvp, B_FALSE, B_TRUE); + if (err != 0) + goto done; + + if ((err = dls_open(dvp, NULL, dcp)) != 0) + dls_vlan_rele(dvp); + +done: + dls_vlan_rele(lndvp); + return (err); } int -dls_destroy(const char *name) +dls_open_by_dev(dev_t dev, dls_channel_t *dcp) { - return (dls_vlan_destroy(name)); + dls_dl_handle_t ddh; + dls_vlan_t *dvp; + int err; + + /* + * Get a reference to the given dls_vlan_t. + */ + if ((err = dls_devnet_open_by_dev(dev, &dvp, &ddh)) != 0) + return (err); + + if ((err = dls_open(dvp, ddh, dcp)) != 0) { + if (ddh != NULL) + dls_devnet_close(ddh); + else + dls_vlan_rele(dvp); + } + + return (err); } -int -dls_open(const char *name, dls_channel_t *dcp) +static int +dls_open(dls_vlan_t *dvp, dls_dl_handle_t ddh, dls_channel_t *dcp) { dls_impl_t *dip; - dls_vlan_t *dvp; dls_link_t *dlp; int err; + zoneid_t zid = getzoneid(); + boolean_t local; /* - * Get a reference to the named dls_vlan_t. - * Tagged vlans get created automatically. + * Check whether this client belongs to the zone of this dvp. Note that + * a global zone client is allowed to open a local zone dvp. 
*/ - if ((err = dls_vlan_hold(name, &dvp, B_TRUE)) != 0) + mutex_enter(&dvp->dv_lock); + if (zid != GLOBAL_ZONEID && dvp->dv_zid != zid) { + mutex_exit(&dvp->dv_lock); + return (ENOENT); + } + local = (zid == dvp->dv_zid); + dvp->dv_zone_ref += (local ? 
1 : 0); + mutex_exit(&dvp->dv_lock); + + dlp = dvp->dv_dlp; + if ((err = mac_start(dlp->dl_mh)) != 0) { + mutex_enter(&dvp->dv_lock); + dvp->dv_zone_ref -= (local ? 1 : 0); + mutex_exit(&dvp->dv_lock); return (err); + } /* * Allocate a new dls_impl_t. */ dip = kmem_cache_alloc(i_dls_impl_cachep, KM_SLEEP); dip->di_dvp = dvp; + dip->di_ddh = ddh; /* * Cache a copy of the MAC interface handle, a pointer to the * immutable MAC info and a copy of the current MAC address. */ - dlp = dvp->dv_dlp; dip->di_mh = dlp->dl_mh; dip->di_mip = dlp->dl_mip; @@ -216,9 +282,11 @@ dls_open(const char *name, dls_channel_t *dcp) dip->di_txinfo = mac_tx_get(dip->di_mh); /* - * Add a notification function so that we get updates from the MAC. + * Add a notification function so that we get updates from + * the MAC. */ - dip->di_mnh = mac_notify_add(dip->di_mh, i_dls_notify, (void *)dip); + dip->di_mnh = mac_notify_add(dip->di_mh, i_dls_notify, + (void *)dip); /* * Bump the kmem_cache count to make sure it is not prematurely @@ -226,16 +294,7 @@ dls_open(const char *name, dls_channel_t *dcp) */ atomic_add_32(&i_dls_impl_count, 1); - /* - * Set the di_zid to the zone id of current zone - */ - dip->di_zid = getzoneid(); - - /* - * Add this dls_impl_t to the list of the "opened stream" - * list of the corresponding dls_vlan_t - */ - dls_vlan_add_impl(dvp, dip); + dip->di_local = local; /* * Hand back a reference to the dls_impl_t. @@ -248,15 +307,22 @@ void dls_close(dls_channel_t dc) { dls_impl_t *dip = (dls_impl_t *)dc; - dls_vlan_t *dvp; - dls_link_t *dlp; + dls_vlan_t *dvp = dip->di_dvp; + dls_link_t *dlp = dvp->dv_dlp; dls_multicst_addr_t *p; dls_multicst_addr_t *nextp; + dls_dl_handle_t ddh = dip->di_ddh; + + if (dip->di_local) { + mutex_enter(&dvp->dv_lock); + dvp->dv_zone_ref--; + mutex_exit(&dvp->dv_lock); + } + dip->di_local = B_FALSE; dls_active_clear(dc); rw_enter(&(dip->di_lock), RW_WRITER); - /* * Remove the notify function. 
*/ @@ -266,9 +332,6 @@ dls_close(dls_channel_t dc) /* * If the dls_impl_t is bound then unbind it. */ - dvp = dip->di_dvp; - dlp = dvp->dv_dlp; - if (dip->di_bound) { rw_exit(&(dip->di_lock)); dls_link_remove(dlp, dip); @@ -276,11 +339,9 @@ dls_close(dls_channel_t dc) dip->di_bound = B_FALSE; } - dip->di_rx = NULL; - dip->di_rx_arg = NULL; - /* - * Walk the list of multicast addresses, disabling each at the MAC. + * Walk the list of multicast addresses, disabling each at + * the MAC. */ for (p = dip->di_dmap; p != NULL; p = nextp) { (void) mac_multicst_remove(dip->di_mh, p->dma_addr); @@ -289,23 +350,19 @@ dls_close(dls_channel_t dc) } dip->di_dmap = NULL; - /* - * Remove this dls_impl_t from the list of the "open streams" - * list of the corresponding dls_vlan_t - */ - dls_vlan_remove_impl(dvp, dip); - + dip->di_rx = NULL; + dip->di_rx_arg = NULL; rw_exit(&(dip->di_lock)); /* * If the MAC has been set in promiscuous mode then disable it. */ (void) dls_promisc(dc, 0); + dip->di_txinfo = NULL; /* * Free the dls_impl_t back to the cache. */ - dip->di_dvp = NULL; dip->di_txinfo = NULL; if (dip->di_soft_ring_list != NULL) { @@ -315,20 +372,27 @@ dls_close(dls_channel_t dc) } dip->di_soft_ring_size = 0; - kmem_cache_free(i_dls_impl_cachep, dip); - /* * Decrement the reference count to allow the cache to be destroyed * if there are no more dls_impl_t. */ atomic_add_32(&i_dls_impl_count, -1); + dip->di_dvp = NULL; + + kmem_cache_free(i_dls_impl_cachep, dip); + + mac_stop(dvp->dv_dlp->dl_mh); + /* * Release our reference to the dls_vlan_t allowing that to be * destroyed if there are no more dls_impl_t. An unreferenced tagged - * vlan gets destroyed automatically. + * (non-persistent) vlan gets destroyed automatically. 
*/ - dls_vlan_rele(dvp); + if (ddh != NULL) + dls_devnet_close(ddh); + else + dls_vlan_rele(dvp); } mac_handle_t @@ -492,6 +556,7 @@ multi: err = mac_promisc_set(dip->di_mh, B_TRUE, MAC_PROMISC); if (err != 0) goto done; + dip->di_promisc |= DLS_PROMISC_PHYS; dlp->dl_npromisc++; } @@ -500,6 +565,7 @@ multi: err = mac_promisc_set(dip->di_mh, B_FALSE, MAC_PROMISC); if (err != 0) goto done; + dip->di_promisc &= ~DLS_PROMISC_PHYS; dlp->dl_npromisc--; } @@ -753,6 +819,13 @@ dls_accept(dls_impl_t *dip, mac_header_info_t *mhip, dls_rx_t *di_rx, if (dip->di_promisc & DLS_PROMISC_PHYS) goto accept; + /* + * For non-promiscs-phys streams, filter out the packets looped back + * from the underlying driver because of promiscuous setting. + */ + if (mhip->mhi_prom_looped) + goto refuse; + switch (mhip->mhi_dsttype) { case MAC_ADDRTYPE_UNICAST: /* @@ -839,6 +912,33 @@ accept: } boolean_t +dls_mac_active_set(dls_link_t *dlp) +{ + mutex_enter(&dlp->dl_lock); + + /* + * If this is the first active client on this link, notify + * the mac that we're becoming an active client. + */ + if (dlp->dl_nactive == 0 && !mac_active_shareable_set(dlp->dl_mh)) { + mutex_exit(&dlp->dl_lock); + return (B_FALSE); + } + dlp->dl_nactive++; + mutex_exit(&dlp->dl_lock); + return (B_TRUE); +} + +void +dls_mac_active_clear(dls_link_t *dlp) +{ + mutex_enter(&dlp->dl_lock); + if (--dlp->dl_nactive == 0) + mac_active_clear(dlp->dl_mh); + mutex_exit(&dlp->dl_lock); +} + +boolean_t dls_active_set(dls_channel_t dc) { dls_impl_t *dip = (dls_impl_t *)dc; @@ -852,18 +952,11 @@ dls_active_set(dls_channel_t dc) return (B_TRUE); } - /* - * If this is the first active client on this link, notify - * the mac that we're becoming an active client. 
- */ - if (dlp->dl_nactive == 0 && !mac_active_shareable_set(dlp->dl_mh)) { + if (!dls_mac_active_set(dlp)) { rw_exit(&dip->di_lock); return (B_FALSE); } dip->di_active = B_TRUE; - mutex_enter(&dlp->dl_lock); - dlp->dl_nactive++; - mutex_exit(&dlp->dl_lock); rw_exit(&dip->di_lock); return (B_TRUE); } @@ -880,22 +973,8 @@ dls_active_clear(dls_channel_t dc) goto out; dip->di_active = B_FALSE; - mutex_enter(&dlp->dl_lock); - if (--dlp->dl_nactive == 0) - mac_active_clear(dip->di_mh); - mutex_exit(&dlp->dl_lock); + dls_mac_active_clear(dlp); + out: rw_exit(&dip->di_lock); } - -dev_info_t * -dls_finddevinfo(dev_t dev) -{ - return (dls_vlan_finddevinfo(dev)); -} - -int -dls_ppa_from_minor(minor_t minor, t_uscalar_t *ppa) -{ - return (dls_vlan_ppa_from_minor(minor, ppa)); -} diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c index e342c95955..759fb97f0a 100644 --- a/usr/src/uts/common/io/dls/dls_link.c +++ b/usr/src/uts/common/io/dls/dls_link.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -90,7 +90,7 @@ i_dls_link_constructor(void *buf, void *arg, int kmflag) bzero(buf, sizeof (dls_link_t)); - (void) sprintf(name, "dls_link_t_%p_hash", buf); + (void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf); dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ, mod_hash_null_valdtor); @@ -190,12 +190,13 @@ i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip, prevp->b_next = mp; /* - * The source, destination, sap, and vlan id must all match - * in a given subchain. + * The source, destination, sap, vlan id and the MSGNOLOOP + * flag must all match in a given subchain. 
*/ if (memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 || memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 || - mhip->mhi_bindsap != cmhi.mhi_bindsap) { + mhip->mhi_bindsap != cmhi.mhi_bindsap || + mhip->mhi_prom_looped != cmhi.mhi_prom_looped) { /* * Note that we don't need to restore the padding. */ @@ -700,7 +701,7 @@ dls_link_txloop(void *arg, mblk_t *mp) static uint_t i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg) { - boolean_t *promiscp = arg; + boolean_t *promiscp = arg; uint32_t sap = KEY_SAP(key); if (sap == DLS_SAP_PROMISC) { @@ -833,7 +834,7 @@ dls_link_hold(const char *name, dls_link_t **dlpp) /* * Insert the dls_link_t. */ - err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)name, + err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t)dlp); ASSERT(err == 0); @@ -841,6 +842,7 @@ dls_link_hold(const char *name, dls_link_t **dlpp) ASSERT(i_dls_link_count != 0); done: + /* * Bump the reference count and hand back the reference. */ @@ -884,26 +886,24 @@ done: int dls_mac_hold(dls_link_t *dlp) { + mac_handle_t mh; int err = 0; + err = mac_open(dlp->dl_name, &mh); + mutex_enter(&dlp->dl_lock); ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL)); ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL)); - - if (dlp->dl_macref == 0) { - /* - * First reference; hold open the MAC interface. 
- */ - err = mac_open(dlp->dl_name, &dlp->dl_mh); - if (err != 0) - goto done; - - dlp->dl_mip = mac_info(dlp->dl_mh); + if (err == 0) { + ASSERT(dlp->dl_mh == NULL || dlp->dl_mh == mh); + if (dlp->dl_mh == NULL) { + dlp->dl_mh = mh; + dlp->dl_mip = mac_info(mh); + } + dlp->dl_macref++; } - dlp->dl_macref++; -done: mutex_exit(&dlp->dl_lock); return (err); } @@ -914,9 +914,9 @@ dls_mac_rele(dls_link_t *dlp) mutex_enter(&dlp->dl_lock); ASSERT(dlp->dl_mh != NULL); + mac_close(dlp->dl_mh); + if (--dlp->dl_macref == 0) { - mac_rx_remove_wait(dlp->dl_mh); - mac_close(dlp->dl_mh); dlp->dl_mh = NULL; dlp->dl_mip = NULL; } @@ -997,7 +997,7 @@ dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip) /* Replace the existing receive function if there is one. */ if (dlp->dl_mrh != NULL) - mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE); + mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_TRUE); dlp->dl_mrh = mac_active_rx_add(dlp->dl_mh, rx, (void *)dlp); mutex_exit(&dlp->dl_lock); } @@ -1073,7 +1073,7 @@ dls_link_remove(dls_link_t *dlp, dls_impl_t *dip) */ if (dlp->dl_impl_count == 0) { rw_exit(&dlp->dl_impl_lock); - mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE); + mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_TRUE); dlp->dl_mrh = NULL; } else { boolean_t promisc = B_FALSE; @@ -1095,7 +1095,7 @@ dls_link_remove(dls_link_t *dlp, dls_impl_t *dip) else rx = i_dls_link_rx; - mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE); + mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_TRUE); dlp->dl_mrh = mac_active_rx_add(dlp->dl_mh, rx, (void *)dlp); } mutex_exit(&dlp->dl_lock); @@ -1152,5 +1152,11 @@ dls_link_header_info(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip) mhip->mhi_istagged = B_FALSE; mhip->mhi_tci = 0; } + + /* + * The message is looped back from the underlying driver. 
+ */ + mhip->mhi_prom_looped = (mp->b_flag & MSGNOLOOP); + return (0); } diff --git a/usr/src/uts/common/io/dls/dls_mgmt.c b/usr/src/uts/common/io/dls/dls_mgmt.c new file mode 100644 index 0000000000..aff6ba26b1 --- /dev/null +++ b/usr/src/uts/common/io/dls/dls_mgmt.c @@ -0,0 +1,1562 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Datalink management routines. 
+ */ + +#include <sys/types.h> +#include <sys/door.h> +#include <sys/zone.h> +#include <sys/modctl.h> +#include <sys/file.h> +#include <sys/modhash.h> +#include <sys/kstat.h> +#include <sys/vnode.h> +#include <sys/cmn_err.h> +#include <sys/vlan.h> +#include <sys/softmac.h> +#include <sys/dls.h> +#include <sys/dls_impl.h> + +static kmem_cache_t *i_dls_devnet_cachep; +static kmutex_t i_dls_mgmt_lock; +static krwlock_t i_dls_devnet_lock; +static mod_hash_t *i_dls_devnet_id_hash; +static mod_hash_t *i_dls_devnet_hash; + +boolean_t devnet_need_rebuild; + +#define VLAN_HASHSZ 67 /* prime */ + +/* Upcall door handle */ +static door_handle_t dls_mgmt_dh = NULL; + +/* + * This structure is used to keep the <linkid, macname, vid> mapping. + */ +typedef struct dls_devnet_s { + datalink_id_t dd_vlanid; + datalink_id_t dd_linkid; + char dd_mac[MAXNAMELEN]; + uint16_t dd_vid; + char dd_spa[MAXSPALEN]; + boolean_t dd_explicit; + kstat_t *dd_ksp; + + uint32_t dd_ref; + + kmutex_t dd_mutex; + kcondvar_t dd_cv; + uint32_t dd_tref; + + kmutex_t dd_zid_mutex; + zoneid_t dd_zid; +} dls_devnet_t; + +/*ARGSUSED*/ +static int +i_dls_devnet_constructor(void *buf, void *arg, int kmflag) +{ + dls_devnet_t *ddp = buf; + + bzero(buf, sizeof (dls_devnet_t)); + mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ddp->dd_zid_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL); + return (0); +} + +/*ARGSUSED*/ +static void +i_dls_devnet_destructor(void *buf, void *arg) +{ + dls_devnet_t *ddp = buf; + + ASSERT(ddp->dd_ksp == NULL); + ASSERT(ddp->dd_ref == 0); + ASSERT(ddp->dd_tref == 0); + ASSERT(!ddp->dd_explicit); + mutex_destroy(&ddp->dd_mutex); + mutex_destroy(&ddp->dd_zid_mutex); + cv_destroy(&ddp->dd_cv); +} + +/* + * Module initialization and finalization functions. 
+ */ +void +dls_mgmt_init(void) +{ + mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL); + + /* + * Create a kmem_cache of dls_devnet_t structures. + */ + i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache", + sizeof (dls_devnet_t), 0, i_dls_devnet_constructor, + i_dls_devnet_destructor, NULL, NULL, NULL, 0); + ASSERT(i_dls_devnet_cachep != NULL); + + /* + * Create a hash table, keyed by dd_vlanid, of dls_devnet_t. + */ + i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash", + VLAN_HASHSZ, mod_hash_null_valdtor); + + /* + * Create a hash table, keyed by dd_spa. + */ + i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash", + VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, + mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); + + devnet_need_rebuild = B_FALSE; +} + +void +dls_mgmt_fini(void) +{ + mod_hash_destroy_hash(i_dls_devnet_hash); + mod_hash_destroy_hash(i_dls_devnet_id_hash); + kmem_cache_destroy(i_dls_devnet_cachep); + rw_destroy(&i_dls_devnet_lock); + mutex_destroy(&i_dls_mgmt_lock); +} + +int +dls_mgmt_door_set(boolean_t start) +{ + int err; + + /* handle daemon restart */ + mutex_enter(&i_dls_mgmt_lock); + if (dls_mgmt_dh != NULL) { + door_ki_rele(dls_mgmt_dh); + dls_mgmt_dh = NULL; + } + + if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) { + mutex_exit(&i_dls_mgmt_lock); + return (err); + } + + mutex_exit(&i_dls_mgmt_lock); + + /* + * Create and associate <link name, linkid> mapping for network devices + * which are already attached before the daemon is started. 
+ */ + if (start) + softmac_recreate(); + return (0); +} + +static boolean_t +i_dls_mgmt_door_revoked(door_handle_t dh) +{ + struct door_info info; + extern int sys_shutdown; + + ASSERT(dh != NULL); + + if (sys_shutdown) { + cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n"); + return (B_TRUE); + } + + if (door_ki_info(dh, &info) != 0) + return (B_TRUE); + + return ((info.di_attributes & DOOR_REVOKED) != 0); +} + +/* + * Upcall to the datalink management daemon (dlmgmtd). + */ +static int +i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t *rsizep) +{ + door_arg_t darg, save_arg; + struct dlmgmt_null_retval_s *retvalp; + door_handle_t dh; + int err = EINVAL; + int retry = 0; + +#define MAXRETRYNUM 3 + + ASSERT(arg); + darg.data_ptr = arg; + darg.data_size = asize; + darg.desc_ptr = NULL; + darg.desc_num = 0; + darg.rbuf = rbuf; + darg.rsize = *rsizep; + save_arg = darg; + +retry: + mutex_enter(&i_dls_mgmt_lock); + dh = dls_mgmt_dh; + if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) { + mutex_exit(&i_dls_mgmt_lock); + return (EBADF); + } + door_ki_hold(dh); + mutex_exit(&i_dls_mgmt_lock); + + for (;;) { + retry++; + if ((err = door_ki_upcall(dh, &darg)) == 0) + break; + + /* + * handle door call errors + */ + darg = save_arg; + switch (err) { + case EINTR: + /* + * If the operation which caused this door upcall gets + * interrupted, return directly. + */ + goto done; + case EAGAIN: + /* + * Repeat upcall if the maximum attempt limit has not + * been reached. 
+ */ + if (retry < MAXRETRYNUM) { + delay(2 * hz); + break; + } + cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err); + goto done; + default: + /* A fatal door error */ + if (i_dls_mgmt_door_revoked(dh)) { + cmn_err(CE_NOTE, + "dls: dlmgmtd door service revoked\n"); + + if (retry < MAXRETRYNUM) { + door_ki_rele(dh); + goto retry; + } + } + cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err); + goto done; + } + } + + if (darg.rbuf != rbuf) { + /* + * The size of the input rbuf was not big enough, so the + * upcall allocated the rbuf itself. If this happens, assume + * that this was an invalid door call request. + */ + kmem_free(darg.rbuf, darg.rsize); + err = ENOSPC; + goto done; + } + + if (darg.rsize > *rsizep || darg.rsize < sizeof (uint_t)) { + err = EINVAL; + goto done; + } + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + retvalp = (struct dlmgmt_null_retval_s *)darg.rbuf; + if (retvalp->lr_err != 0) { + err = retvalp->lr_err; + goto done; + } + + *rsizep = darg.rsize; + +done: + door_ki_rele(dh); + return (err); +} + +/* + * Request the datalink management daemon to create a link with the attributes + * below. Upon success, zero is returned and linkidp contains the linkid for + * the new link; otherwise, an errno is returned. + * + * - dev physical dev_t. required for all physical links, + * including GLDv3 links. 
It will be used to force the + * attachment of a physical device, hence the + * registration of its mac + * - class datalink class + * - media type media type; DL_OTHER means unknown + * - vid VLAN ID (for VLANs) + * - persist whether to persist the datalink + */ +int +dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class, + uint32_t media, boolean_t persist, datalink_id_t *linkidp) +{ + dlmgmt_upcall_arg_create_t create; + dlmgmt_create_retval_t retval; + size_t rsize; + int err; + + create.ld_cmd = DLMGMT_CMD_DLS_CREATE; + create.ld_class = class; + create.ld_media = media; + create.ld_phymaj = getmajor(dev); + create.ld_phyinst = getminor(dev); + create.ld_persist = persist; + if (strlcpy(create.ld_devname, devname, MAXNAMELEN) >= MAXNAMELEN) + return (EINVAL); + + rsize = sizeof (retval); + + err = i_dls_mgmt_upcall(&create, sizeof (create), &retval, &rsize); + if (err == 0) + *linkidp = retval.lr_linkid; + return (err); +} + +/* + * Request the datalink management daemon to destroy the specified link. + * Returns zero upon success, or an errno upon failure. + */ +int +dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist) +{ + dlmgmt_upcall_arg_destroy_t destroy; + dlmgmt_destroy_retval_t retval; + size_t rsize; + + destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY; + destroy.ld_linkid = linkid; + destroy.ld_persist = persist; + rsize = sizeof (retval); + + return (i_dls_mgmt_upcall(&destroy, sizeof (destroy), &retval, &rsize)); +} + +/* + * Request the datalink management daemon to verify/update the information + * for a physical link. Upon success, get its linkid. + * + * - media type media type + * - novanity whether this physical datalink supports vanity naming. + * physical links that do not use the GLDv3 MAC plugin + * cannot support vanity naming + * + * This function could fail with ENOENT or EEXIST. Two cases return EEXIST: + * + * 1. A link with devname already exists, but the media type does not match. 
+ * In this case, mediap will be set to the media type of the existing link. + * 2. A link with devname already exists, but its link name does not match + * the device name, although this link does not support vanity naming. + */ +int +dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity, + uint32_t *mediap, datalink_id_t *linkidp) +{ + dlmgmt_upcall_arg_update_t update; + dlmgmt_update_retval_t retval; + size_t rsize; + int err; + + update.ld_cmd = DLMGMT_CMD_DLS_UPDATE; + + if (strlcpy(update.ld_devname, devname, MAXNAMELEN) >= MAXNAMELEN) + return (EINVAL); + + update.ld_media = media; + update.ld_novanity = novanity; + rsize = sizeof (retval); + + err = i_dls_mgmt_upcall(&update, sizeof (update), &retval, &rsize); + if (err == EEXIST) { + *linkidp = retval.lr_linkid; + *mediap = retval.lr_media; + } else if (err == 0) { + *linkidp = retval.lr_linkid; + } + + return (err); +} + +/* + * Request the datalink management daemon to get the information for a link. + * Returns zero upon success, or an errno upon failure. + * + * Only fills in information for argument pointers that are non-NULL. + * Note that the link argument is expected to be MAXLINKNAMELEN bytes. 
+ */ +int +dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link, + datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp) +{ + dlmgmt_door_getname_t getname; + dlmgmt_getname_retval_t retval; + size_t rsize; + int err, len; + + getname.ld_cmd = DLMGMT_CMD_GETNAME; + getname.ld_linkid = linkid; + rsize = sizeof (retval); + + err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval, &rsize); + if (err != 0) + return (err); + + len = strlen(retval.lr_link); + if (len <= 1 || len >= MAXLINKNAMELEN) + return (EINVAL); + + if (link != NULL) + (void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN); + if (classp != NULL) + *classp = retval.lr_class; + if (mediap != NULL) + *mediap = retval.lr_media; + if (flagsp != NULL) + *flagsp = retval.lr_flags; + return (0); +} + +/* + * Request the datalink management daemon to get the linkid for a link. + * Returns a non-zero error code on failure. The linkid argument is only + * set on success (when zero is returned.) + */ +int +dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid) +{ + dlmgmt_door_getlinkid_t getlinkid; + dlmgmt_getlinkid_retval_t retval; + size_t rsize; + int err; + + getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID; + (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN); + rsize = sizeof (retval); + + err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval, + &rsize); + if (err == 0) + *linkid = retval.lr_linkid; + return (err); +} + +datalink_id_t +dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class, + datalink_media_t dmedia, uint32_t flags) +{ + dlmgmt_door_getnext_t getnext; + dlmgmt_getnext_retval_t retval; + size_t rsize; + + getnext.ld_cmd = DLMGMT_CMD_GETNEXT; + getnext.ld_class = class; + getnext.ld_dmedia = dmedia; + getnext.ld_flags = flags; + getnext.ld_linkid = linkid; + rsize = sizeof (retval); + + if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval, &rsize) != 0) + return (DATALINK_INVALID_LINKID); + + return (retval.lr_linkid); +} + +static int 
+i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr, + void *attrval, size_t *attrszp) +{ + dlmgmt_upcall_arg_getattr_t getattr; + dlmgmt_getattr_retval_t *retvalp; + size_t oldsize, size; + int err; + + getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR; + getattr.ld_linkid = linkid; + (void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN); + + oldsize = size = *attrszp + sizeof (dlmgmt_getattr_retval_t) - 1; + retvalp = kmem_zalloc(oldsize, KM_SLEEP); + + err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), retvalp, &size); + if (err == 0) { + ASSERT(size <= oldsize); + *attrszp = size + 1 - sizeof (dlmgmt_getattr_retval_t); + bcopy(retvalp->lr_attr, attrval, *attrszp); + } + + kmem_free(retvalp, oldsize); + return (err); +} + +/* + * Note that this function can only get devp successfully for non-VLAN link. + */ +int +dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp) +{ + uint64_t maj, inst; + size_t attrsz = sizeof (uint64_t); + + if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 || + attrsz != sizeof (uint64_t) || + i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 || + attrsz != sizeof (uint64_t)) { + return (EINVAL); + } + + *devp = makedevice((major_t)maj, (minor_t)inst); + return (0); +} + +/* + * Hold the vanity naming structure (dls_devnet_t) temporarily. The request to + * delete the dls_devnet_t will wait until the temporary reference is released. + */ +int +dls_devnet_hold_tmp(datalink_id_t linkid, dls_dl_handle_t *ddhp) +{ + dls_devnet_t *ddp; + dls_dev_handle_t ddh = NULL; + dev_t phydev = 0; + int err; + + /* + * Hold this link to prevent it being detached (if physical link). 
+ */ + if (dls_mgmt_get_phydev(linkid, &phydev) == 0) + (void) softmac_hold_device(phydev, &ddh); + + rw_enter(&i_dls_devnet_lock, RW_READER); + if ((err = mod_hash_find(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) { + ASSERT(err == MH_ERR_NOTFOUND); + rw_exit(&i_dls_devnet_lock); + softmac_rele_device(ddh); + return (ENOENT); + } + + /* + * At least one reference was held when this datalink was created. + */ + ASSERT(ddp->dd_ref > 0); + mutex_enter(&ddp->dd_mutex); + ddp->dd_tref++; + mutex_exit(&ddp->dd_mutex); + rw_exit(&i_dls_devnet_lock); + softmac_rele_device(ddh); + +done: + *ddhp = ddp; + return (0); +} + +void +dls_devnet_rele_tmp(dls_dl_handle_t dlh) +{ + dls_devnet_t *ddp = dlh; + + mutex_enter(&ddp->dd_mutex); + ASSERT(ddp->dd_tref != 0); + if (--ddp->dd_tref == 0) + cv_signal(&ddp->dd_cv); + mutex_exit(&ddp->dd_mutex); +} + +/* + * "link" kstats related functions. + */ + +/* + * Query the "link" kstats. + */ +static int +dls_devnet_stat_update(kstat_t *ksp, int rw) +{ + dls_devnet_t *ddp = ksp->ks_private; + dls_vlan_t *dvp; + int err; + + err = dls_vlan_hold(ddp->dd_mac, ddp->dd_vid, &dvp, B_FALSE, B_FALSE); + if (err != 0) + return (err); + + err = dls_stat_update(ksp, dvp, rw); + dls_vlan_rele(dvp); + return (err); +} + +/* + * Create the "link" kstats. + */ +static void +dls_devnet_stat_create(dls_devnet_t *ddp) +{ + char link[MAXLINKNAMELEN]; + kstat_t *ksp; + + if ((dls_mgmt_get_linkinfo(ddp->dd_vlanid, link, + NULL, NULL, NULL)) != 0) { + return; + } + + if (dls_stat_create("link", 0, link, dls_devnet_stat_update, + ddp, &ksp) != 0) { + return; + } + + ASSERT(ksp != NULL); + ddp->dd_ksp = ksp; +} + +/* + * Destroy the "link" kstats. + */ +static void +dls_devnet_stat_destroy(dls_devnet_t *ddp) +{ + if (ddp->dd_ksp == NULL) + return; + + kstat_delete(ddp->dd_ksp); + ddp->dd_ksp = NULL; +} + +/* + * The link has been renamed. 
Destroy the old non-legacy kstats ("link kstats") + * and create the new set using the new name. + */ +static void +dls_devnet_stat_rename(dls_devnet_t *ddp, const char *link) +{ + kstat_t *ksp; + + if (ddp->dd_ksp != NULL) { + kstat_delete(ddp->dd_ksp); + ddp->dd_ksp = NULL; + } + + if (dls_stat_create("link", 0, link, dls_devnet_stat_update, + ddp, &ksp) != 0) { + return; + } + + ASSERT(ksp != NULL); + ddp->dd_ksp = ksp; +} + +/* + * Associate a linkid with a given link (identified by <macname/vid>) + * + * Several cases: + * a. implicit VLAN creation: (non-NULL "vlan") + * b. explicit VLAN creation: (NULL "vlan") + * c. explicit non-VLAN creation: + * (NULL "vlan" and linkid could be INVALID_LINKID if the physical device + * was created before the daemon was started) + */ +static int +dls_devnet_set(const char *macname, uint16_t vid, + datalink_id_t vlan_linkid, datalink_id_t linkid, const char *vlan, + dls_devnet_t **ddpp) +{ + dls_devnet_t *ddp = NULL; + char spa[MAXSPALEN]; + boolean_t explicit = (vlan == NULL); + datalink_class_t class; + int err; + + ASSERT(vid != VLAN_ID_NONE || explicit); + ASSERT(vlan_linkid != DATALINK_INVALID_LINKID || !explicit || + vid == VLAN_ID_NONE); + + (void) snprintf(spa, MAXSPALEN, "%s/%d", macname, vid); + rw_enter(&i_dls_devnet_lock, RW_WRITER); + if ((err = mod_hash_find(i_dls_devnet_hash, + (mod_hash_key_t)spa, (mod_hash_val_t *)&ddp)) == 0) { + char link[MAXLINKNAMELEN]; + + if (explicit) { + if ((vid != VLAN_ID_NONE) || + (ddp->dd_vlanid != DATALINK_INVALID_LINKID)) { + err = EEXIST; + goto done; + } + + /* + * This might be a physical link that has already + * been created, but which does not have a vlan_linkid + * because dlmgmtd was not running when it was created. 
+ */ + if ((err = dls_mgmt_get_linkinfo(vlan_linkid, NULL, + &class, NULL, NULL)) != 0) { + goto done; + } + + if (class != DATALINK_CLASS_PHYS) { + err = EINVAL; + goto done; + } + + goto newphys; + } + + /* + * Implicit VLAN, but the same name has already + * been associated with another linkid. Check if the name + * of that link matches the given VLAN name. + */ + ASSERT(vid != VLAN_ID_NONE); + if ((err = dls_mgmt_get_linkinfo(ddp->dd_vlanid, link, + NULL, NULL, NULL)) != 0) { + goto done; + } + + if (strcmp(link, vlan) != 0) { + err = EEXIST; + goto done; + } + + /* + * This is not an implicit created VLAN any more, return + * this existing datalink. + */ + ASSERT(ddp->dd_ref > 0); + ddp->dd_ref++; + goto done; + } + + /* + * Request the daemon to create a new vlan_linkid for this implicitly + * created vlan. + */ + if (!explicit && ((err = dls_mgmt_create(vlan, 0, + DATALINK_CLASS_VLAN, DL_ETHER, B_FALSE, &vlan_linkid)) != 0)) { + goto done; + } + + ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP); + ddp->dd_vid = vid; + ddp->dd_explicit = explicit; + ddp->dd_tref = 0; + ddp->dd_ref++; + ddp->dd_zid = GLOBAL_ZONEID; + (void) strncpy(ddp->dd_mac, macname, MAXNAMELEN); + (void) snprintf(ddp->dd_spa, MAXSPALEN, "%s/%d", macname, vid); + VERIFY(mod_hash_insert(i_dls_devnet_hash, + (mod_hash_key_t)ddp->dd_spa, (mod_hash_val_t)ddp) == 0); + +newphys: + + ddp->dd_vlanid = vlan_linkid; + if (ddp->dd_vlanid != DATALINK_INVALID_LINKID) { + ddp->dd_linkid = linkid; + + VERIFY(mod_hash_insert(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)vlan_linkid, + (mod_hash_val_t)ddp) == 0); + devnet_need_rebuild = B_TRUE; + dls_devnet_stat_create(ddp); + } + err = 0; +done: + rw_exit(&i_dls_devnet_lock); + if (err == 0 && ddpp != NULL) + *ddpp = ddp; + return (err); +} + +static void +dls_devnet_unset_common(dls_devnet_t *ddp) +{ + mod_hash_val_t val; + + ASSERT(RW_WRITE_HELD(&i_dls_devnet_lock)); + + ASSERT(ddp->dd_ref == 0); + + /* + * Remove this dls_devnet_t from the 
hash table. + */ + VERIFY(mod_hash_remove(i_dls_devnet_hash, + (mod_hash_key_t)ddp->dd_spa, &val) == 0); + + if (ddp->dd_vlanid != DATALINK_INVALID_LINKID) { + VERIFY(mod_hash_remove(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)ddp->dd_vlanid, &val) == 0); + + dls_devnet_stat_destroy(ddp); + devnet_need_rebuild = B_TRUE; + } + + /* + * Wait until all temporary references are released. + */ + mutex_enter(&ddp->dd_mutex); + while (ddp->dd_tref != 0) + cv_wait(&ddp->dd_cv, &ddp->dd_mutex); + mutex_exit(&ddp->dd_mutex); + + if (!ddp->dd_explicit) { + ASSERT(ddp->dd_vid != VLAN_ID_NONE); + ASSERT(ddp->dd_vlanid != DATALINK_INVALID_LINKID); + (void) dls_mgmt_destroy(ddp->dd_vlanid, B_FALSE); + } + + ddp->dd_vlanid = DATALINK_INVALID_LINKID; + ddp->dd_zid = GLOBAL_ZONEID; + ddp->dd_explicit = B_FALSE; + kmem_cache_free(i_dls_devnet_cachep, ddp); +} + +/* + * Disassociate a linkid with a given link (identified by <macname/vid>) + */ +static int +dls_devnet_unset(const char *macname, uint16_t vid, datalink_id_t *id) +{ + dls_devnet_t *ddp; + char spa[MAXSPALEN]; + int err; + + (void) snprintf(spa, MAXSPALEN, "%s/%d", macname, vid); + + rw_enter(&i_dls_devnet_lock, RW_WRITER); + if ((err = mod_hash_find(i_dls_devnet_hash, + (mod_hash_key_t)spa, (mod_hash_val_t *)&ddp)) != 0) { + ASSERT(err == MH_ERR_NOTFOUND); + rw_exit(&i_dls_devnet_lock); + return (ENOENT); + } + + ASSERT(ddp->dd_ref != 0); + + if (ddp->dd_ref != 1) { + rw_exit(&i_dls_devnet_lock); + return (EBUSY); + } + + ddp->dd_ref--; + + if (id != NULL) + *id = ddp->dd_vlanid; + + dls_devnet_unset_common(ddp); + rw_exit(&i_dls_devnet_lock); + return (0); +} + +static int +dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp) +{ + dls_devnet_t *ddp; + dev_t phydev = 0; + dls_dev_handle_t ddh = NULL; + int err; + + /* + * Hold this link to prevent it being detached in case of a + * physical link. 
+ */ + if (dls_mgmt_get_phydev(linkid, &phydev) == 0) + (void) softmac_hold_device(phydev, &ddh); + + rw_enter(&i_dls_devnet_lock, RW_WRITER); + if ((err = mod_hash_find(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) { + ASSERT(err == MH_ERR_NOTFOUND); + rw_exit(&i_dls_devnet_lock); + softmac_rele_device(ddh); + return (ENOENT); + } + + ASSERT(ddp->dd_ref > 0); + ddp->dd_ref++; + rw_exit(&i_dls_devnet_lock); + softmac_rele_device(ddh); + +done: + *ddpp = ddp; + return (0); +} + +/* + * This function is called when a DLS client tries to open a device node. + * This dev_t could be a result of a /dev/net node access (returned by + * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access. + * In both cases, this function returns 0. In the first case, bump the + * reference count of the dls_devnet_t structure, so that it will not be + * freed when devnet_inactive_callback->dls_devnet_close() is called + * (Note that devnet_inactive_callback() is called right after dld_open, + * not when the /dev/net access is done). In the second case, ddhp would + * be NULL. + * + * To undo this function, call dls_devnet_close() in the first case, and call + * dls_vlan_rele() in the second case. + */ +int +dls_devnet_open_by_dev(dev_t dev, dls_vlan_t **dvpp, dls_dl_handle_t *ddhp) +{ + dls_dev_handle_t ddh = NULL; + char spa[MAXSPALEN]; + dls_devnet_t *ddp; + dls_vlan_t *dvp; + int err; + + /* + * Hold this link to prevent it being detached in case of a + * GLDv3 physical link. + */ + if (getminor(dev) - 1 < MAC_MAX_MINOR) + (void) softmac_hold_device(dev, &ddh); + + /* + * Find the dls_vlan_t with the given dev. 
+ */ + err = dls_vlan_hold_by_dev(dev, &dvp); + softmac_rele_device(ddh); + + if (err != 0) + return (err); + + (void) snprintf(spa, MAXSPALEN, "%s/%d", + dvp->dv_dlp->dl_name, dvp->dv_id); + + rw_enter(&i_dls_devnet_lock, RW_WRITER); + if ((err = mod_hash_find(i_dls_devnet_hash, + (mod_hash_key_t)spa, (mod_hash_val_t *)&ddp)) != 0) { + ASSERT(err == MH_ERR_NOTFOUND); + rw_exit(&i_dls_devnet_lock); + *ddhp = NULL; + *dvpp = dvp; + return (0); + } + + ASSERT(ddp->dd_ref > 0); + ddp->dd_ref++; + rw_exit(&i_dls_devnet_lock); + *ddhp = ddp; + *dvpp = dvp; + return (0); +} + +static void +dls_devnet_rele(dls_devnet_t *ddp) +{ + rw_enter(&i_dls_devnet_lock, RW_WRITER); + ASSERT(ddp->dd_ref != 0); + if (--ddp->dd_ref != 0) { + rw_exit(&i_dls_devnet_lock); + return; + } + /* + * This should only happen for implicitly-created VLAN. + */ + ASSERT(ddp->dd_vid != VLAN_ID_NONE); + dls_devnet_unset_common(ddp); + rw_exit(&i_dls_devnet_lock); +} + +static int +dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp, zoneid_t zid) +{ + char link_under[MAXLINKNAMELEN]; + char drv[MAXLINKNAMELEN]; + uint_t ppa; + major_t major; + dev_t phy_dev, tmp_dev; + uint_t vid; + datalink_id_t linkid; + dls_devnet_t *ddp; + dls_dev_handle_t ddh; + int err; + + if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0) + return (dls_devnet_hold(linkid, ddpp)); + + /* + * If we failed to get the link's linkid because the dlmgmtd daemon + * has not been started, return ENOENT so that the application can + * fallback to open the /dev node. + */ + if (err == EBADF) + return (ENOENT); + + if (err != ENOENT) + return (err); + + if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS) + return (ENOENT); + + if ((vid = DLS_PPA2VID(ppa)) > VLAN_ID_MAX) + return (ENOENT); + + ppa = (uint_t)DLS_PPA2INST(ppa); + (void) snprintf(link_under, sizeof (link_under), "%s%d", drv, ppa); + + if (vid != VLAN_ID_NONE) { + /* + * Only global zone can implicitly create a VLAN. 
+ */ + if (zid != GLOBAL_ZONEID) + return (ENOENT); + + /* + * This is potentially an implicitly-created VLAN. Hold the + * link this VLAN is created on. + */ + if (dls_mgmt_get_linkid(link_under, &linkid) == 0 && + dls_devnet_hold_tmp(linkid, &ddp) == 0) { + if (ddp->dd_vid != VLAN_ID_NONE) { + dls_devnet_rele_tmp(ddp); + return (ENOENT); + } + goto implicit; + } + } + + /* + * If this link (or the link that an implicit vlan is created on) + * (a) is a physical device, (b) this is the first boot, (c) the MAC + * is not registered yet, and (d) we cannot find its linkid, then the + * linkname is the same as the devname. + * + * First filter out invalid names. + */ + if ((major = ddi_name_to_major(drv)) == (major_t)-1) + return (ENOENT); + + phy_dev = makedevice(major, (minor_t)ppa + 1); + if (softmac_hold_device(phy_dev, &ddh) != 0) + return (ENOENT); + + /* + * At this time, the MAC should be registered, check its phy_dev using + * the given name. + */ + if ((err = dls_mgmt_get_linkid(link_under, &linkid)) != 0 || + (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) { + softmac_rele_device(ddh); + return (err); + } + if (tmp_dev != phy_dev) { + softmac_rele_device(ddh); + return (ENOENT); + } + + if (vid == VLAN_ID_NONE) { + /* + * For non-VLAN, we are done. + */ + err = dls_devnet_hold(linkid, ddpp); + softmac_rele_device(ddh); + return (err); + } + + /* + * If this is an implicit VLAN, temporarily hold this non-VLAN. + */ + VERIFY(dls_devnet_hold_tmp(linkid, &ddp) == 0); + softmac_rele_device(ddh); + ASSERT(ddp->dd_vid == VLAN_ID_NONE); + + /* + * Again, this is potentially an implicitly-created VLAN. + */ + +implicit: + ASSERT(vid != VLAN_ID_NONE); + err = dls_devnet_set(ddp->dd_mac, vid, DATALINK_INVALID_LINKID, + linkid, link, ddpp); + dls_devnet_rele_tmp(ddp); + return (err); +} + +/* + * Get linkid for the given dev. 
+ */ +int +dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp) +{ + dls_vlan_t *dvp; + dls_devnet_t *ddp; + char spa[MAXSPALEN]; + int err; + + if ((err = dls_vlan_hold_by_dev(dev, &dvp)) != 0) + return (err); + + (void) snprintf(spa, MAXSPALEN, "%s/%d", + dvp->dv_dlp->dl_name, dvp->dv_id); + + rw_enter(&i_dls_devnet_lock, RW_READER); + if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)spa, + (mod_hash_val_t *)&ddp) != 0) { + rw_exit(&i_dls_devnet_lock); + dls_vlan_rele(dvp); + return (ENOENT); + } + + *linkidp = ddp->dd_vlanid; + rw_exit(&i_dls_devnet_lock); + dls_vlan_rele(dvp); + return (0); +} + +/* + * Get the link's physical dev_t. If this is a VLAN, get the dev_t of the + * link this VLAN is created on. + */ +int +dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp) +{ + dls_devnet_t *ddp; + int err; + + if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0) + return (err); + + err = dls_mgmt_get_phydev(ddp->dd_linkid, devp); + dls_devnet_rele_tmp(ddp); + return (err); +} + +/* + * Handle the renaming requests. There are two rename cases: + * + * 1. Request to rename a valid link (id1) to a non-existent link name + * (id2). In this case id2 is DATALINK_INVALID_LINKID. Just check whether + * id1 is held by any applications. + * + * In this case, the link's kstats need to be updated using the given name. + * + * 2. Request to rename a valid link (id1) to the name of a REMOVED + * physical link (id2). In this case, check that id1 and its associated + * mac are not held by any application, and update the link's linkid to id2. + * + * This case does not change the <link name, linkid> mapping, so the link's + * kstats need to be updated using the name associated with the given id2. 
+ */ +int +dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link) +{ + dls_dev_handle_t ddh = NULL; + char linkname[MAXLINKNAMELEN]; + int err = 0; + dev_t phydev = 0; + dls_devnet_t *ddp; + mac_handle_t mh; + mod_hash_val_t val; + + /* + * In the second case, id2 must be a REMOVED physical link. + */ + if ((id2 != DATALINK_INVALID_LINKID) && + (dls_mgmt_get_phydev(id2, &phydev) == 0) && + softmac_hold_device(phydev, &ddh) == 0) { + softmac_rele_device(ddh); + return (EEXIST); + } + + /* + * Hold id1 to prevent it from being detached (if a physical link). + */ + if (dls_mgmt_get_phydev(id1, &phydev) == 0) + (void) softmac_hold_device(phydev, &ddh); + + rw_enter(&i_dls_devnet_lock, RW_WRITER); + if ((err = mod_hash_find(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) { + ASSERT(err == MH_ERR_NOTFOUND); + err = ENOENT; + goto done; + } + + /* + * Return EBUSY if any applications have this link open. + */ + if ((ddp->dd_explicit && ddp->dd_ref > 1) || + (!ddp->dd_explicit && ddp->dd_ref > 0)) { + err = EBUSY; + goto done; + } + + if (id2 == DATALINK_INVALID_LINKID) { + (void) strlcpy(linkname, link, sizeof (linkname)); + goto done; + } + + /* + * The second case, check whether the MAC is used by any MAC + * user. This must be a physical link so ddh must not be NULL. + */ + if (ddh == NULL) { + err = EINVAL; + goto done; + } + + if ((err = mac_open(ddp->dd_mac, &mh)) != 0) + goto done; + + /* + * We release the reference of the MAC which mac_open() is + * holding. Note that this mac will not be unregistered + * because the physical device is held. + */ + mac_close(mh); + + /* + * Check if there are any other MAC clients; if not, hold this mac + * exclusively until we are done. + */ + if ((err = mac_hold_exclusive(mh)) != 0) + goto done; + + /* + * Update the link's linkid. 
+ */ + if ((err = mod_hash_find(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) { + mac_rele_exclusive(mh); + err = EEXIST; + goto done; + } + + err = dls_mgmt_get_linkinfo(id2, linkname, NULL, NULL, NULL); + if (err != 0) { + mac_rele_exclusive(mh); + goto done; + } + + (void) mod_hash_remove(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)id1, &val); + + ddp->dd_vlanid = id2; + (void) mod_hash_insert(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)ddp->dd_vlanid, (mod_hash_val_t)ddp); + + mac_rele_exclusive(mh); + +done: + /* + * Change the name of the kstat based on the new link name. + */ + if (err == 0) + dls_devnet_stat_rename(ddp, linkname); + + rw_exit(&i_dls_devnet_lock); + softmac_rele_device(ddh); + return (err); +} + +int +dls_devnet_setzid(const char *link, zoneid_t zid) +{ + dls_devnet_t *ddp; + int err; + zoneid_t old_zid; + + if ((err = dls_devnet_hold_by_name(link, &ddp, GLOBAL_ZONEID)) != 0) + return (err); + + mutex_enter(&ddp->dd_zid_mutex); + if ((old_zid = ddp->dd_zid) == zid) { + mutex_exit(&ddp->dd_zid_mutex); + dls_devnet_rele(ddp); + return (0); + } + + if ((err = dls_vlan_setzid(ddp->dd_mac, ddp->dd_vid, zid)) != 0) { + mutex_exit(&ddp->dd_zid_mutex); + dls_devnet_rele(ddp); + return (err); + } + + ddp->dd_zid = zid; + devnet_need_rebuild = B_TRUE; + mutex_exit(&ddp->dd_zid_mutex); + + /* + * Keep this open reference only if it belonged to the global zone + * and is now assigned to a non-global zone. + */ + if (old_zid != GLOBAL_ZONEID || zid == GLOBAL_ZONEID) + dls_devnet_rele(ddp); + + /* + * Then release this link if it belonged to a non-global zone + * but is now assigned back to the global zone. 
+ */ + if (old_zid != GLOBAL_ZONEID && zid == GLOBAL_ZONEID) + dls_devnet_rele(ddp); + + return (0); +} + +int +dls_devnet_getzid(datalink_id_t linkid, zoneid_t *zidp) +{ + dls_devnet_t *ddp; + int err; + + if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) + return (err); + + mutex_enter(&ddp->dd_zid_mutex); + *zidp = ddp->dd_zid; + mutex_exit(&ddp->dd_zid_mutex); + + dls_devnet_rele_tmp(ddp); + return (0); +} + +/* + * Access a vanity naming node. + */ +int +dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp) +{ + dls_devnet_t *ddp; + dls_vlan_t *dvp; + zoneid_t zid = getzoneid(); + int err; + + if ((err = dls_devnet_hold_by_name(link, &ddp, zid)) != 0) + return (err); + + /* + * Opening a link that does not belong to the current non-global zone + * is not allowed. + */ + if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) { + dls_devnet_rele(ddp); + return (ENOENT); + } + + err = dls_vlan_hold(ddp->dd_mac, ddp->dd_vid, &dvp, B_FALSE, B_TRUE); + if (err != 0) { + dls_devnet_rele(ddp); + return (err); + } + + *dhp = ddp; + *devp = dvp->dv_dev; + return (0); +} + +/* + * Close access to a vanity naming node. + */ +void +dls_devnet_close(dls_dl_handle_t dlh) +{ + dls_devnet_t *ddp = dlh; + dls_vlan_t *dvp; + + /* + * The VLAN was held in dls_open_devnet_link(). + */ + VERIFY((dls_vlan_hold(ddp->dd_mac, ddp->dd_vid, &dvp, B_FALSE, + B_FALSE)) == 0); + dls_vlan_rele(dvp); + dls_vlan_rele(dvp); + dls_devnet_rele(ddp); +} + +/* + * This is used by /dev/net to rebuild the nodes for readdir(). It is not + * critical and no protection is needed. 
+ */ +boolean_t +dls_devnet_rebuild() +{ + boolean_t updated = devnet_need_rebuild; + + devnet_need_rebuild = B_FALSE; + return (updated); +} + +int +dls_devnet_create(mac_handle_t mh, datalink_id_t linkid) +{ + int err; + + if ((err = dls_vlan_create(mac_name(mh), 0, B_FALSE)) != 0) + return (err); + + err = dls_devnet_set(mac_name(mh), 0, linkid, linkid, NULL, NULL); + if (err != 0) + (void) dls_vlan_destroy(mac_name(mh), 0); + + return (err); +} + +/* + * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash. + * This is called in the case that the dlmgmtd daemon is started later than + * the physical devices get attached, and the linkid is only known after the + * daemon starts. + */ +int +dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid) +{ + ASSERT(linkid != DATALINK_INVALID_LINKID); + return (dls_devnet_set(mac_name(mh), 0, linkid, linkid, NULL, NULL)); +} + +int +dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp) +{ + int err; + + *idp = DATALINK_INVALID_LINKID; + err = dls_devnet_unset(mac_name(mh), 0, idp); + if (err != 0 && err != ENOENT) + return (err); + + if ((err = dls_vlan_destroy(mac_name(mh), 0)) == 0) + return (0); + + (void) dls_devnet_set(mac_name(mh), 0, *idp, *idp, NULL, NULL); + return (err); +} + +int +dls_devnet_create_vlan(datalink_id_t vlanid, datalink_id_t linkid, + uint16_t vid, boolean_t force) +{ + dls_devnet_t *lnddp, *ddp; + dls_vlan_t *dvp; + int err; + + /* + * Hold the link the VLAN is being created on (which must not be a + * VLAN). + */ + ASSERT(vid != VLAN_ID_NONE); + if ((err = dls_devnet_hold_tmp(linkid, &lnddp)) != 0) + return (err); + + if (lnddp->dd_vid != VLAN_ID_NONE) { + err = EINVAL; + goto done; + } + + /* + * A new link. + */ + err = dls_devnet_set(lnddp->dd_mac, vid, vlanid, linkid, NULL, &ddp); + if (err != 0) + goto done; + + /* + * Hold the dls_vlan_t (and create it if needed). 
+ */ + err = dls_vlan_hold(ddp->dd_mac, ddp->dd_vid, &dvp, force, B_TRUE); + if (err != 0) + VERIFY(dls_devnet_unset(lnddp->dd_mac, vid, NULL) == 0); + +done: + dls_devnet_rele_tmp(lnddp); + return (err); +} + +int +dls_devnet_destroy_vlan(datalink_id_t vlanid) +{ + char macname[MAXNAMELEN]; + uint16_t vid; + dls_devnet_t *ddp; + dls_vlan_t *dvp; + int err; + + if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0) + return (err); + + if (ddp->dd_vid == VLAN_ID_NONE) { + dls_devnet_rele_tmp(ddp); + return (EINVAL); + } + + if (!ddp->dd_explicit) { + dls_devnet_rele_tmp(ddp); + return (EBUSY); + } + + (void) strncpy(macname, ddp->dd_mac, MAXNAMELEN); + vid = ddp->dd_vid; + + /* + * It is safe to release the temporary reference we just held, as the + * reference from VLAN creation is still held. + */ + dls_devnet_rele_tmp(ddp); + + if ((err = dls_devnet_unset(macname, vid, NULL)) != 0) + return (err); + + /* + * This VLAN has already been held as the result of VLAN creation. + */ + VERIFY(dls_vlan_hold(macname, vid, &dvp, B_FALSE, B_FALSE) == 0); + + /* + * Release the reference which was held when this VLAN was created, + * and the reference which was just held. + */ + dls_vlan_rele(dvp); + dls_vlan_rele(dvp); + return (0); +} + +const char * +dls_devnet_mac(dls_dl_handle_t ddh) +{ + return (ddh->dd_mac); +} + +uint16_t +dls_devnet_vid(dls_dl_handle_t ddh) +{ + return (ddh->dd_vid); +} + +datalink_id_t +dls_devnet_linkid(dls_dl_handle_t ddh) +{ + return (ddh->dd_linkid); +} + +boolean_t +dls_devnet_is_explicit(dls_dl_handle_t ddh) +{ + return (ddh->dd_explicit); +} diff --git a/usr/src/uts/common/io/dls/dls_mod.c b/usr/src/uts/common/io/dls/dls_mod.c index 9567d785ba..b93befd45c 100644 --- a/usr/src/uts/common/io/dls/dls_mod.c +++ b/usr/src/uts/common/io/dls/dls_mod.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -58,6 +57,7 @@ i_dls_mod_init(void) dls_init(); dls_vlan_init(); dls_link_init(); + dls_mgmt_init(); } static int @@ -68,6 +68,8 @@ i_dls_mod_fini(void) if ((err = dls_link_fini()) != 0) return (err); + dls_mgmt_fini(); + err = dls_vlan_fini(); ASSERT(err == 0); diff --git a/usr/src/uts/common/io/dls/dls_soft_ring.c b/usr/src/uts/common/io/dls/dls_soft_ring.c index 49d862a860..a1ac10972c 100644 --- a/usr/src/uts/common/io/dls/dls_soft_ring.c +++ b/usr/src/uts/common/io/dls/dls_soft_ring.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -529,33 +529,6 @@ destroy: } void -dls_soft_ring_rx_set(dls_channel_t dc, dls_rx_t rx, void *arg, int type) -{ - dls_impl_t *dip = (dls_impl_t *)dc; - - rw_enter(&(dip->di_lock), RW_WRITER); - dip->di_rx = rx; - if (type == SOFT_RING_NONE) - dip->di_rx_arg = arg; - else - dip->di_rx_arg = (void *)dip; - rw_exit(&(dip->di_lock)); -} - -boolean_t -dls_soft_ring_workers(dls_channel_t dc) -{ - dls_impl_t *dip = (dls_impl_t *)dc; - boolean_t ret = B_FALSE; - - rw_enter(&(dip->di_lock), RW_READER); - if (dip->di_soft_ring_list != NULL) - ret = B_TRUE; - rw_exit(&(dip->di_lock)); - return (ret); -} - -void dls_soft_ring_disable(dls_channel_t dc) { dls_impl_t *dip = (dls_impl_t *)dc; diff --git a/usr/src/uts/common/io/dls/dls_stat.c b/usr/src/uts/common/io/dls/dls_stat.c index daee626df4..99f41d0c7d 100644 --- a/usr/src/uts/common/io/dls/dls_stat.c +++ b/usr/src/uts/common/io/dls/dls_stat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,6 +35,7 @@ #include <sys/kstat.h> #include <sys/vlan.h> #include <sys/mac.h> +#include <sys/mac_ether.h> #include <sys/ctype.h> #include <sys/dls.h> #include <sys/dls_impl.h> @@ -57,7 +58,9 @@ static mac_stat_info_t i_dls_si[] = { { MAC_STAT_RBYTES, "rbytes64", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_IPACKETS, "ipackets64", KSTAT_DATA_UINT64, 0 }, { MAC_STAT_OBYTES, "obytes64", KSTAT_DATA_UINT64, 0 }, - { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 } + { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 }, + { MAC_STAT_LINK_STATE, "link_state", KSTAT_DATA_UINT32, + (uint64_t)LINK_STATE_UNKNOWN} }; #define STAT_INFO_COUNT (sizeof (i_dls_si) / sizeof (i_dls_si[0])) @@ -67,9 +70,19 @@ static mac_stat_info_t i_dls_si[] = { */ static int -i_dls_stat_update(kstat_t *ksp, int rw) +i_dls_mac_stat_update(kstat_t *ksp, int rw) { dls_vlan_t *dvp = ksp->ks_private; + + return (dls_stat_update(ksp, dvp, rw)); +} + +/* + * Exported functions. + */ +int +dls_stat_update(kstat_t *ksp, dls_vlan_t *dvp, int rw) +{ dls_link_t *dlp = dvp->dv_dlp; kstat_named_t *knp; uint_t i; @@ -100,40 +113,37 @@ i_dls_stat_update(kstat_t *ksp, int rw) knp++; } + /* + * Ethernet specific kstat "link_duplex" + */ + if (dlp->dl_mip->mi_nativemedia != DL_ETHER) { + knp->value.ui32 = LINK_DUPLEX_UNKNOWN; + } else { + val = mac_stat_get(dlp->dl_mh, ETHER_STAT_LINK_DUPLEX); + knp->value.ui32 = (uint32_t)val; + } + knp++; knp->value.ui32 = dlp->dl_unknowns; dls_mac_rele(dlp); return (0); } -/* - * Exported functions. 
- */ - -void -dls_mac_stat_create(dls_vlan_t *dvp) +int +dls_stat_create(const char *module, int instance, const char *name, + int (*update)(struct kstat *, int), void *private, kstat_t **kspp) { - dls_link_t *dlp = dvp->dv_dlp; - char module[IFNAMSIZ]; - uint_t instance; - kstat_t *ksp; - kstat_named_t *knp; - uint_t i; - int err; - - if (dls_mac_hold(dlp) != 0) - return; - - err = ddi_parse(dvp->dv_name, module, &instance); - ASSERT(err == DDI_SUCCESS); + kstat_t *ksp; + kstat_named_t *knp; + uint_t i; - if ((ksp = kstat_create(module, instance, NULL, "net", - KSTAT_TYPE_NAMED, STAT_INFO_COUNT + 1, 0)) == NULL) - goto done; + if ((ksp = kstat_create(module, instance, name, "net", + KSTAT_TYPE_NAMED, STAT_INFO_COUNT + 2, 0)) == NULL) { + return (EINVAL); + } - ksp->ks_update = i_dls_stat_update; - ksp->ks_private = (void *)dvp; - dvp->dv_ksp = ksp; + ksp->ks_update = update; + ksp->ks_private = private; knp = (kstat_named_t *)ksp->ks_data; for (i = 0; i < STAT_INFO_COUNT; i++) { @@ -142,16 +152,51 @@ dls_mac_stat_create(dls_vlan_t *dvp) knp++; } + kstat_named_init(knp++, "link_duplex", KSTAT_DATA_UINT32); kstat_named_init(knp, "unknowns", KSTAT_DATA_UINT32); - kstat_install(ksp); -done: - dls_mac_rele(dlp); + *kspp = ksp; + return (0); +} + +void +dls_mac_stat_create(dls_vlan_t *dvp) +{ + kstat_t *ksp = NULL; + major_t major; + + /* + * Create the legacy kstats to provide backward compatibility. + * These kstats need to be created even when this link does not + * have a link name, i.e., when the VLAN is accessed using its + * /dev node. + * + * Note that we only need to create the legacy kstats for GLDv3 + * physical links, aggregation links which are created using + * the 'key' option, and any VLAN links created over them. + * This can be determined by checking its dv_ppa. 
+ */ + ASSERT(dvp->dv_ksp == NULL); + if (dvp->dv_ppa >= MAC_MAX_MINOR) + return; + + major = getmajor(dvp->dv_dev); + ASSERT(GLDV3_DRV(major) && (dvp->dv_ksp == NULL)); + + if (dls_stat_create(ddi_major_to_name(major), + dvp->dv_id * 1000 + dvp->dv_ppa, NULL, + i_dls_mac_stat_update, dvp, &ksp) != 0) { + return; + } + ASSERT(ksp != NULL); + dvp->dv_ksp = ksp; } void dls_mac_stat_destroy(dls_vlan_t *dvp) { - kstat_delete(dvp->dv_ksp); - dvp->dv_ksp = NULL; + if (dvp->dv_ksp != NULL) { + kstat_delete(dvp->dv_ksp); + dvp->dv_ksp = NULL; + } } diff --git a/usr/src/uts/common/io/dls/dls_vlan.c b/usr/src/uts/common/io/dls/dls_vlan.c index 2fcf435a3d..9df000e86a 100644 --- a/usr/src/uts/common/io/dls/dls_vlan.c +++ b/usr/src/uts/common/io/dls/dls_vlan.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,16 +31,14 @@ #include <sys/types.h> #include <sys/sysmacros.h> -#include <sys/atomic.h> -#include <sys/mkdev.h> #include <sys/modhash.h> +#include <sys/stat.h> #include <sys/kstat.h> #include <sys/vlan.h> #include <sys/mac.h> #include <sys/ctype.h> #include <sys/dls.h> #include <sys/dls_impl.h> -#include <sys/dld.h> static kmem_cache_t *i_dls_vlan_cachep; static mod_hash_t *i_dls_vlan_hash; @@ -48,10 +46,6 @@ static mod_hash_t *i_dls_vlan_dev_hash; static krwlock_t i_dls_vlan_lock; static uint_t i_dls_vlan_count; -static vmem_t *minor_arenap; -#define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) -#define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) - #define VLAN_HASHSZ 67 /* prime */ /* @@ -62,8 +56,10 @@ static vmem_t *minor_arenap; static int i_dls_vlan_constructor(void *buf, void *arg, int kmflag) { - bzero(buf, sizeof (dls_vlan_t)); + dls_vlan_t *dvp = buf; + bzero(buf, sizeof (dls_vlan_t)); + mutex_init(&dvp->dv_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } @@ -71,15 +67,16 @@ 
i_dls_vlan_constructor(void *buf, void *arg, int kmflag) static void i_dls_vlan_destructor(void *buf, void *arg) { - dls_vlan_t *dvp = (dls_vlan_t *)buf; + dls_vlan_t *dvp = buf; ASSERT(dvp->dv_ref == 0); + ASSERT(dvp->dv_zone_ref == 0); + mutex_destroy(&dvp->dv_lock); } /* * Module initialization functions. */ - void dls_vlan_init(void) { @@ -92,29 +89,20 @@ dls_vlan_init(void) ASSERT(i_dls_vlan_cachep != NULL); /* - * Create a hash table, keyed by name, of dls_vlan_t. + * Create a hash table, keyed by dv_spa, of dls_vlan_t. */ i_dls_vlan_hash = mod_hash_create_extended("dls_vlan_hash", VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); + /* - * Create a second hash table, keyed by minor, of dls_vlan_t. - * The number of the hash slots is the same. + * Create a hash table, keyed by dv_dev, of dls_vlan_t. */ - i_dls_vlan_dev_hash = mod_hash_create_idhash("dls_vlan_dev_hash", - VLAN_HASHSZ, mod_hash_null_valdtor); + i_dls_vlan_dev_hash = mod_hash_create_ptrhash("dls_vlan_dev_hash", + VLAN_HASHSZ, mod_hash_null_valdtor, sizeof (dev_t)); + rw_init(&i_dls_vlan_lock, NULL, RW_DEFAULT, NULL); i_dls_vlan_count = 0; - - /* - * Allocate a vmem arena to manage minor numbers. The range of the - * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal - * minor number). - */ - minor_arenap = vmem_create("dls_minor_arena", - MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, - VM_SLEEP | VMC_IDENTIFIER); - ASSERT(minor_arenap != NULL); } int @@ -134,8 +122,6 @@ dls_vlan_fini(void) * Destroy the kmem_cache. */ kmem_cache_destroy(i_dls_vlan_cachep); - - vmem_destroy(minor_arenap); return (0); } @@ -143,167 +129,268 @@ dls_vlan_fini(void) * Exported functions. */ +/* + * If vid is VLAN_ID_NONE, then the minor_t to access this dls_vlan_t is + * ppa + 1, otherwise, we need to allocate the minor_t in this function. 
+ * + * If ppa is greater than DLS_MAX_PPA, it means that we do not need to create + * the VLAN minor node for this MAC, as this MAC is (a) a legacy device, (b) + * an aggr created without the "key" argument, or (c) a new type of link + * whose ppa is allocated by mac_minor_hold() in mac_register(). + */ int -dls_vlan_create(const char *vlanname, const char *macname, uint16_t vid) +dls_vlan_create(const char *macname, uint16_t vid, boolean_t force) { + char node[MAXPATHLEN]; + char spa[MAXSPALEN]; + char *driver; dls_link_t *dlp; dls_vlan_t *dvp; - int err; - uint_t len; + minor_t minor = 0; + mac_handle_t mh; + int ppa; + dev_info_t *dip; + uint32_t margin = VLAN_TAGSZ; + int err = 0; + + if ((err = mac_open(macname, &mh)) != 0) + return (err); /* - * Check to see the name is legal. It must be less than IFNAMSIZ - * characters in length and must terminate with a digit (before the - * NUL, of course). + * First check whether VLANs are able to be created on this MAC. */ - len = strlen(vlanname); - if (len == 0 || len >= IFNAMSIZ) - return (EINVAL); - - if (!isdigit(vlanname[len - 1])) - return (EINVAL); + if (vid != VLAN_ID_NONE) { + if ((mac_info(mh)->mi_media != DL_ETHER) || + (mac_info(mh)->mi_nativemedia != DL_ETHER)) { + mac_close(mh); + return (EINVAL); + } + if (!force && + ((err = mac_margin_add(mh, &margin, B_FALSE)) != 0)) { + mac_close(mh); + return (err); + } + } /* * Get a reference to a dls_link_t representing the MAC. This call * will create one if necessary. */ - if ((err = dls_link_hold(macname, &dlp)) != 0) + if ((err = dls_link_hold(macname, &dlp)) != 0) { + if (vid != VLAN_ID_NONE && !force) + VERIFY(mac_margin_remove(mh, margin) == 0); + mac_close(mh); return (err); + } + + rw_enter(&i_dls_vlan_lock, RW_WRITER); + + /* + * Try to find this VLAN in i_dls_vlan_hash first. The spa + * is in the <macname/vid> form. 
+ */ + (void) snprintf(spa, MAXSPALEN, "%s/%d", macname, vid); + if ((err = mod_hash_find(i_dls_vlan_hash, + (mod_hash_key_t)spa, (mod_hash_val_t)&dvp)) == 0) { + err = EEXIST; + goto fail; + } + + ppa = mac_minor(mh) - 1; + dip = mac_devinfo_get(mh); + + if (vid == VLAN_ID_NONE) { + /* + * Derives minor number directly from non-VLAN link's PPA. + */ + minor = ppa + 1; + } else if ((minor = mac_minor_hold(B_TRUE)) == 0) { + /* + * Allocate minor number from minor_arenap for VLANs. + */ + err = ENOMEM; + goto fail; + } /* - * Allocate a new dls_vlan_t. + * First create its minor node for non-legacy links, including VLANs + * and non-VLANs. This is for /dev nodes backward compatibility. */ + if (vid != VLAN_ID_NONE && ppa < MAC_MAX_MINOR) { + + driver = (char *)ddi_driver_name(dip); + + /* Create a style-1 DLPI device */ + (void) snprintf(node, MAXPATHLEN, "%s%d", driver, + vid * 1000 + ppa); + if (ddi_create_minor_node(dip, node, S_IFCHR, minor, + DDI_NT_NET, 0) != DDI_SUCCESS) { + err = EINVAL; + goto fail; + } + } + dvp = kmem_cache_alloc(i_dls_vlan_cachep, KM_SLEEP); - (void) strlcpy(dvp->dv_name, vlanname, sizeof (dvp->dv_name)); dvp->dv_id = vid; dvp->dv_dlp = dlp; + dvp->dv_dev = makedevice(ddi_driver_major(dip), minor); + dvp->dv_dip = dip; + dvp->dv_ppa = ppa; + dvp->dv_force = force; + dvp->dv_ref = 0; + dvp->dv_zone_ref = 0; + dvp->dv_zid = GLOBAL_ZONEID; + (void) strlcpy(dvp->dv_spa, spa, MAXSPALEN); + dls_mac_stat_create(dvp); + + err = mod_hash_insert(i_dls_vlan_hash, + (mod_hash_key_t)dvp->dv_spa, (mod_hash_val_t)dvp); + ASSERT(err == 0); + + err = mod_hash_insert(i_dls_vlan_dev_hash, + (mod_hash_key_t)dvp->dv_dev, (mod_hash_val_t)dvp); + ASSERT(err == 0); + + i_dls_vlan_count++; + rw_exit(&i_dls_vlan_lock); /* - * Insert the entry into the table. + * Hold the underlying MAC for VLANs to keep the margin request. + * We cannot hold the mac for non-VLANs, because a reference would + * prevent the device from detaching. 
*/ - rw_enter(&i_dls_vlan_lock, RW_WRITER); + if (vid != VLAN_ID_NONE) + VERIFY(dls_mac_hold(dvp->dv_dlp) == 0); - if ((err = mod_hash_insert(i_dls_vlan_hash, - (mod_hash_key_t)dvp->dv_name, (mod_hash_val_t)dvp)) != 0) { - kmem_cache_free(i_dls_vlan_cachep, dvp); - dls_link_rele(dlp); - err = EEXIST; - goto done; - } - i_dls_vlan_count++; + mac_close(mh); + return (0); -done: +fail: rw_exit(&i_dls_vlan_lock); + if (vid != VLAN_ID_NONE && minor != 0) + mac_minor_rele(minor); + dls_link_rele(dlp); + if (vid != VLAN_ID_NONE && !force) + VERIFY(mac_margin_remove(mh, margin) == 0); + mac_close(mh); return (err); } int -dls_vlan_destroy(const char *name) +dls_vlan_destroy(const char *macname, uint16_t vid) { - int err; + char spa[MAXSPALEN]; dls_vlan_t *dvp; - dls_link_t *dlp; mod_hash_val_t val; + int err; /* - * Find the dls_vlan_t in the global hash table. + * Try to find this VLAN in i_dls_vlan_hash first. The spa + * is in the <macname/vid> form. */ + (void) snprintf(spa, MAXSPALEN, "%s/%d", macname, vid); + rw_enter(&i_dls_vlan_lock, RW_WRITER); - err = mod_hash_find(i_dls_vlan_hash, (mod_hash_key_t)name, - (mod_hash_val_t *)&dvp); - if (err != 0) { - err = ENOENT; - goto done; + if ((err = mod_hash_find(i_dls_vlan_hash, + (mod_hash_key_t)spa, (mod_hash_val_t)&dvp)) != 0) { + rw_exit(&i_dls_vlan_lock); + return (ENOENT); } /* * Check to see if it is referenced by any dls_impl_t. */ if (dvp->dv_ref != 0) { - err = EBUSY; - goto done; + rw_exit(&i_dls_vlan_lock); + return (EBUSY); } + ASSERT(dvp->dv_zone_ref == 0); + /* * Remove and destroy the hash table entry. 
*/ - err = mod_hash_remove(i_dls_vlan_hash, (mod_hash_key_t)name, - (mod_hash_val_t *)&val); + err = mod_hash_remove(i_dls_vlan_hash, + (mod_hash_key_t)dvp->dv_spa, (mod_hash_val_t *)&val); ASSERT(err == 0); ASSERT(dvp == (dls_vlan_t *)val); + err = mod_hash_remove(i_dls_vlan_dev_hash, + (mod_hash_key_t)dvp->dv_dev, (mod_hash_val_t *)&val); + ASSERT(err == 0); + ASSERT(dvp == (dls_vlan_t *)val); + + if (vid != VLAN_ID_NONE && dvp->dv_ppa < MAC_MAX_MINOR) { + char node[MAXPATHLEN]; + char *driver; + + /* + * Remove the minor nodes for this link. + */ + driver = (char *)ddi_driver_name(dvp->dv_dip); + (void) snprintf(node, MAXPATHLEN, "%s%d", driver, + vid * 1000 + dvp->dv_ppa); + ddi_remove_minor_node(dvp->dv_dip, node); + } + + dls_mac_stat_destroy(dvp); + ASSERT(i_dls_vlan_count > 0); i_dls_vlan_count--; + rw_exit(&i_dls_vlan_lock); + + if (vid != VLAN_ID_NONE) { + if (!dvp->dv_force) { + (void) mac_margin_remove(dvp->dv_dlp->dl_mh, + VLAN_TAGSZ); + } + dls_mac_rele(dvp->dv_dlp); + } /* - * Save a reference to dv_dlp before freeing the dls_vlan_t back - * to the cache. + * Release minor to dls_minor_arenap for VLANs */ - dlp = dvp->dv_dlp; - kmem_cache_free(i_dls_vlan_cachep, dvp); + if (vid != VLAN_ID_NONE) + mac_minor_rele(getminor(dvp->dv_dev)); /* * Release the dls_link_t. This will destroy the dls_link_t and * release the MAC if there are no more dls_vlan_t. 
*/ - dls_link_rele(dlp); -done: - rw_exit(&i_dls_vlan_lock); - return (err); + dls_link_rele(dvp->dv_dlp); + kmem_cache_free(i_dls_vlan_cachep, dvp); + return (0); } int -dls_vlan_hold(const char *name, dls_vlan_t **dvpp, boolean_t create_vlan) +dls_vlan_hold(const char *macname, uint16_t vid, dls_vlan_t **dvpp, + boolean_t force, boolean_t create_vlan) { - int err; + char spa[MAXSPALEN]; dls_vlan_t *dvp; - dls_link_t *dlp; - boolean_t vlan_created = B_FALSE; - uint16_t vid; - uint_t mac_ppa; + boolean_t vlan_created; + int err = 0; + + (void) snprintf(spa, MAXSPALEN, "%s/%d", macname, vid); again: rw_enter(&i_dls_vlan_lock, RW_WRITER); - - err = mod_hash_find(i_dls_vlan_hash, (mod_hash_key_t)name, - (mod_hash_val_t *)&dvp); - if (err != 0) { - char mac[MAXNAMELEN]; - uint_t index, len; + if ((err = mod_hash_find(i_dls_vlan_hash, + (mod_hash_key_t)spa, (mod_hash_val_t)&dvp)) != 0) { ASSERT(err == MH_ERR_NOTFOUND); vlan_created = B_FALSE; - if (!create_vlan) { - err = ENOENT; - goto done; + if (!create_vlan || vid == VLAN_ID_NONE) { + rw_exit(&i_dls_vlan_lock); + return (ENOENT); } - - /* - * Only create tagged vlans on demand. - * Note that if we get here, 'name' must be a sane - * value because it must have been derived from - * ddi_major_to_name(). - */ - if (ddi_parse(name, mac, &index) != DDI_SUCCESS || - (vid = DLS_PPA2VID(index)) == VLAN_ID_NONE || - vid > VLAN_ID_MAX) { - err = EINVAL; - goto done; - } - - mac_ppa = (uint_t)DLS_PPA2INST(index); - - len = strlen(mac); - ASSERT(len < MAXNAMELEN); - (void) snprintf(mac + len, MAXNAMELEN - len, "%d", mac_ppa); rw_exit(&i_dls_vlan_lock); - if ((err = dls_vlan_create(name, mac, vid)) != 0) { - rw_enter(&i_dls_vlan_lock, RW_WRITER); - goto done; - } + err = dls_vlan_create(macname, vid, force); + if ((err != 0) && (err != EEXIST)) + return (err); /* * At this point someone else could do a dls_vlan_hold and @@ -311,362 +398,164 @@ again: * destroyed. This will at worst cause us to spin a few * times. 
*/ - vlan_created = B_TRUE; + vlan_created = (err != EEXIST); goto again; } - dlp = dvp->dv_dlp; - - if ((err = dls_mac_hold(dlp)) != 0) - goto done; - - /* Create a minor node for this VLAN */ - if (vid != 0 && vlan_created) { - /* A tagged VLAN */ - dvp->dv_minor = dls_minor_hold(B_TRUE); - dvp->dv_ppa = DLS_VIDINST2PPA(vid, mac_ppa); - - err = mod_hash_insert(i_dls_vlan_dev_hash, - (mod_hash_key_t)(uintptr_t)dvp->dv_minor, - (mod_hash_val_t)dvp); - ASSERT(err == 0); - - err = mac_vlan_create(dlp->dl_mh, name, dvp->dv_minor); - - if (err != 0) { - mod_hash_val_t val; - - err = mod_hash_remove(i_dls_vlan_dev_hash, - (mod_hash_key_t)(uintptr_t)dvp->dv_minor, - (mod_hash_val_t *)&val); - ASSERT(err == 0); - ASSERT(dvp == (dls_vlan_t *)val); - - dvp->dv_minor = 0; - dls_mac_rele(dlp); - goto done; - } - } - - /* - * Do not allow the creation of tagged VLAN interfaces on - * non-Ethernet links. Note that we cannot do this check in - * dls_vlan_create() nor in this function prior to the call to - * dls_mac_hold(). The reason is that before we do a - * dls_mac_hold(), we may not have opened the mac, and therefore do - * not know what kind of media the mac represents. In other words, - * dls_mac_hold() assigns the dl_mip of the dls_link_t we're - * interested in. - */ - if (dvp->dv_id != VLAN_ID_NONE && - (dlp->dl_mip->mi_media != DL_ETHER || - dlp->dl_mip->mi_nativemedia != DL_ETHER)) { - dls_mac_rele(dlp); - err = EINVAL; - goto done; - } - - if ((err = mac_start(dlp->dl_mh)) != 0) { - dls_mac_rele(dlp); - goto done; - } - - if (dvp->dv_ref++ == 0) - dls_mac_stat_create(dvp); - - *dvpp = dvp; -done: + dvp->dv_ref++; rw_exit(&i_dls_vlan_lock); - /* - * We could be destroying a vlan created by another thread. This - * is ok because this other thread will just loop back up and - * recreate the vlan. 
- */ - if (err != 0 && vlan_created) - (void) dls_vlan_destroy(name); - return (err); -} - -void -dls_vlan_rele(dls_vlan_t *dvp) -{ - dls_link_t *dlp; - char name[IFNAMSIZ]; - boolean_t destroy_vlan = B_FALSE; - - rw_enter(&i_dls_vlan_lock, RW_WRITER); - dlp = dvp->dv_dlp; - - /* a minor node has been created for this vlan */ - if (dvp->dv_ref == 1 && dvp->dv_minor > 0) { - int err; - mod_hash_val_t val; - - mac_vlan_remove(dlp->dl_mh, dvp->dv_name); - err = mod_hash_remove(i_dls_vlan_dev_hash, - (mod_hash_key_t)(uintptr_t)dvp->dv_minor, - (mod_hash_val_t *)&val); - ASSERT(err == 0); - ASSERT(dvp == (dls_vlan_t *)val); - dls_minor_rele(dvp->dv_minor); - dvp->dv_minor = 0; - } - - mac_stop(dlp->dl_mh); - dls_mac_rele(dlp); - if (--dvp->dv_ref == 0) { - dls_mac_stat_destroy(dvp); - /* - * Tagged vlans get destroyed when dv_ref drops - * to 0. We need to copy dv_name here because - * dvp could disappear after we drop i_dls_vlan_lock. - */ - if (dvp->dv_id != 0) { - (void) strlcpy(name, dvp->dv_name, IFNAMSIZ); - destroy_vlan = B_TRUE; - } + if ((err = dls_mac_hold(dvp->dv_dlp)) != 0) { + rw_enter(&i_dls_vlan_lock, RW_WRITER); + dvp->dv_ref--; + rw_exit(&i_dls_vlan_lock); + if (vlan_created) + (void) dls_vlan_destroy(macname, vid); + return (err); } - rw_exit(&i_dls_vlan_lock); - if (destroy_vlan) - (void) dls_vlan_destroy(name); -} - -typedef struct dls_vlan_walk_state { - int (*fn)(dls_vlan_t *, void *); - void *arg; - int rc; -} dls_vlan_walk_state_t; - -/*ARGSUSED*/ -static uint_t -dls_vlan_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) -{ - dls_vlan_walk_state_t *statep = arg; - dls_vlan_t *dvp; - dvp = (dls_vlan_t *)val; - statep->rc = statep->fn(dvp, statep->arg); - - return ((statep->rc == 0) ? 
MH_WALK_CONTINUE : MH_WALK_TERMINATE); -} - -int -dls_vlan_walk(int (*fn)(dls_vlan_t *, void *), void *arg) -{ - dls_vlan_walk_state_t state; - - rw_enter(&i_dls_vlan_lock, RW_READER); - - state.fn = fn; - state.arg = arg; - state.rc = 0; - mod_hash_walk(i_dls_vlan_hash, dls_vlan_walker, (void *)&state); - - rw_exit(&i_dls_vlan_lock); - return (state.rc); + *dvpp = dvp; + return (0); } int -dls_vlan_ppa_from_minor(minor_t minor, t_uscalar_t *ppa) +dls_vlan_hold_by_dev(dev_t dev, dls_vlan_t **dvpp) { dls_vlan_t *dvp; - - if (minor <= DLD_MAX_MINOR) { - *ppa = (t_uscalar_t)minor - 1; - return (0); - } + int err; rw_enter(&i_dls_vlan_lock, RW_WRITER); - - if (mod_hash_find(i_dls_vlan_dev_hash, (mod_hash_key_t)(uintptr_t)minor, - (mod_hash_val_t *)&dvp) != 0) { + if ((err = mod_hash_find(i_dls_vlan_dev_hash, (mod_hash_key_t)dev, + (mod_hash_val_t *)&dvp)) != 0) { + ASSERT(err == MH_ERR_NOTFOUND); rw_exit(&i_dls_vlan_lock); return (ENOENT); } - *ppa = dvp->dv_ppa; + dvp->dv_ref++; rw_exit(&i_dls_vlan_lock); + + if ((err = dls_mac_hold(dvp->dv_dlp)) != 0) { + rw_enter(&i_dls_vlan_lock, RW_WRITER); + dvp->dv_ref--; + rw_exit(&i_dls_vlan_lock); + return (err); + } + + *dvpp = dvp; return (0); } -int -dls_vlan_rele_by_name(const char *name) +/* + * Free the dvp if this is a VLAN and this is the last reference. 
+ */ +void +dls_vlan_rele(dls_vlan_t *dvp) { - dls_vlan_t *dvp; - dls_link_t *dlp; + char macname[MAXNAMELEN]; + uint16_t vid; boolean_t destroy_vlan = B_FALSE; - rw_enter(&i_dls_vlan_lock, RW_WRITER); + dls_mac_rele(dvp->dv_dlp); - if (mod_hash_find(i_dls_vlan_hash, (mod_hash_key_t)name, - (mod_hash_val_t *)&dvp) != 0) { + rw_enter(&i_dls_vlan_lock, RW_WRITER); + if (--dvp->dv_ref != 0) { rw_exit(&i_dls_vlan_lock); - return (ENOENT); + return; } - dlp = dvp->dv_dlp; - - /* a minor node has been created for this vlan */ - if (dvp->dv_ref == 1 && dvp->dv_minor > 0) { - int err; - mod_hash_val_t val; - - mac_vlan_remove(dlp->dl_mh, dvp->dv_name); - err = mod_hash_remove(i_dls_vlan_dev_hash, - (mod_hash_key_t)(uintptr_t)dvp->dv_minor, - (mod_hash_val_t *)&val); - ASSERT(err == 0); - ASSERT(dvp == (dls_vlan_t *)val); - dls_minor_rele(dvp->dv_minor); - dvp->dv_minor = 0; - } - - mac_stop(dlp->dl_mh); - dls_mac_rele(dlp); - if (--dvp->dv_ref == 0) { - dls_mac_stat_destroy(dvp); - /* Tagged vlans get destroyed when dv_ref drops to 0. 
*/ - if (dvp->dv_id != 0) - destroy_vlan = B_TRUE; + if (dvp->dv_id != VLAN_ID_NONE) { + destroy_vlan = B_TRUE; + (void) strncpy(macname, dvp->dv_dlp->dl_name, MAXNAMELEN); + vid = dvp->dv_id; } rw_exit(&i_dls_vlan_lock); - if (destroy_vlan) - (void) dls_vlan_destroy(name); - return (0); + if (destroy_vlan) + (void) dls_vlan_destroy(macname, vid); } -typedef struct dls_vlan_dip_state { - minor_t minor; - dev_info_t *dip; -} dls_vlan_dip_k_state_t; - -static int -dls_vlan_devinfo(dls_vlan_t *dvp, void *arg) +int +dls_vlan_setzid(const char *mac, uint16_t vid, zoneid_t zid) { - dls_vlan_dip_k_state_t *statep = arg; - - if (dvp->dv_minor == statep->minor) { - dls_link_t *dlp = dvp->dv_dlp; + dls_vlan_t *dvp; + int err; + zoneid_t old_zid; - if (dls_mac_hold(dlp) != 0) - return (0); - statep->dip = mac_devinfo_get(dlp->dl_mh); - dls_mac_rele(dlp); + if ((err = dls_vlan_hold(mac, vid, &dvp, B_FALSE, B_TRUE)) != 0) + return (err); - return (1); + mutex_enter(&dvp->dv_lock); + if ((old_zid = dvp->dv_zid) == zid) { + mutex_exit(&dvp->dv_lock); + goto done; } - return (0); -} - -dev_info_t * -dls_vlan_finddevinfo(dev_t dev) -{ - dls_vlan_dip_k_state_t vlan_state; - - vlan_state.minor = getminor(dev); - vlan_state.dip = NULL; - - (void) dls_vlan_walk(dls_vlan_devinfo, &vlan_state); - return (vlan_state.dip); -} - -/* - * Allocate a new minor number. - */ -minor_t -dls_minor_hold(boolean_t sleep) -{ - /* - * Grab a value from the arena. - */ - return (PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, - (sleep) ? VM_SLEEP : VM_NOSLEEP))); -} - -/* - * Release a previously allocated minor number. - */ -void -dls_minor_rele(minor_t minor) -{ /* - * Return the value to the arena. + * Check whether this dvp is used by its own zones, if yes, + * we cannot change its zoneid. 
*/ - vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); -} - -int -dls_vlan_setzoneid(char *name, zoneid_t zid, boolean_t docheck) -{ - int err; - dls_vlan_t *dvp; - - if ((err = dls_vlan_hold(name, &dvp, B_TRUE)) != 0) - return (err); + if (dvp->dv_zone_ref != 0) { + mutex_exit(&dvp->dv_lock); + err = EBUSY; + goto done; + } - rw_enter(&i_dls_vlan_lock, RW_WRITER); - if (!docheck) { + if (zid == GLOBAL_ZONEID) { + /* + * Move the link from the local zone to the global zone, + * and release the reference to this link. At the same time + * reset the link's active state so that an aggregation is + * allowed to be created over it. + */ dvp->dv_zid = zid; - } else { - dls_impl_t *dip; - - for (dip = dvp->dv_impl_list; dip != NULL; - dip = dip->di_next_impl) - if (dip->di_zid != zid) - break; - if (dip == NULL) - dvp->dv_zid = zid; - else + mutex_exit(&dvp->dv_lock); + dls_mac_active_clear(dvp->dv_dlp); + dls_vlan_rele(dvp); + goto done; + } else if (old_zid == GLOBAL_ZONEID) { + /* + * Move the link from the global zone to the local zone, + * and hold a reference to this link. Also, set the link + * to the "active" state so that the global zone is + * not able to create an aggregation over this link. + * TODO: revisit once we allow creating aggregations + * within a local zone. + */ + if (!dls_mac_active_set(dvp->dv_dlp)) { + mutex_exit(&dvp->dv_lock); err = EBUSY; + goto done; + } + dvp->dv_zid = zid; + mutex_exit(&dvp->dv_lock); + return (0); + } else { + /* + * Move the link from a local zone to another local zone. + */ + dvp->dv_zid = zid; + mutex_exit(&dvp->dv_lock); } - rw_exit(&i_dls_vlan_lock); +done: dls_vlan_rele(dvp); return (err); } -int -dls_vlan_getzoneid(char *name, zoneid_t *zidp) +/* + * Find dev_info_t based on the minor node of the link. 
+ */ +dev_info_t * +dls_finddevinfo(dev_t dev) { - int err; dls_vlan_t *dvp; + dev_info_t *dip; - if ((err = dls_vlan_hold(name, &dvp, B_FALSE)) != 0) - return (err); - - *zidp = dvp->dv_zid; + if (dls_vlan_hold_by_dev(dev, &dvp) != 0) + return (NULL); + dip = dvp->dv_dip; dls_vlan_rele(dvp); - - return (0); -} - -void -dls_vlan_add_impl(dls_vlan_t *dvp, dls_impl_t *dip) -{ - rw_enter(&i_dls_vlan_lock, RW_WRITER); - dip->di_next_impl = dvp->dv_impl_list; - dvp->dv_impl_list = dip; - rw_exit(&i_dls_vlan_lock); -} - - -void -dls_vlan_remove_impl(dls_vlan_t *dvp, dls_impl_t *dip) -{ - dls_impl_t **pp; - dls_impl_t *p; - - rw_enter(&i_dls_vlan_lock, RW_WRITER); - for (pp = &dvp->dv_impl_list; (p = *pp) != NULL; - pp = &(p->di_next_impl)) - if (p == dip) - break; - ASSERT(p != NULL); - *pp = p->di_next_impl; - p->di_next_impl = NULL; - rw_exit(&i_dls_vlan_lock); + return (dip); } diff --git a/usr/src/uts/common/io/dmfe/dmfe_main.c b/usr/src/uts/common/io/dmfe/dmfe_main.c index 74ef877524..93653c95bb 100644 --- a/usr/src/uts/common/io/dmfe/dmfe_main.c +++ b/usr/src/uts/common/io/dmfe/dmfe_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -3097,6 +3097,7 @@ dmfe_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_callbacks = &dmfe_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = ETHERMTU; + macp->m_margin = VLAN_TAGSZ; /* * Finally, we're ready to register ourselves with the MAC layer diff --git a/usr/src/uts/common/io/e1000g/e1000g_main.c b/usr/src/uts/common/io/e1000g/e1000g_main.c index e572e708f1..484eb0c2e8 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_main.c +++ b/usr/src/uts/common/io/e1000g/e1000g_main.c @@ -564,6 +564,7 @@ e1000g_register_mac(struct e1000g *Adapter) hw->mac.max_frame_size - 256 : (hw->mac.max_frame_size != ETHERMAX) ? 
hw->mac.max_frame_size - 24 : ETHERMTU; + mac->m_margin = VLAN_TAGSZ; err = mac_register(mac, &Adapter->mh); mac_free(mac); diff --git a/usr/src/uts/common/io/gld.c b/usr/src/uts/common/io/gld.c index 240d81c25e..d14b0eff00 100644 --- a/usr/src/uts/common/io/gld.c +++ b/usr/src/uts/common/io/gld.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -212,8 +212,7 @@ extern void gld_sr_dump(gld_mac_info_t *); uint32_t gld_global_options = GLD_OPT_NO_ETHRXSNAP; /* - * VLANs are only supported on ethernet devices that manipulate VLAN headers - * themselves. + * The device is of DL_ETHER type and is able to support VLAN by itself. */ #define VLAN_CAPABLE(macinfo) \ ((macinfo)->gldm_type == DL_ETHER && \ @@ -655,6 +654,12 @@ gld_register(dev_info_t *devinfo, char *devname, gld_mac_info_t *macinfo) } /* + * Correct margin size if it is not set. + */ + if (VLAN_CAPABLE(macinfo) && (macinfo->gldm_margin == 0)) + macinfo->gldm_margin = VTAG_SIZE; + + /* * For now, only Infiniband drivers can use MDT. Do not add * support for Ethernet, FDDI or TR. 
*/ @@ -2214,7 +2219,6 @@ gld_start(queue_t *q, mblk_t *mp, int caller, uint32_t upri) } return (GLD_SUCCESS); - badarg: freemsg(mp); @@ -3396,6 +3400,23 @@ gld_ioctl(queue_t *q, mblk_t *mp) gld_fastpath(gld, q, mp); break; + case DLIOCMARGININFO: { /* margin size */ + int err; + + if ((macinfo = gld->gld_mac_info) == NULL) { + miocnak(q, mp, 0, EINVAL); + break; + } + + if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) { + miocnak(q, mp, 0, err); + break; + } + + *((uint32_t *)mp->b_cont->b_rptr) = macinfo->gldm_margin; + miocack(q, mp, sizeof (uint32_t), 0); + break; + } default: macinfo = gld->gld_mac_info; if (macinfo == NULL || macinfo->gldm_ioctl == NULL) { diff --git a/usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c b/usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c index 6e6a937114..69feb36606 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c +++ b/usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -87,57 +87,6 @@ rds_do_ip_ioctl(int cmd, int len, caddr_t arg) return (err); } -static int -rds_dl_info(ldi_handle_t lh, dl_info_ack_t *info) -{ - dl_info_req_t *info_req; - union DL_primitives *dl_prim; - mblk_t *mp; - k_sigset_t smask; - int error; - - if ((mp = allocb(sizeof (dl_info_req_t), BPRI_MED)) == NULL) { - return (ENOMEM); - } - - mp->b_datap->db_type = M_PROTO; - - info_req = (dl_info_req_t *)(uintptr_t)mp->b_wptr; - mp->b_wptr += sizeof (dl_info_req_t); - info_req->dl_primitive = DL_INFO_REQ; - - sigintr(&smask, 0); - if ((error = ldi_putmsg(lh, mp)) != 0) { - sigunintr(&smask); - return (error); - } - if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) { - sigunintr(&smask); - return (error); - } - sigunintr(&smask); - - dl_prim = (union DL_primitives *)(uintptr_t)mp->b_rptr; - switch (dl_prim->dl_primitive) { - case DL_INFO_ACK: - if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) < - sizeof (dl_info_ack_t)) { - error = -1; - } else { - *info = *(dl_info_ack_t *)(uintptr_t)mp->b_rptr; - error = 0; - } - break; - default: - error = -1; - break; - } - - freemsg(mp); - return (error); -} - - /* * Return 0 if the interface is IB. * Return error (>0) if any error is encountered during processing. 
@@ -153,6 +102,7 @@ rds_is_ib_interface(char *name) dl_info_ack_t info; int ret = 0; int i; + k_sigset_t smask; /* * ibd devices are only style 2 devices @@ -186,7 +136,9 @@ rds_is_ib_interface(char *name) return (ret); } - ret = rds_dl_info(lh, &info); + sigintr(&smask, 0); + ret = dl_info(lh, &info, NULL, NULL, NULL); + sigunintr(&smask); (void) ldi_close(lh, FREAD|FWRITE, kcred); if (ret != 0) { return (ret); diff --git a/usr/src/uts/common/io/igb/igb_main.c b/usr/src/uts/common/io/igb/igb_main.c index 954c12d05a..3e19c58bef 100644 --- a/usr/src/uts/common/io/igb/igb_main.c +++ b/usr/src/uts/common/io/igb/igb_main.c @@ -633,6 +633,7 @@ igb_register_mac(igb_t *igb) mac->m_min_sdu = 0; mac->m_max_sdu = igb->max_frame_size - sizeof (struct ether_vlan_header) - ETHERFCSL; + mac->m_margin = VLAN_TAGSZ; status = mac_register(mac, &igb->mac_hdl); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index d093353ba3..d12bfdf021 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -32,15 +32,18 @@ #include <sys/types.h> #include <sys/conf.h> +#include <sys/id_space.h> #include <sys/stat.h> +#include <sys/mkdev.h> #include <sys/stream.h> #include <sys/strsun.h> #include <sys/strsubr.h> #include <sys/dlpi.h> +#include <sys/dls.h> #include <sys/modhash.h> +#include <sys/vlan.h> #include <sys/mac.h> #include <sys/mac_impl.h> -#include <sys/dls.h> #include <sys/dld.h> #include <sys/modctl.h> #include <sys/fs/dv_node.h> @@ -58,6 +61,8 @@ static mod_hash_t *i_mac_impl_hash; krwlock_t i_mac_impl_lock; uint_t i_mac_impl_count; static kmem_cache_t *mac_vnic_tx_cache; +static id_space_t *minor_ids; +static uint32_t minor_count; #define MACTYPE_KMODDIR "mac" #define MACTYPE_HASHSZ 67 @@ -87,6 +92,7 @@ i_mac_constructor(void *buf, void *arg, int kmflag) mip->mi_linkstate = LINK_STATE_UNKNOWN; rw_init(&mip->mi_state_lock, NULL, RW_DRIVER, NULL); + rw_init(&mip->mi_gen_lock, NULL, RW_DRIVER, NULL); rw_init(&mip->mi_data_lock, NULL, RW_DRIVER, NULL); rw_init(&mip->mi_notify_lock, NULL, RW_DRIVER, NULL); rw_init(&mip->mi_rx_lock, NULL, RW_DRIVER, NULL); @@ -107,11 +113,13 @@ i_mac_destructor(void *buf, void *arg) mac_impl_t *mip = buf; ASSERT(mip->mi_ref == 0); + ASSERT(!mip->mi_exclusive); ASSERT(mip->mi_active == 0); ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN); ASSERT(mip->mi_devpromisc == 0); ASSERT(mip->mi_promisc == 0); ASSERT(mip->mi_mmap == NULL); + ASSERT(mip->mi_mmrp == NULL); ASSERT(mip->mi_mnfp == NULL); ASSERT(mip->mi_resource_add == NULL); ASSERT(mip->mi_ksp == NULL); @@ -119,6 +127,7 @@ i_mac_destructor(void *buf, void *arg) ASSERT(mip->mi_notify_bits == 0); ASSERT(mip->mi_notify_thread == NULL); + rw_destroy(&mip->mi_gen_lock); rw_destroy(&mip->mi_state_lock); rw_destroy(&mip->mi_data_lock); rw_destroy(&mip->mi_notify_lock); @@ -357,14 +366,26 @@ mac_init(void) MACTYPE_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); + + /* + * Allocate an id space to manage minor numbers. 
The range of the + * space will be from MAC_MAX_MINOR+1 to MAXMIN32 (maximum legal + * minor number is MAXMIN, but id_t is type of integer and does not + * allow MAXMIN). + */ + minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, MAXMIN32); + ASSERT(minor_ids != NULL); + minor_count = 0; } int mac_fini(void) { - if (i_mac_impl_count > 0) + if (i_mac_impl_count > 0 || minor_count > 0) return (EBUSY); + id_space_destroy(minor_ids); + mod_hash_destroy_hash(i_mac_impl_hash); rw_destroy(&i_mac_impl_lock); @@ -379,13 +400,9 @@ mac_fini(void) * Client functions. */ -int -mac_open(const char *macname, mac_handle_t *mhp) +static int +mac_hold(const char *macname, mac_impl_t **pmip) { - char driver[MAXNAMELEN]; - uint_t ddi_instance; - major_t major; - dev_info_t *dip; mac_impl_t *mip; int err; @@ -397,74 +414,170 @@ mac_open(const char *macname, mac_handle_t *mhp) return (EINVAL); /* - * Split the device name into driver and instance components. + * Look up its entry in the global hash table. */ - if (ddi_parse(macname, driver, &ddi_instance) != DDI_SUCCESS) - return (EINVAL); + rw_enter(&i_mac_impl_lock, RW_WRITER); + err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname, + (mod_hash_val_t *)&mip); - if ((strcmp(driver, "aggr") == 0) || (strcmp(driver, "vnic") == 0)) - ddi_instance = 0; + if (err != 0) { + rw_exit(&i_mac_impl_lock); + return (ENOENT); + } - /* - * Get the major number of the driver. - */ - if ((major = ddi_name_to_major(driver)) == (major_t)-1) - return (EINVAL); + if (mip->mi_disabled) { + rw_exit(&i_mac_impl_lock); + return (ENOENT); + } - /* - * Hold the given instance to prevent it from being detached. - * This will also attach the instance if it is not currently attached. - * Currently we ensure that mac_register() (called by the driver's - * attach entry point) and all code paths under it cannot possibly - * call mac_open() because this would lead to a recursive attach - * panic. 
- */ - if ((dip = ddi_hold_devi_by_instance(major, ddi_instance, 0)) == NULL) - return (EINVAL); + if (mip->mi_exclusive) { + rw_exit(&i_mac_impl_lock); + return (EBUSY); + } + + mip->mi_ref++; + rw_exit(&i_mac_impl_lock); + + *pmip = mip; + return (0); +} + +static void +mac_rele(mac_impl_t *mip) +{ + rw_enter(&i_mac_impl_lock, RW_WRITER); + ASSERT(mip->mi_ref != 0); + if (--mip->mi_ref == 0) + ASSERT(!mip->mi_activelink); + rw_exit(&i_mac_impl_lock); +} + +int +mac_hold_exclusive(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; /* * Look up its entry in the global hash table. */ -again: rw_enter(&i_mac_impl_lock, RW_WRITER); - err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname, - (mod_hash_val_t *)&mip); - if (err != 0) { - err = ENOENT; - goto failed; + if (mip->mi_disabled) { + rw_exit(&i_mac_impl_lock); + return (ENOENT); } - if (mip->mi_disabled) { + if (mip->mi_ref != 0) { rw_exit(&i_mac_impl_lock); - goto again; + return (EBUSY); } + ASSERT(!mip->mi_exclusive); + mip->mi_ref++; + mip->mi_exclusive = B_TRUE; + rw_exit(&i_mac_impl_lock); + return (0); +} + +void +mac_rele_exclusive(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + /* + * Look up its entry in the global hash table. + */ + rw_enter(&i_mac_impl_lock, RW_WRITER); + ASSERT(mip->mi_ref == 1 && mip->mi_exclusive); + mip->mi_ref--; + mip->mi_exclusive = B_FALSE; rw_exit(&i_mac_impl_lock); +} + +int +mac_open(const char *macname, mac_handle_t *mhp) +{ + mac_impl_t *mip; + int err; + + /* + * Look up its entry in the global hash table. + */ + if ((err = mac_hold(macname, &mip)) != 0) + return (err); + + rw_enter(&mip->mi_gen_lock, RW_WRITER); + + if ((mip->mi_oref != 0) || + !(mip->mi_callbacks->mc_callbacks & MC_OPEN)) { + goto done; + } + /* + * Note that we do not hold i_mac_impl_lock when calling the + * mc_open() callback function to avoid deadlock with the + * i_mac_notify() function. 
+ */ + if ((err = mip->mi_open(mip->mi_driver)) != 0) { + rw_exit(&mip->mi_gen_lock); + mac_rele(mip); + return (err); + } + +done: + mip->mi_oref++; + rw_exit(&mip->mi_gen_lock); *mhp = (mac_handle_t)mip; return (0); +} -failed: - rw_exit(&i_mac_impl_lock); - ddi_release_devi(dip); +int +mac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp) +{ + dls_dl_handle_t dlh; + int err; + + if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0) + return (err); + + if (dls_devnet_vid(dlh) != VLAN_ID_NONE) { + err = EINVAL; + goto done; + } + + err = mac_open(dls_devnet_mac(dlh), mhp); + +done: + dls_devnet_rele_tmp(dlh); return (err); } +int +mac_open_by_linkname(const char *link, mac_handle_t *mhp) +{ + datalink_id_t linkid; + int err; + + if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0) + return (err); + return (mac_open_by_linkid(linkid, mhp)); +} + void mac_close(mac_handle_t mh) { mac_impl_t *mip = (mac_impl_t *)mh; - dev_info_t *dip = mip->mi_dip; - rw_enter(&i_mac_impl_lock, RW_WRITER); + rw_enter(&mip->mi_gen_lock, RW_WRITER); - ASSERT(mip->mi_ref != 0); - if (--mip->mi_ref == 0) { - ASSERT(!mip->mi_activelink); + ASSERT(mip->mi_oref != 0); + if (--mip->mi_oref == 0) { + if ((mip->mi_callbacks->mc_callbacks & MC_CLOSE)) + mip->mi_close(mip->mi_driver); } - ddi_release_devi(dip); - rw_exit(&i_mac_impl_lock); + rw_exit(&mip->mi_gen_lock); + + mac_rele(mip); } const mac_info_t * @@ -479,6 +592,18 @@ mac_devinfo_get(mac_handle_t mh) return (((mac_impl_t *)mh)->mi_dip); } +const char * +mac_name(mac_handle_t mh) +{ + return (((mac_impl_t *)mh)->mi_name); +} + +minor_t +mac_minor(mac_handle_t mh) +{ + return (((mac_impl_t *)mh)->mi_minor); +} + uint64_t mac_stat_get(mac_handle_t mh, uint_t stat) { @@ -751,10 +876,8 @@ mac_unicst_set(mac_handle_t mh, const uint8_t *addr) * This check is necessary otherwise it may call into mac_unicst_set * recursively. 
*/ - if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) { - err = 0; + if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) goto done; - } if ((err = mip->mi_unicst(mip->mi_driver, addr)) != 0) goto done; @@ -838,7 +961,6 @@ mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype) err = EPROTO; goto done; } - /* * Disable promiscuous mode on the device if this is the last * enabling. @@ -1248,6 +1370,59 @@ mac_free(mac_register_t *mregp) } /* + * Allocate a minor number. + */ +minor_t +mac_minor_hold(boolean_t sleep) +{ + minor_t minor; + + /* + * Grab a value from the arena. + */ + atomic_add_32(&minor_count, 1); + + if (sleep) + minor = (uint_t)id_alloc(minor_ids); + else + minor = (uint_t)id_alloc_nosleep(minor_ids); + + if (minor == 0) { + atomic_add_32(&minor_count, -1); + return (0); + } + + return (minor); +} + +/* + * Release a previously allocated minor number. + */ +void +mac_minor_rele(minor_t minor) +{ + /* + * Return the value to the arena. + */ + id_free(minor_ids, minor); + atomic_add_32(&minor_count, -1); +} + +uint32_t +mac_no_notification(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + return (mip->mi_unsup_note); +} + +boolean_t +mac_is_legacy(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + return (mip->mi_legacy); +} + +/* * mac_register() is how drivers register new MACs with the GLDv3 * framework. 
The mregp argument is allocated by drivers using the * mac_alloc() function, and can be freed using mac_free() immediately upon @@ -1258,12 +1433,16 @@ mac_free(mac_register_t *mregp) int mac_register(mac_register_t *mregp, mac_handle_t *mhp) { - mac_impl_t *mip; - mactype_t *mtype; - int err = EINVAL; - struct devnames *dnp; - minor_t minor; - boolean_t style1_created = B_FALSE, style2_created = B_FALSE; + mac_impl_t *mip; + mactype_t *mtype; + int err = EINVAL; + struct devnames *dnp = NULL; + uint_t instance; + boolean_t style1_created = B_FALSE; + boolean_t style2_created = B_FALSE; + mac_capab_legacy_t legacy; + char *driver; + minor_t minor = 0; /* Find the required MAC-Type plugin. */ if ((mtype = i_mactype_getplugin(mregp->m_type_ident)) == NULL) @@ -1277,23 +1456,59 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) */ mip->mi_disabled = B_TRUE; - mip->mi_drvname = ddi_driver_name(mregp->m_dip); /* - * Some drivers such as aggr need to register multiple MACs. Such - * drivers must supply a non-zero "instance" argument so that each - * MAC can be assigned a unique MAC name and can have unique - * kstats. - */ - mip->mi_instance = ((mregp->m_instance == 0) ? - ddi_get_instance(mregp->m_dip) : mregp->m_instance); + * When a mac is registered, the m_instance field can be set to: + * + * 0: Get the mac's instance number from m_dip. + * This is usually used for physical device dips. + * + * [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number. + * For example, when an aggregation is created with the key option, + * "key" will be used as the instance number. + * + * -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1]. + * This is often used when a MAC of a virtual link is registered + * (e.g., aggregation when "key" is not specified, or vnic). + * + * Note that the instance number is used to derive the mi_minor field + * of mac_impl_t, which will then be used to derive the name of kstats + * and the devfs nodes. 
The first 2 cases are needed to preserve + * backward compatibility. + */ + switch (mregp->m_instance) { + case 0: + instance = ddi_get_instance(mregp->m_dip); + break; + case ((uint_t)-1): + minor = mac_minor_hold(B_TRUE); + if (minor == 0) { + err = ENOSPC; + goto fail; + } + instance = minor - 1; + break; + default: + instance = mregp->m_instance; + if (instance >= MAC_MAX_MINOR) { + err = EINVAL; + goto fail; + } + break; + } + + mip->mi_minor = (minor_t)(instance + 1); + mip->mi_dip = mregp->m_dip; + + driver = (char *)ddi_driver_name(mip->mi_dip); /* Construct the MAC name as <drvname><instance> */ (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d", - mip->mi_drvname, mip->mi_instance); + driver, instance); mip->mi_driver = mregp->m_driver; mip->mi_type = mtype; + mip->mi_margin = mregp->m_margin; mip->mi_info.mi_media = mtype->mt_type; mip->mi_info.mi_nativemedia = mtype->mt_nativetype; mip->mi_info.mi_sdu_min = mregp->m_min_sdu; @@ -1374,20 +1589,39 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) } mip->mi_callbacks = mregp->m_callbacks; - mip->mi_dip = mregp->m_dip; - /* * Set up the possible transmit routines. */ mip->mi_txinfo.mt_fn = mip->mi_tx; mip->mi_txinfo.mt_arg = mip->mi_driver; + mip->mi_legacy = mac_capab_get((mac_handle_t)mip, + MAC_CAPAB_LEGACY, &legacy); + + if (mip->mi_legacy) { + /* + * Legacy device. Messages being sent will be looped back + * by the underlying driver. Therefore the txloop function + * pointer is the same as the tx function pointer. + */ + mip->mi_txloopinfo.mt_fn = mip->mi_txinfo.mt_fn; + mip->mi_txloopinfo.mt_arg = mip->mi_txinfo.mt_arg; + mip->mi_unsup_note = legacy.ml_unsup_note; + mip->mi_phy_dev = legacy.ml_dev; + } else { + /* + * Normal device. The framework needs to do the loopback. 
+ */ + mip->mi_txloopinfo.mt_fn = mac_txloop; + mip->mi_txloopinfo.mt_arg = mip; + mip->mi_unsup_note = 0; + mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip), + ddi_get_instance(mip->mi_dip) + 1); + } + mip->mi_vnic_txinfo.mt_fn = mac_vnic_tx; mip->mi_vnic_txinfo.mt_arg = mip; - mip->mi_txloopinfo.mt_fn = mac_txloop; - mip->mi_txloopinfo.mt_arg = mip; - mip->mi_vnic_txloopinfo.mt_fn = mac_vnic_txloop; mip->mi_vnic_txloopinfo.mt_arg = mip; @@ -1404,39 +1638,31 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) */ mac_stat_create(mip); - err = EEXIST; - /* Create a style-2 DLPI device */ - if (ddi_create_minor_node(mip->mi_dip, (char *)mip->mi_drvname, - S_IFCHR, 0, DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS) - goto fail; - style2_created = B_TRUE; - - /* Create a style-1 DLPI device */ - minor = (minor_t)mip->mi_instance + 1; - if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR, minor, - DDI_NT_NET, 0) != DDI_SUCCESS) - goto fail; - style1_created = B_TRUE; - - /* - * Create a link for this MAC. The link name will be the same as - * the MAC name. 
- */ - err = dls_create(mip->mi_name, mip->mi_name); - if (err != 0) - goto fail; - /* set the gldv3 flag in dn_flags */ dnp = &devnamesp[ddi_driver_major(mip->mi_dip)]; LOCK_DEV_OPS(&dnp->dn_lock); - dnp->dn_flags |= DN_GLDV3_DRIVER; + dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER); UNLOCK_DEV_OPS(&dnp->dn_lock); + if (mip->mi_minor < MAC_MAX_MINOR + 1) { + /* Create a style-2 DLPI device */ + if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0, + DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS) + goto fail; + style2_created = B_TRUE; + + /* Create a style-1 DLPI device */ + if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR, + mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS) + goto fail; + style1_created = B_TRUE; + } + rw_enter(&i_mac_impl_lock, RW_WRITER); if (mod_hash_insert(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) { + rw_exit(&i_mac_impl_lock); - VERIFY(dls_destroy(mip->mi_name) == 0); err = EEXIST; goto fail; } @@ -1446,15 +1672,21 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) */ mip->mi_disabled = B_FALSE; - cmn_err(CE_NOTE, "!%s registered", mip->mi_name); - rw_exit(&i_mac_impl_lock); atomic_inc_32(&i_mac_impl_count); + + cmn_err(CE_NOTE, "!%s registered", mip->mi_name); *mhp = (mac_handle_t)mip; return (0); fail: + if (style1_created) + ddi_remove_minor_node(mip->mi_dip, mip->mi_name); + + if (style2_created) + ddi_remove_minor_node(mip->mi_dip, driver); + /* clean up notification thread */ if (mip->mi_notify_thread != NULL) { mutex_enter(&mip->mi_notify_bits_lock); @@ -1470,10 +1702,6 @@ fail: mip->mi_type->mt_addr_length); mip->mi_info.mi_unicst_addr = NULL; } - if (style1_created) - ddi_remove_minor_node(mip->mi_dip, mip->mi_name); - if (style2_created) - ddi_remove_minor_node(mip->mi_dip, (char *)mip->mi_drvname); mac_stat_destroy(mip); @@ -1488,6 +1716,11 @@ fail: mip->mi_pdata_size = 0; } + if (minor != 0) { + ASSERT(minor > MAC_MAX_MINOR); + mac_minor_rele(minor); + } + 
kmem_cache_free(i_mac_impl_cachep, mip); return (err); } @@ -1495,7 +1728,6 @@ fail: int mac_disable(mac_handle_t mh) { - int err; mac_impl_t *mip = (mac_impl_t *)mh; /* @@ -1510,14 +1742,6 @@ mac_disable(mac_handle_t mh) } mip->mi_disabled = B_TRUE; rw_exit(&i_mac_impl_lock); - - if ((err = dls_destroy(mip->mi_name)) != 0) { - rw_enter(&i_mac_impl_lock, RW_WRITER); - mip->mi_disabled = B_FALSE; - rw_exit(&i_mac_impl_lock); - return (err); - } - return (0); } @@ -1528,6 +1752,7 @@ mac_unregister(mac_handle_t mh) mac_impl_t *mip = (mac_impl_t *)mh; mod_hash_val_t val; mac_multicst_addr_t *p, *nextp; + mac_margin_req_t *mmr, *nextmmr; /* * See if there are any other references to this mac_t (e.g., VLAN's). @@ -1551,22 +1776,24 @@ mac_unregister(mac_handle_t mh) cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock); mutex_exit(&mip->mi_notify_bits_lock); - /* - * Remove both style 1 and style 2 minor nodes - */ - ddi_remove_minor_node(mip->mi_dip, (char *)mip->mi_drvname); - ddi_remove_minor_node(mip->mi_dip, mip->mi_name); + if (mip->mi_minor < MAC_MAX_MINOR + 1) { + ddi_remove_minor_node(mip->mi_dip, mip->mi_name); + ddi_remove_minor_node(mip->mi_dip, + (char *)ddi_driver_name(mip->mi_dip)); + } ASSERT(!mip->mi_activelink); mac_stat_destroy(mip); - (void) mod_hash_remove(i_mac_impl_hash, (mod_hash_key_t)mip->mi_name, - &val); + rw_enter(&i_mac_impl_lock, RW_WRITER); + (void) mod_hash_remove(i_mac_impl_hash, + (mod_hash_key_t)mip->mi_name, &val); ASSERT(mip == (mac_impl_t *)val); ASSERT(i_mac_impl_count > 0); atomic_dec_32(&i_mac_impl_count); + rw_exit(&i_mac_impl_lock); if (mip->mi_pdata != NULL) kmem_free(mip->mi_pdata, mip->mi_pdata_size); @@ -1582,6 +1809,15 @@ mac_unregister(mac_handle_t mh) } mip->mi_mmap = NULL; + /* + * Free the list of margin request. 
+ */ + for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) { + nextmmr = mmr->mmr_nextp; + kmem_free(mmr, sizeof (mac_margin_req_t)); + } + mip->mi_mmrp = NULL; + mip->mi_linkstate = LINK_STATE_UNKNOWN; kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length); mip->mi_info.mi_unicst_addr = NULL; @@ -1589,6 +1825,9 @@ mac_unregister(mac_handle_t mh) atomic_dec_32(&mip->mi_type->mt_ref); mip->mi_type = NULL; + if (mip->mi_minor > MAC_MAX_MINOR) + mac_minor_rele(mip->mi_minor); + cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name); kmem_cache_free(i_mac_impl_cachep, mip); @@ -1888,6 +2127,12 @@ mac_unicst_update(mac_handle_t mh, const uint8_t *addr) return; /* + * If the address has not changed, do nothing. + */ + if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) + return; + + /* * Save the address. */ bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length); @@ -2035,6 +2280,150 @@ mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) refresh(arg, (mip->mi_devpromisc != 0)); } +/* + * The mac client requests that the mac not to change its margin size to + * be less than the specified value. If "current" is B_TRUE, then the client + * requests the mac not to change its margin size to be smaller than the + * current size. Further, return the current margin size value in this case. + * + * We keep every requested size in an ordered list from largest to smallest. + */ +int +mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_margin_req_t **pp, *p; + int err = 0; + + rw_enter(&(mip->mi_data_lock), RW_WRITER); + if (current) + *marginp = mip->mi_margin; + + /* + * If the current margin value cannot satisfy the margin requested, + * return ENOTSUP directly. + */ + if (*marginp > mip->mi_margin) { + err = ENOTSUP; + goto done; + } + + /* + * Check whether the given margin is already in the list. If so, + * bump the reference count. 
+ */ + for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { + if (p->mmr_margin == *marginp) { + /* + * The margin requested is already in the list, + * so just bump the reference count. + */ + p->mmr_ref++; + goto done; + } + if (p->mmr_margin < *marginp) + break; + } + + + if ((p = kmem_zalloc(sizeof (mac_margin_req_t), KM_NOSLEEP)) == NULL) { + err = ENOMEM; + goto done; + } + + p->mmr_margin = *marginp; + p->mmr_ref++; + p->mmr_nextp = *pp; + *pp = p; + +done: + rw_exit(&(mip->mi_data_lock)); + return (err); +} + +/* + * The mac client requests to cancel its previous mac_margin_add() request. + * We remove the requested margin size from the list. + */ +int +mac_margin_remove(mac_handle_t mh, uint32_t margin) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + mac_margin_req_t **pp, *p; + int err = 0; + + rw_enter(&(mip->mi_data_lock), RW_WRITER); + /* + * Find the entry in the list for the given margin. + */ + for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { + if (p->mmr_margin == margin) { + if (--p->mmr_ref == 0) + break; + + /* + * There is still a reference to this address so + * there's nothing more to do. + */ + goto done; + } + } + + /* + * We did not find an entry for the given margin. + */ + if (p == NULL) { + err = ENOENT; + goto done; + } + + ASSERT(p->mmr_ref == 0); + + /* + * Remove it from the list. + */ + *pp = p->mmr_nextp; + kmem_free(p, sizeof (mac_margin_req_t)); +done: + rw_exit(&(mip->mi_data_lock)); + return (err); +} + +/* + * The mac client requests to get the mac's current margin value. 
+ */ +void +mac_margin_get(mac_handle_t mh, uint32_t *marginp) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + rw_enter(&(mip->mi_data_lock), RW_READER); + *marginp = mip->mi_margin; + rw_exit(&(mip->mi_data_lock)); +} + +boolean_t +mac_margin_update(mac_handle_t mh, uint32_t margin) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + uint32_t margin_needed = 0; + + rw_enter(&(mip->mi_data_lock), RW_WRITER); + + if (mip->mi_mmrp != NULL) + margin_needed = mip->mi_mmrp->mmr_margin; + + if (margin_needed <= margin) + mip->mi_margin = margin; + + rw_exit(&(mip->mi_data_lock)); + + if (margin_needed <= margin) + i_mac_notify(mip, MAC_NOTE_MARGIN); + + return (margin_needed <= margin); +} + boolean_t mac_do_active_set(mac_handle_t mh, boolean_t shareable) { @@ -2428,27 +2817,3 @@ done: mutex_exit(&i_mactype_lock); return (err); } - -int -mac_vlan_create(mac_handle_t mh, const char *name, minor_t minor) -{ - mac_impl_t *mip = (mac_impl_t *)mh; - - /* Create a style-1 DLPI device */ - if (ddi_create_minor_node(mip->mi_dip, (char *)name, S_IFCHR, minor, - DDI_NT_NET, 0) != DDI_SUCCESS) { - return (-1); - } - return (0); -} - -void -mac_vlan_remove(mac_handle_t mh, const char *name) -{ - mac_impl_t *mip = (mac_impl_t *)mh; - dev_info_t *dipp; - - ddi_remove_minor_node(mip->mi_dip, (char *)name); - dipp = ddi_get_parent(mip->mi_dip); - (void) devfs_clean(dipp, NULL, 0); -} diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c index f25afd3f1f..c7fdb0d8d5 100644 --- a/usr/src/uts/common/io/mac/mac_stat.c +++ b/usr/src/uts/common/io/mac/mac_stat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -154,10 +154,19 @@ mac_stat_create(mac_impl_t *mip) kstat_t *ksp; kstat_named_t *knp; uint_t count; + major_t major = getmajor(mip->mi_phy_dev); count = MAC_MOD_NKSTAT + MAC_NKSTAT + mip->mi_type->mt_statcount; - ksp = kstat_create(mip->mi_drvname, mip->mi_instance, MAC_KSTAT_NAME, - MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0); + if (!GLDV3_DRV(major)) { + ksp = kstat_create((const char *)ddi_major_to_name(major), + getminor(mip->mi_phy_dev) - 1, MAC_KSTAT_NAME, + MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0); + } else { + major = ddi_driver_major(mip->mi_dip); + ksp = kstat_create((const char *)ddi_major_to_name(major), + mip->mi_minor - 1, MAC_KSTAT_NAME, + MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0); + } if (ksp == NULL) return; diff --git a/usr/src/uts/common/io/mac/plugins/mac_ib.c b/usr/src/uts/common/io/mac/plugins/mac_ib.c index 97fd438dbd..35503c6c7f 100644 --- a/usr/src/uts/common/io/mac/plugins/mac_ib.c +++ b/usr/src/uts/common/io/mac/plugins/mac_ib.c @@ -69,6 +69,7 @@ _init(void) mtrp->mtr_ident = MAC_PLUGIN_IDENT_IB; mtrp->mtr_ops = &mac_ib_type_ops; mtrp->mtr_mactype = DL_IB; + mtrp->mtr_nativetype = DL_IB; mtrp->mtr_addrlen = IPOIB_ADDRL; mtrp->mtr_brdcst_addr = ib_brdcst; diff --git a/usr/src/uts/common/io/mxfe/mxfe.c b/usr/src/uts/common/io/mxfe/mxfe.c index e8468e4a61..091b877159 100644 --- a/usr/src/uts/common/io/mxfe/mxfe.c +++ b/usr/src/uts/common/io/mxfe/mxfe.c @@ -52,6 +52,7 @@ #include <sys/mac_ether.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <sys/vlan.h> #include "mxfe.h" #include "mxfeimpl.h" @@ -522,6 +523,7 @@ mxfe_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) macp->m_callbacks = &mxfe_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = ETHERMTU; + macp->m_margin = VLAN_TAGSZ; if (mac_register(macp, &mxfep->mxfe_mh) == DDI_SUCCESS) { mac_free(macp); diff --git a/usr/src/uts/common/io/net_dacf.c b/usr/src/uts/common/io/net_dacf.c new file mode 100644 index 0000000000..b0f4907425 --- /dev/null +++ 
b/usr/src/uts/common/io/net_dacf.c @@ -0,0 +1,136 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This module provides the dacf functions to be called after a device + * of "ddi_network" node type has attached and before it detaches. + * Specifically, net_postattach() will be called during the post-attach + * process of each "ddi_network" device, and net_predetach() will be + * called during the pre-detach process of each device. 
+ */ +#include <sys/modctl.h> +#include <sys/sunddi.h> +#include <sys/ddi.h> +#include <sys/dacf.h> +#include <sys/softmac.h> + +/* + * DACF entry points + */ +static int net_postattach(dacf_infohdl_t, dacf_arghdl_t, int); +static int net_predetach(dacf_infohdl_t, dacf_arghdl_t, int); + +static dacf_op_t net_config_op[] = { + { DACF_OPID_POSTATTACH, net_postattach }, + { DACF_OPID_PREDETACH, net_predetach }, + { DACF_OPID_END, NULL }, +}; + +static dacf_opset_t opsets[] = { + { "net_config", net_config_op }, + { NULL, NULL } +}; + +static struct dacfsw dacfsw = { + DACF_MODREV_1, + opsets +}; + +static struct modldacf modldacf = { + &mod_dacfops, + "net DACF", + &dacfsw +}; + +struct modlinkage modlinkage = { + MODREV_1, &modldacf, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +/* + * Post-attach routine invoked for DDI_NT_NET drivers by DACF framework + */ +/* ARGSUSED */ +static int +net_postattach(dacf_infohdl_t info_hdl, dacf_arghdl_t arg_hdl, int flags) +{ + dev_info_t *dip; + dev_t dev; + int err; + + dip = dacf_devinfo_node(info_hdl); + dev = dacf_get_dev(info_hdl); + + if ((err = softmac_create(dip, dev)) != 0) { + const char *drvname; + int ppa; + + drvname = ddi_driver_name(dip); + ppa = i_ddi_devi_get_ppa(dip); + cmn_err(CE_WARN, "net_postattach: cannot create softmac " + "for device %s%d (%d)", drvname, ppa, err); + return (DACF_FAILURE); + } + + return (DACF_SUCCESS); +} + +/* + * Pre-detach routine invoked for DDI_NT_NET drivers by DACF framework + */ +/* ARGSUSED */ +static int +net_predetach(dacf_infohdl_t info_hdl, dacf_arghdl_t arg_hdl, int flags) +{ + dev_info_t *dip; + dev_t dev; + + dip = dacf_devinfo_node(info_hdl); + dev = dacf_get_dev(info_hdl); + + if (softmac_destroy(dip, dev) != 0) + return (DACF_FAILURE); + + return (DACF_SUCCESS); +} diff --git 
a/usr/src/uts/common/io/nge/nge_main.c b/usr/src/uts/common/io/nge/nge_main.c index 11987c5742..c062ab4e87 100644 --- a/usr/src/uts/common/io/nge/nge_main.c +++ b/usr/src/uts/common/io/nge/nge_main.c @@ -1839,6 +1839,7 @@ nge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_callbacks = &nge_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = ngep->default_mtu; + macp->m_margin = VTAG_SIZE; /* * Finally, we're ready to register ourselves with the mac * interface; if this succeeds, we're all ready to start() diff --git a/usr/src/uts/common/io/nxge/nxge_main.c b/usr/src/uts/common/io/nxge/nxge_main.c index 1debecb937..9f856e5d92 100644 --- a/usr/src/uts/common/io/nxge/nxge_main.c +++ b/usr/src/uts/common/io/nxge/nxge_main.c @@ -4655,6 +4655,7 @@ nxge_mac_register(p_nxge_t nxgep) macp->m_min_sdu = 0; macp->m_max_sdu = nxgep->mac.maxframesize - sizeof (struct ether_header) - ETHERFCSL - 4; + macp->m_margin = VLAN_TAGSZ; status = mac_register(macp, &nxgep->mach); mac_free(macp); diff --git a/usr/src/uts/common/io/rge/rge_main.c b/usr/src/uts/common/io/rge/rge_main.c index c9bdf4d6d0..107cb01eca 100755 --- a/usr/src/uts/common/io/rge/rge_main.c +++ b/usr/src/uts/common/io/rge/rge_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1758,6 +1758,7 @@ rge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_callbacks = &rge_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = rgep->default_mtu; + macp->m_margin = VLAN_TAGSZ; /* * Finally, we're ready to register ourselves with the MAC layer diff --git a/usr/src/uts/common/io/sfe/sfe_util.c b/usr/src/uts/common/io/sfe/sfe_util.c index 0225b26088..0d04a520b5 100644 --- a/usr/src/uts/common/io/sfe/sfe_util.c +++ b/usr/src/uts/common/io/sfe/sfe_util.c @@ -4533,6 +4533,7 @@ gem_gld3_init(struct gem_dev *dp, mac_register_t *macp) macp->m_callbacks = &gem_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = dp->mtu; + macp->m_margin = VTAG_SIZE; } /* ======================================================================== */ diff --git a/usr/src/uts/common/io/softmac/softmac.conf b/usr/src/uts/common/io/softmac/softmac.conf new file mode 100644 index 0000000000..72163c27e6 --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +name="softmac" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/io/softmac/softmac_capab.c b/usr/src/uts/common/io/softmac/softmac_capab.c new file mode 100644 index 0000000000..d1178d19aa --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac_capab.c @@ -0,0 +1,756 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/mac.h> +#include <sys/softmac_impl.h> + +typedef struct softmac_capab_ops { + int (*sc_hcksum_ack)(void *, t_uscalar_t); + int (*sc_zcopy_ack)(void *, t_uscalar_t); + int (*sc_mdt_ack)(void *, dl_capab_mdt_t *); +} softmac_capab_ops_t; + +static int dl_capab(ldi_handle_t, mblk_t **); +static int softmac_fill_hcksum_ack(void *, t_uscalar_t); +static int softmac_fill_zcopy_ack(void *, t_uscalar_t); +static int softmac_fill_mdt_ack(void *, dl_capab_mdt_t *); +static int softmac_adv_hcksum_ack(void *, t_uscalar_t); +static int softmac_adv_zcopy_ack(void *, t_uscalar_t); +static int softmac_adv_mdt_ack(void *, dl_capab_mdt_t *); +static int softmac_enable_hcksum_ack(void *, t_uscalar_t); +static int softmac_enable_mdt_ack(void *, dl_capab_mdt_t *); +static int softmac_capab_send(softmac_lower_t *, boolean_t); +static int i_capab_ack(mblk_t *, queue_t *, softmac_capab_ops_t *, void *); +static int i_capab_id_ack(mblk_t *, dl_capability_sub_t *, queue_t *, + softmac_capab_ops_t *, void *); +static int i_capab_sub_ack(mblk_t *, dl_capability_sub_t *, queue_t *, + softmac_capab_ops_t *, void *); +static int i_capab_hcksum_ack(dl_capab_hcksum_t *, queue_t *, + softmac_capab_ops_t *, void *); +static int i_capab_zcopy_ack(dl_capab_zerocopy_t *, queue_t *, + softmac_capab_ops_t *, void *); +static int i_capab_mdt_ack(dl_capab_mdt_t *, queue_t *, + softmac_capab_ops_t *, void *); +static int i_capab_hcksum_verify(dl_capab_hcksum_t *, queue_t *); +static int i_capab_zcopy_verify(dl_capab_zerocopy_t *, queue_t *); +static int i_capab_mdt_verify(dl_capab_mdt_t *, queue_t *); + +static softmac_capab_ops_t softmac_fill_capab_ops = +{ + softmac_fill_hcksum_ack, + softmac_fill_zcopy_ack, + softmac_fill_mdt_ack, +}; + +static softmac_capab_ops_t softmac_adv_capab_ops = +{ + softmac_adv_hcksum_ack, + softmac_adv_zcopy_ack, + softmac_adv_mdt_ack +}; + +static softmac_capab_ops_t softmac_enable_capab_ops = 
+{ + softmac_enable_hcksum_ack, + NULL, + softmac_enable_mdt_ack +}; + +int +softmac_fill_capab(ldi_handle_t lh, softmac_t *softmac) +{ + mblk_t *mp = NULL; + union DL_primitives *prim; + int err = 0; + + if ((err = dl_capab(lh, &mp)) != 0) + goto exit; + + prim = (union DL_primitives *)mp->b_rptr; + if (prim->dl_primitive == DL_ERROR_ACK) { + err = -1; + goto exit; + } + + err = i_capab_ack(mp, NULL, &softmac_fill_capab_ops, softmac); + +exit: + freemsg(mp); + return (err); +} + +static int +dl_capab(ldi_handle_t lh, mblk_t **mpp) +{ + dl_capability_req_t *capb; + union DL_primitives *dl_prim; + mblk_t *mp; + int err; + + if ((mp = allocb(sizeof (dl_capability_req_t), BPRI_MED)) == NULL) + return (ENOMEM); + mp->b_datap->db_type = M_PROTO; + + capb = (dl_capability_req_t *)mp->b_wptr; + mp->b_wptr += sizeof (dl_capability_req_t); + bzero(mp->b_rptr, sizeof (dl_capability_req_t)); + capb->dl_primitive = DL_CAPABILITY_REQ; + + (void) ldi_putmsg(lh, mp); + if ((err = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) + return (err); + + dl_prim = (union DL_primitives *)mp->b_rptr; + switch (dl_prim->dl_primitive) { + case DL_CAPABILITY_ACK: + if (MBLKL(mp) < DL_CAPABILITY_ACK_SIZE) { + printf("dl_capability: DL_CAPABILITY_ACK " + "protocol err\n"); + break; + } + *mpp = mp; + return (0); + + case DL_ERROR_ACK: + if (MBLKL(mp) < DL_ERROR_ACK_SIZE) { + printf("dl_capability: DL_ERROR_ACK protocol err\n"); + break; + } + if (((dl_error_ack_t *)dl_prim)->dl_error_primitive != + DL_CAPABILITY_REQ) { + printf("dl_capability: DL_ERROR_ACK rtnd prim %u\n", + ((dl_error_ack_t *)dl_prim)->dl_error_primitive); + break; + } + + *mpp = mp; + return (0); + + default: + printf("dl_capability: bad ACK header %u\n", + dl_prim->dl_primitive); + break; + } + + freemsg(mp); + return (-1); +} + +static int +softmac_fill_hcksum_ack(void *arg, t_uscalar_t flags) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * There are two types of acks we process here: + * 1. 
acks in reply to a (first form) generic capability req + * (no ENABLE flag set) + * 2. acks in reply to a ENABLE capability req. + * (ENABLE flag set) + * Only the first type should be expected here. + */ + + if (flags & HCKSUM_ENABLE) { + cmn_err(CE_WARN, "softmac_fill_hcksum_ack: unexpected " + "HCKSUM_ENABLE flag in hardware checksum capability"); + } else if (flags & (HCKSUM_INET_PARTIAL | HCKSUM_INET_FULL_V4 | + HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM)) { + softmac->smac_capab_flags |= MAC_CAPAB_HCKSUM; + softmac->smac_hcksum_txflags = flags; + } + return (0); +} + +static int +softmac_fill_zcopy_ack(void *arg, t_uscalar_t flags) +{ + softmac_t *softmac = (softmac_t *)arg; + + ASSERT(flags == DL_CAPAB_VMSAFE_MEM); + softmac->smac_capab_flags &= (~MAC_CAPAB_NO_ZCOPY); + return (0); +} + +static int +softmac_fill_mdt_ack(void *arg, dl_capab_mdt_t *mdt) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * There are two types of acks we process here: + * 1. acks in reply to a (first form) generic capability req + * (ENABLE flag might be set by some drivers) + * 2. acks in reply to a ENABLE capability req. + * (ENABLE flag set) + */ + + ASSERT(mdt->mdt_version == MDT_VERSION_2); + softmac->smac_mdt = B_TRUE; + softmac->smac_mdt_capab.mdt_hdr_head = mdt->mdt_hdr_head; + softmac->smac_mdt_capab.mdt_hdr_tail = mdt->mdt_hdr_tail; + softmac->smac_mdt_capab.mdt_max_pld = mdt->mdt_max_pld; + softmac->smac_mdt_capab.mdt_span_limit = mdt->mdt_span_limit; + return (0); +} + +int +softmac_capab_enable(softmac_lower_t *slp) +{ + softmac_t *softmac = slp->sl_softmac; + int err; + + if (softmac->smac_no_capability_req) + return (0); + + /* + * Send DL_CAPABILITY_REQ to get capability advertisement. + */ + if ((err = softmac_capab_send(slp, B_FALSE)) != 0) + return (err); + + /* + * Send DL_CAPABILITY_REQ to enable specific capabilities. 
+ */ + if ((err = softmac_capab_send(slp, B_TRUE)) != 0) + return (err); + + return (0); +} + +static int +softmac_capab_send(softmac_lower_t *slp, boolean_t enable) +{ + softmac_t *softmac; + dl_capability_req_t *capb; + dl_capability_sub_t *subcapb; + mblk_t *reqmp, *ackmp; + int err; + size_t size = 0; + + softmac = slp->sl_softmac; + + if (enable) { + /* No need to enable DL_CAPAB_ZEROCOPY */ + if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) + size += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_hcksum_t); + + if (softmac->smac_mdt) { + if (!(softmac->smac_mdt_capab.mdt_flags & + DL_CAPAB_MDT_ENABLE)) { + /* + * The MDT capability was not enabled for the + * first time, enable it now. + */ + size += sizeof (dl_capability_sub_t) + + sizeof (dl_capab_mdt_t); + } + } + + if (size == 0) + return (0); + } + + /* + * Create DL_CAPABILITY_REQ message and send it down + */ + reqmp = allocb(sizeof (dl_capability_req_t) + size, BPRI_MED); + if (reqmp == NULL) + return (ENOMEM); + + bzero(reqmp->b_rptr, sizeof (dl_capability_req_t) + size); + + DB_TYPE(reqmp) = M_PROTO; + reqmp->b_wptr = reqmp->b_rptr + sizeof (dl_capability_req_t) + size; + + capb = (dl_capability_req_t *)reqmp->b_rptr; + capb->dl_primitive = DL_CAPABILITY_REQ; + + if (!enable) + goto output; + + capb->dl_sub_offset = sizeof (dl_capability_req_t); + + if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) { + dl_capab_hcksum_t *hck_subcapp; + + size = sizeof (dl_capability_sub_t) + + sizeof (dl_capab_hcksum_t); + capb->dl_sub_length += size; + + subcapb = (dl_capability_sub_t *)(capb + 1); + subcapb->dl_cap = DL_CAPAB_HCKSUM; + subcapb->dl_length = sizeof (dl_capab_hcksum_t); + hck_subcapp = (dl_capab_hcksum_t *)(subcapb + 1); + hck_subcapp->hcksum_version = HCKSUM_VERSION_1; + hck_subcapp->hcksum_txflags = + softmac->smac_hcksum_txflags | HCKSUM_ENABLE; + } + + if (softmac->smac_mdt) { + if (!(softmac->smac_mdt_capab.mdt_flags & + DL_CAPAB_MDT_ENABLE)) { + dl_capab_mdt_t *mdt_subcapp; + + size = 
sizeof (dl_capability_sub_t) + + sizeof (dl_capab_mdt_t); + capb->dl_sub_length += size; + + subcapb = (dl_capability_sub_t *) + ((uint8_t *)(subcapb + 1) + subcapb->dl_length); + + subcapb->dl_cap = DL_CAPAB_MDT; + subcapb->dl_length = sizeof (dl_capab_mdt_t); + mdt_subcapp = (dl_capab_mdt_t *)(subcapb + 1); + mdt_subcapp->mdt_version = MDT_VERSION_2; + mdt_subcapp->mdt_flags = + (softmac->smac_mdt_capab.mdt_flags | + DL_CAPAB_MDT_ENABLE); + mdt_subcapp->mdt_hdr_head = + softmac->smac_mdt_capab.mdt_hdr_head; + mdt_subcapp->mdt_hdr_tail = + softmac->smac_mdt_capab.mdt_hdr_tail; + mdt_subcapp->mdt_max_pld = + softmac->smac_mdt_capab.mdt_max_pld; + mdt_subcapp->mdt_span_limit = + softmac->smac_mdt_capab.mdt_span_limit; + } + } + +output: + err = softmac_proto_tx(slp, reqmp, &ackmp); + if (err == 0) { + if (enable) { + err = i_capab_ack(ackmp, NULL, + &softmac_enable_capab_ops, softmac); + } else { + err = i_capab_ack(ackmp, NULL, + &softmac_adv_capab_ops, softmac); + } + } + freemsg(ackmp); + + return (err); +} + +static int +softmac_adv_hcksum_ack(void *arg, t_uscalar_t flags) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * There are two types of acks we process here: + * 1. acks in reply to a (first form) generic capability req + * (no ENABLE flag set) + * 2. acks in reply to a ENABLE capability req. + * (ENABLE flag set) + * Only the first type should be expected here. + */ + + if (flags & HCKSUM_ENABLE) { + cmn_err(CE_WARN, "softmac_adv_hcksum_ack: unexpected " + "HCKSUM_ENABLE flag in hardware checksum capability"); + return (-1); + } else if (flags & (HCKSUM_INET_PARTIAL | HCKSUM_INET_FULL_V4 | + HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM)) { + /* + * The acknowledgement should be the same as we got when + * the softmac is created. 
+ */ + if (!(softmac->smac_capab_flags & MAC_CAPAB_HCKSUM)) { + ASSERT(B_FALSE); + return (-1); + } + if (softmac->smac_hcksum_txflags != flags) { + ASSERT(B_FALSE); + return (-1); + } + } + + return (0); +} + +static int +softmac_adv_zcopy_ack(void *arg, t_uscalar_t flags) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * The acknowledgement should be the same as we got when + * the softmac is created. + */ + ASSERT(flags == DL_CAPAB_VMSAFE_MEM); + if (softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY) { + ASSERT(B_FALSE); + return (-1); + } + + return (0); +} + +static int +softmac_adv_mdt_ack(void *arg, dl_capab_mdt_t *mdt) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * The acknowledgement should be the same as we got when + * the softmac is created. + */ + if (!softmac->smac_mdt) { + ASSERT(B_FALSE); + return (-1); + } + + if ((softmac->smac_mdt_capab.mdt_hdr_head != mdt->mdt_hdr_head) || + (softmac->smac_mdt_capab.mdt_hdr_tail != mdt->mdt_hdr_tail) || + (softmac->smac_mdt_capab.mdt_max_pld != mdt->mdt_max_pld) || + (softmac->smac_mdt_capab.mdt_span_limit != mdt->mdt_span_limit)) { + ASSERT(B_FALSE); + return (-1); + } + /* + * We need the mdt_flags field to know whether an additional + * DL_CAPAB_MDT_ENABLE is necessary. + */ + softmac->smac_mdt_capab.mdt_flags = mdt->mdt_flags; + return (0); +} + +static int +softmac_enable_hcksum_ack(void *arg, t_uscalar_t flags) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * There are two types of acks we process here: + * 1. acks in reply to a (first form) generic capability req + * (no ENABLE flag set) + * 2. acks in reply to a ENABLE capability req. + * (ENABLE flag set) + * Only the second type should be expected here. 
+ */ + + if (flags & HCKSUM_ENABLE) { + if ((flags & ~HCKSUM_ENABLE) != softmac->smac_hcksum_txflags) { + cmn_err(CE_WARN, "softmac_enable_hcksum_ack: unexpected" + " hardware capability flag value 0x%x", flags); + return (-1); + } + } else { + cmn_err(CE_WARN, "softmac_enable_hcksum_ack: " + "hardware checksum flag HCKSUM_ENABLE is not set"); + return (-1); + } + + return (0); +} + +static int +softmac_enable_mdt_ack(void *arg, dl_capab_mdt_t *mdt) +{ + softmac_t *softmac = (softmac_t *)arg; + + /* + * There are two types of acks we process here: + * 1. acks in reply to a (first form) generic capability req + * (no ENABLE flag set) + * 2. acks in reply to a ENABLE capability req. + * (ENABLE flag set) + * Only the second type should be expected here. + */ + + if (mdt->mdt_flags & DL_CAPAB_MDT_ENABLE) { + if ((softmac->smac_mdt_capab.mdt_hdr_head != + mdt->mdt_hdr_head) || + (softmac->smac_mdt_capab.mdt_hdr_tail != + mdt->mdt_hdr_tail) || + (softmac->smac_mdt_capab.mdt_max_pld != + mdt->mdt_max_pld) || + (softmac->smac_mdt_capab.mdt_span_limit != + mdt->mdt_span_limit)) { + cmn_err(CE_WARN, "softmac_enable_mdt_ack: " + "unexpected MDT capability value"); + return (-1); + } + softmac->smac_mdt_capab.mdt_flags = mdt->mdt_flags; + } else { + cmn_err(CE_WARN, "softmac_enable_mdt_ack: " + "MDT flag DL_CAPAB_MDT_ENABLE is not set"); + return (-1); + } + + return (0); +} + +static int +i_capab_ack(mblk_t *mp, queue_t *q, softmac_capab_ops_t *op, void *arg) +{ + union DL_primitives *prim; + dl_capability_ack_t *cap; + dl_capability_sub_t *sub, *end; + int err = 0; + + prim = (union DL_primitives *)mp->b_rptr; + ASSERT(prim->dl_primitive == DL_CAPABILITY_ACK); + + cap = (dl_capability_ack_t *)prim; + if (cap->dl_sub_length == 0) + goto exit; + + /* Is dl_sub_length correct? 
*/ + if ((sizeof (*cap) + cap->dl_sub_length) > MBLKL(mp)) { + err = EINVAL; + goto exit; + } + + sub = (dl_capability_sub_t *)((caddr_t)cap + cap->dl_sub_offset); + end = (dl_capability_sub_t *)((caddr_t)cap + cap->dl_sub_length + - sizeof (*sub)); + for (; (sub <= end) && (err == 0); ) { + switch (sub->dl_cap) { + case DL_CAPAB_ID_WRAPPER: + err = i_capab_id_ack(mp, sub, q, op, arg); + break; + default: + err = i_capab_sub_ack(mp, sub, q, op, arg); + break; + } + sub = (dl_capability_sub_t *)((caddr_t)sub + sizeof (*sub) + + sub->dl_length); + } + +exit: + return (err); +} + +static int +i_capab_id_ack(mblk_t *mp, dl_capability_sub_t *outers, + queue_t *q, softmac_capab_ops_t *op, void *arg) +{ + dl_capab_id_t *capab_id; + dl_capability_sub_t *inners; + caddr_t capend; + int err = EINVAL; + + ASSERT(outers->dl_cap == DL_CAPAB_ID_WRAPPER); + + capend = (caddr_t)(outers + 1) + outers->dl_length; + if (capend > (caddr_t)mp->b_wptr) { + cmn_err(CE_WARN, "i_capab_id_ack: malformed " + "sub-capability too long"); + return (err); + } + + capab_id = (dl_capab_id_t *)(outers + 1); + + if (outers->dl_length < sizeof (*capab_id) || + (inners = &capab_id->id_subcap, + inners->dl_length > (outers->dl_length - sizeof (*inners)))) { + cmn_err(CE_WARN, "i_capab_id_ack: malformed " + "encapsulated capab type %d too long", + inners->dl_cap); + return (err); + } + + if ((q != NULL) && (!dlcapabcheckqid(&capab_id->id_mid, q))) { + cmn_err(CE_WARN, "i_capab_id_ack: pass-thru module(s) " + "detected, discarding capab type %d", inners->dl_cap); + return (err); + } + + /* Process the encapsulated sub-capability */ + return (i_capab_sub_ack(mp, inners, q, op, arg)); +} + +static int +i_capab_sub_ack(mblk_t *mp, dl_capability_sub_t *sub, queue_t *q, + softmac_capab_ops_t *op, void *arg) +{ + caddr_t capend; + dl_capab_hcksum_t *hcksum; + dl_capab_zerocopy_t *zcopy; + dl_capab_mdt_t *mdt; + int err = 0; + + capend = (caddr_t)(sub + 1) + sub->dl_length; + if (capend > (caddr_t)mp->b_wptr) { 
+ cmn_err(CE_WARN, "i_capab_sub_ack: " + "malformed sub-capability too long"); + return (EINVAL); + } + + switch (sub->dl_cap) { + case DL_CAPAB_HCKSUM: + hcksum = (dl_capab_hcksum_t *)(sub + 1); + err = i_capab_hcksum_ack(hcksum, q, op, arg); + break; + + case DL_CAPAB_ZEROCOPY: + zcopy = (dl_capab_zerocopy_t *)(sub + 1); + err = i_capab_zcopy_ack(zcopy, q, op, arg); + break; + + case DL_CAPAB_MDT: + mdt = (dl_capab_mdt_t *)(sub + 1); + err = i_capab_mdt_ack(mdt, q, op, arg); + break; + + default: + cmn_err(CE_WARN, "i_capab_sub_ack: unknown capab type %d", + sub->dl_cap); + err = EINVAL; + } + + return (err); +} + +static int +i_capab_hcksum_ack(dl_capab_hcksum_t *hcksum, queue_t *q, + softmac_capab_ops_t *op, void *arg) +{ + t_uscalar_t flags; + int err = 0; + + if ((err = i_capab_hcksum_verify(hcksum, q)) != 0) + return (err); + + flags = hcksum->hcksum_txflags; + + if (!(flags & (HCKSUM_INET_PARTIAL | HCKSUM_INET_FULL_V4 | + HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM | HCKSUM_ENABLE))) { + cmn_err(CE_WARN, "i_capab_hcksum_ack: invalid " + "hardware checksum capability flags 0x%x", flags); + return (EINVAL); + } + + if (op->sc_hcksum_ack) + return (op->sc_hcksum_ack(arg, flags)); + else { + cmn_err(CE_WARN, "i_capab_hcksum_ack: unexpected hardware " + "checksum acknowledgement"); + return (EINVAL); + } +} + +static int +i_capab_zcopy_ack(dl_capab_zerocopy_t *zcopy, queue_t *q, + softmac_capab_ops_t *op, void *arg) +{ + t_uscalar_t flags; + int err = 0; + + if ((err = i_capab_zcopy_verify(zcopy, q)) != 0) + return (err); + + flags = zcopy->zerocopy_flags; + if (!(flags & DL_CAPAB_VMSAFE_MEM)) { + cmn_err(CE_WARN, "i_capab_zcopy_ack: invalid zcopy capability " + "flags 0x%x", flags); + return (EINVAL); + } + if (op->sc_zcopy_ack) + return (op->sc_zcopy_ack(arg, flags)); + else { + cmn_err(CE_WARN, "i_capab_zcopy_ack: unexpected zcopy " + "acknowledgement"); + return (EINVAL); + } +} + +static int +i_capab_mdt_ack(dl_capab_mdt_t *mdt, queue_t *q, + 
softmac_capab_ops_t *op, void *arg) +{ + int err; + + if ((err = i_capab_mdt_verify(mdt, q)) != 0) + return (err); + + if (op->sc_mdt_ack) + return (op->sc_mdt_ack(arg, mdt)); + else { + cmn_err(CE_WARN, "i_capab_mdt_ack: unexpected MDT " + "acknowledgement"); + return (EINVAL); + } +} + +static int +i_capab_hcksum_verify(dl_capab_hcksum_t *hcksum, queue_t *q) +{ + if (hcksum->hcksum_version != HCKSUM_VERSION_1) { + cmn_err(CE_WARN, "i_capab_hcksum_verify: " + "unsupported hardware checksum capability (version %d, " + "expected %d)", hcksum->hcksum_version, HCKSUM_VERSION_1); + return (-1); + } + + if ((q != NULL) && !dlcapabcheckqid(&hcksum->hcksum_mid, q)) { + cmn_err(CE_WARN, "i_capab_hcksum_verify: unexpected pass-thru " + "module detected; hardware checksum capability discarded"); + return (-1); + } + return (0); +} + +static int +i_capab_zcopy_verify(dl_capab_zerocopy_t *zcopy, queue_t *q) +{ + if (zcopy->zerocopy_version != ZEROCOPY_VERSION_1) { + cmn_err(CE_WARN, "i_capab_zcopy_verify: unsupported zcopy " + "capability (version %d, expected %d)", + zcopy->zerocopy_version, ZEROCOPY_VERSION_1); + return (-1); + } + + if ((q != NULL) && !dlcapabcheckqid(&zcopy->zerocopy_mid, q)) { + cmn_err(CE_WARN, "i_capab_zcopy_verify: unexpected pass-thru " + "module detected; zcopy checksum capability discarded"); + return (-1); + } + return (0); +} + +static int +i_capab_mdt_verify(dl_capab_mdt_t *mdt, queue_t *q) +{ + if (mdt->mdt_version != MDT_VERSION_2) { + cmn_err(CE_WARN, "i_capab_mdt_verify: unsupported MDT " + "capability (version %d, expected %d)", + mdt->mdt_version, MDT_VERSION_2); + return (-1); + } + + if ((q != NULL) && !dlcapabcheckqid(&mdt->mdt_mid, q)) { + cmn_err(CE_WARN, "i_capab_mdt_verify: unexpected pass-thru " + "module detected; MDT capability discarded"); + return (-1); + } + return (0); +} diff --git a/usr/src/uts/common/io/softmac/softmac_ctl.c b/usr/src/uts/common/io/softmac/softmac_ctl.c new file mode 100644 index 0000000000..33472bd303 --- 
/dev/null +++ b/usr/src/uts/common/io/softmac/softmac_ctl.c @@ -0,0 +1,389 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/stropts.h> +#include <sys/softmac_impl.h> + +int +softmac_send_notify_req(softmac_lower_t *slp, uint32_t notifications) +{ + mblk_t *reqmp; + + /* + * create notify req message and send it down + */ + reqmp = mexchange(NULL, NULL, DL_NOTIFY_REQ_SIZE, M_PROTO, + DL_NOTIFY_REQ); + if (reqmp == NULL) + return (ENOMEM); + + ((dl_notify_req_t *)reqmp->b_rptr)->dl_notifications = notifications; + + return (softmac_proto_tx(slp, reqmp, NULL)); +} + +int +softmac_send_bind_req(softmac_lower_t *slp, uint_t sap) +{ + dl_bind_req_t *bind; + mblk_t *reqmp; + + /* + * create bind req message and send it down + */ + reqmp = mexchange(NULL, NULL, DL_BIND_REQ_SIZE, M_PROTO, DL_BIND_REQ); + if (reqmp == NULL) + return (ENOMEM); + + bind = (dl_bind_req_t *)reqmp->b_rptr; + bind->dl_sap = sap; + bind->dl_conn_mgmt = 0; + bind->dl_max_conind = 0; + bind->dl_xidtest_flg = 0; + bind->dl_service_mode = DL_CLDLS; + + return (softmac_proto_tx(slp, reqmp, NULL)); +} + +int +softmac_send_promisc_req(softmac_lower_t *slp, t_uscalar_t level, boolean_t on) +{ + mblk_t *reqmp; + size_t size; + t_uscalar_t dl_prim; + + /* + * create promisc message and send it down + */ + if (on) { + dl_prim = DL_PROMISCON_REQ; + size = DL_PROMISCON_REQ_SIZE; + } else { + dl_prim = DL_PROMISCOFF_REQ; + size = DL_PROMISCOFF_REQ_SIZE; + } + + reqmp = mexchange(NULL, NULL, size, M_PROTO, dl_prim); + if (reqmp == NULL) + return (ENOMEM); + + if (on) + ((dl_promiscon_req_t *)reqmp->b_rptr)->dl_level = level; + else + ((dl_promiscoff_req_t *)reqmp->b_rptr)->dl_level = level; + + return (softmac_proto_tx(slp, reqmp, NULL)); +} + +int +softmac_m_promisc(void *arg, boolean_t on) +{ + softmac_t *softmac = arg; + softmac_lower_t *slp = softmac->smac_lower; + + ASSERT(slp != NULL); + return (softmac_send_promisc_req(slp, DL_PROMISC_PHYS, on)); +} + +int +softmac_m_multicst(void *arg, boolean_t add, const uint8_t *mca) +{ + softmac_t *softmac = arg; + softmac_lower_t 
*slp; + dl_enabmulti_req_t *enabmulti; + dl_disabmulti_req_t *disabmulti; + mblk_t *reqmp; + t_uscalar_t dl_prim; + uint32_t size, addr_length; + + /* + * create multicst message and send it down + */ + addr_length = softmac->smac_addrlen; + if (add) { + size = sizeof (dl_enabmulti_req_t) + addr_length; + dl_prim = DL_ENABMULTI_REQ; + } else { + size = sizeof (dl_disabmulti_req_t) + addr_length; + dl_prim = DL_DISABMULTI_REQ; + } + + reqmp = mexchange(NULL, NULL, size, M_PROTO, dl_prim); + if (reqmp == NULL) + return (ENOMEM); + + if (add) { + enabmulti = (dl_enabmulti_req_t *)reqmp->b_rptr; + enabmulti->dl_addr_offset = sizeof (dl_enabmulti_req_t); + enabmulti->dl_addr_length = addr_length; + (void) memcpy(&enabmulti[1], mca, addr_length); + } else { + disabmulti = (dl_disabmulti_req_t *)reqmp->b_rptr; + disabmulti->dl_addr_offset = sizeof (dl_disabmulti_req_t); + disabmulti->dl_addr_length = addr_length; + (void) memcpy(&disabmulti[1], mca, addr_length); + } + + slp = softmac->smac_lower; + ASSERT(slp != NULL); + return (softmac_proto_tx(slp, reqmp, NULL)); +} + +int +softmac_m_unicst(void *arg, const uint8_t *macaddr) +{ + softmac_t *softmac = arg; + softmac_lower_t *slp; + dl_set_phys_addr_req_t *phyaddr; + mblk_t *reqmp; + size_t size; + + /* + * create set_phys_addr message and send it down + */ + size = DL_SET_PHYS_ADDR_REQ_SIZE + softmac->smac_addrlen; + reqmp = mexchange(NULL, NULL, size, M_PROTO, DL_SET_PHYS_ADDR_REQ); + if (reqmp == NULL) + return (ENOMEM); + + phyaddr = (dl_set_phys_addr_req_t *)reqmp->b_rptr; + phyaddr->dl_addr_offset = sizeof (dl_set_phys_addr_req_t); + phyaddr->dl_addr_length = softmac->smac_addrlen; + (void) memcpy(&phyaddr[1], macaddr, softmac->smac_addrlen); + + slp = softmac->smac_lower; + ASSERT(slp != NULL); + return (softmac_proto_tx(slp, reqmp, NULL)); +} + +void +softmac_m_ioctl(void *arg, queue_t *wq, mblk_t *mp) +{ + softmac_lower_t *slp = ((softmac_t *)arg)->smac_lower; + mblk_t *ackmp; + + ASSERT(slp != NULL); + 
softmac_ioctl_tx(slp, mp, &ackmp); + qreply(wq, ackmp); +} + +static void +softmac_process_notify_ind(queue_t *rq, mblk_t *mp) +{ + softmac_lower_t *slp = rq->q_ptr; + dl_notify_ind_t *dlnip = (dl_notify_ind_t *)mp->b_rptr; + softmac_t *softmac = slp->sl_softmac; + uint_t addroff, addrlen; + + ASSERT(dlnip->dl_primitive == DL_NOTIFY_IND); + + switch (dlnip->dl_notification) { + case DL_NOTE_PHYS_ADDR: + if (dlnip->dl_data != DL_CURR_PHYS_ADDR) + break; + + addroff = dlnip->dl_addr_offset; + addrlen = dlnip->dl_addr_length - softmac->smac_saplen; + if (addroff == 0 || addrlen != softmac->smac_addrlen || + !MBLKIN(mp, addroff, addrlen)) { + cmn_err(CE_NOTE, "softmac: got malformed " + "DL_NOTIFY_IND; length/offset %d/%d", + addrlen, addroff); + break; + } + + mac_unicst_update(softmac->smac_mh, mp->b_rptr + addroff); + break; + + case DL_NOTE_LINK_UP: + mac_link_update(softmac->smac_mh, LINK_STATE_UP); + break; + + case DL_NOTE_LINK_DOWN: + mac_link_update(softmac->smac_mh, LINK_STATE_DOWN); + break; + } + + freemsg(mp); +} + +static void +softmac_process_dlpi(softmac_lower_t *slp, mblk_t *mp, uint_t minlen, + t_uscalar_t reqprim) +{ + const char *ackname; + + ackname = dl_primstr(((union DL_primitives *)mp->b_rptr)->dl_primitive); + + if (MBLKL(mp) < minlen) { + cmn_err(CE_WARN, "softmac: got short %s", ackname); + freemsg(mp); + return; + } + + mutex_enter(&slp->sl_mutex); + if (slp->sl_pending_prim != reqprim) { + cmn_err(CE_NOTE, "softmac: got unexpected %s", ackname); + mutex_exit(&slp->sl_mutex); + freemsg(mp); + return; + } + + slp->sl_pending_prim = DL_PRIM_INVAL; + slp->sl_ack_mp = mp; + cv_signal(&slp->sl_cv); + mutex_exit(&slp->sl_mutex); +} + +void +softmac_rput_process_proto(queue_t *rq, mblk_t *mp) +{ + softmac_lower_t *slp = rq->q_ptr; + union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; + ssize_t len = MBLKL(mp); + const char *primstr; + + if (len < sizeof (t_uscalar_t)) { + cmn_err(CE_WARN, "softmac: got runt DLPI message"); + goto exit; + 
} + + primstr = dl_primstr(dlp->dl_primitive); + + switch (dlp->dl_primitive) { + case DL_OK_ACK: + if (len < DL_OK_ACK_SIZE) + goto runt; + + softmac_process_dlpi(slp, mp, DL_OK_ACK_SIZE, + dlp->ok_ack.dl_correct_primitive); + return; + + case DL_ERROR_ACK: + if (len < DL_ERROR_ACK_SIZE) + goto runt; + + cmn_err(CE_NOTE, "softmac: received DL_ERROR_ACK for " + "%s errno/unix_errno 0x%x/%d", + dl_primstr(dlp->error_ack.dl_error_primitive), + dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno); + + softmac_process_dlpi(slp, mp, DL_ERROR_ACK_SIZE, + dlp->error_ack.dl_error_primitive); + return; + + case DL_NOTIFY_IND: + if (len < DL_NOTIFY_IND_SIZE) + goto runt; + + softmac_process_notify_ind(rq, mp); + return; + + case DL_NOTIFY_ACK: + softmac_process_dlpi(slp, mp, DL_NOTIFY_ACK_SIZE, + DL_NOTIFY_REQ); + return; + + case DL_CAPABILITY_ACK: + softmac_process_dlpi(slp, mp, DL_CAPABILITY_ACK_SIZE, + DL_CAPABILITY_REQ); + return; + + case DL_BIND_ACK: + softmac_process_dlpi(slp, mp, DL_BIND_ACK_SIZE, DL_BIND_REQ); + return; + + case DL_CONTROL_ACK: + softmac_process_dlpi(slp, mp, DL_CONTROL_ACK_SIZE, + DL_CONTROL_REQ); + return; + + case DL_UNITDATA_IND: + case DL_PHYS_ADDR_ACK: + /* + * a. Because the stream is in DLIOCRAW mode, + * DL_UNITDATA_IND messages are not expected. + * b. The lower stream should not receive DL_PHYS_ADDR_REQ, + * so DL_PHYS_ADDR_ACK messages are also unexpected. 
+ */ + default: + cmn_err(CE_WARN, "softmac: got unexpected %s", primstr); + break; + } +exit: + freemsg(mp); + return; +runt: + cmn_err(CE_WARN, "softmac: got runt %s", primstr); + freemsg(mp); +} + +void +softmac_rput_process_notdata(queue_t *rq, mblk_t *mp) +{ + softmac_lower_t *slp = rq->q_ptr; + + switch (DB_TYPE(mp)) { + case M_PROTO: + case M_PCPROTO: + softmac_rput_process_proto(rq, mp); + break; + + case M_FLUSH: + if (*mp->b_rptr & FLUSHR) + flushq(rq, FLUSHDATA); + if (*mp->b_rptr & FLUSHW) + flushq(OTHERQ(rq), FLUSHDATA); + putnext(rq, mp); + break; + + case M_IOCACK: + case M_IOCNAK: + case M_COPYIN: + case M_COPYOUT: + mutex_enter(&slp->sl_mutex); + if (!slp->sl_pending_ioctl) { + mutex_exit(&slp->sl_mutex); + cmn_err(CE_NOTE, "softmac: got unexpected mblk " + "type 0x%x", DB_TYPE(mp)); + freemsg(mp); + break; + } + + slp->sl_pending_ioctl = B_FALSE; + slp->sl_ack_mp = mp; + cv_broadcast(&slp->sl_cv); + mutex_exit(&slp->sl_mutex); + return; + + default: + cmn_err(CE_NOTE, "softmac: got unsupported mblk type 0x%x", + DB_TYPE(mp)); + freemsg(mp); + break; + } +} diff --git a/usr/src/uts/common/io/softmac/softmac_dev.c b/usr/src/uts/common/io/softmac/softmac_dev.c new file mode 100644 index 0000000000..501cec84da --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac_dev.c @@ -0,0 +1,417 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/dld.h> +#include <inet/common.h> +#include <sys/stropts.h> +#include <sys/modctl.h> +#include <sys/avl.h> +#include <sys/softmac_impl.h> +#include <sys/softmac.h> + +dev_info_t *softmac_dip = NULL; + +static int softmac_open(queue_t *, dev_t *, int, int, cred_t *); +static int softmac_close(queue_t *); +static void softmac_rput(queue_t *, mblk_t *); +static void softmac_rsrv(queue_t *); +static void softmac_wput(queue_t *, mblk_t *); +static void softmac_wsrv(queue_t *); +static int softmac_attach(dev_info_t *, ddi_attach_cmd_t); +static int softmac_detach(dev_info_t *, ddi_detach_cmd_t); +static int softmac_info(dev_info_t *, ddi_info_cmd_t, void *, void **); + +static struct module_info softmac_modinfo = { + 0, + SOFTMAC_DEV_NAME, + 0, + INFPSZ, + 65536, + 1024 +}; + +/* + * hi-water mark is 1 because of the flow control mechanism implemented in + * dld. Refer to the comments in dld_str.c for details. 
+ */ +static struct module_info softmac_dld_modinfo = { + 0, + SOFTMAC_DEV_NAME, + 0, + INFPSZ, + 1, + 0 +}; + +static struct qinit softmac_urinit = { + (pfi_t)softmac_rput, /* qi_putp */ + (pfi_t)softmac_rsrv, /* qi_srvp */ + softmac_open, /* qi_qopen */ + softmac_close, /* qi_qclose */ + NULL, /* qi_qadmin */ + &softmac_modinfo /* qi_minfo */ +}; + +static struct qinit softmac_uwinit = { + (pfi_t)softmac_wput, /* qi_putp */ + (pfi_t)softmac_wsrv, /* qi_srvp */ + NULL, /* qi_qopen */ + NULL, /* qi_qclose */ + NULL, /* qi_qadmin */ + &softmac_modinfo /* qi_minfo */ +}; + +static struct streamtab softmac_tab = { + &softmac_urinit, /* st_rdinit */ + &softmac_uwinit /* st_wrinit */ +}; + +DDI_DEFINE_STREAM_OPS(softmac_ops, nulldev, nulldev, softmac_attach, + softmac_detach, nodev, softmac_info, D_MP, &softmac_tab); + +static struct qinit softmac_dld_r_qinit = { + NULL, NULL, dld_open, dld_close, NULL, &softmac_dld_modinfo +}; + +static struct qinit softmac_dld_w_qinit = { + (pfi_t)dld_wput, (pfi_t)dld_wsrv, NULL, NULL, NULL, + &softmac_dld_modinfo +}; + +static struct fmodsw softmac_fmodsw = { + SOFTMAC_DEV_NAME, + &softmac_tab, + D_MP +}; + +static struct modldrv softmac_modldrv = { + &mod_driverops, + "softmac driver", + &softmac_ops +}; + +static struct modlstrmod softmac_modlstrmod = { + &mod_strmodops, + "softmac module", + &softmac_fmodsw +}; +/* + * A single linkage that lists both the STREAMS module and the driver. + */ +static struct modlinkage softmac_modlinkage = { + MODREV_1, + &softmac_modlstrmod, + &softmac_modldrv, + NULL +}; +/* + * Module load entry point: initialize the softmac global state, then + * install the module linkage. If mod_install() fails, the global state + * is torn down again and the error is returned. + */ +int +_init(void) +{ + int err; + + softmac_init(); + + if ((err = mod_install(&softmac_modlinkage)) != 0) { + softmac_fini(); + return (err); + } + + return (0); +} +/* + * Module unload entry point: refuse with EBUSY while softmac_busy() + * reports true; otherwise remove the module linkage and, only if that + * succeeds, call softmac_fini(). + */ +int +_fini(void) +{ + int err; + + if (softmac_busy()) + return (EBUSY); + + if ((err = mod_remove(&softmac_modlinkage)) != 0) + return (err); + + softmac_fini(); + + return (0); +} +/* + * Module information entry point; forwards to mod_info(). + */ +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&softmac_modlinkage, modinfop)); +} +/* + * STREAMS open routine used for both the softmac module and the softmac + * device. Returns EBUSY if the queue already has private data. A + * MODOPEN allocates and initializes the per-stream softmac_lower_t + * (ENOMEM if the allocation fails); any other open re-points the queue + * pair at the dld qinit structures and returns the dld_open() result. + */ +static int +softmac_open(queue_t *rq, dev_t *devp, int flag, 
int sflag, cred_t *credp) +{ + softmac_lower_t *slp; + /* + * This is a self-cloning driver so that each queue should only + * get opened once. + */ + if (rq->q_ptr != NULL) + return (EBUSY); + + if (sflag == MODOPEN) { + /* + * This is the softmac module pushed over an underlying + * legacy device. Initialize the lower structure. + */ + if ((slp = kmem_zalloc(sizeof (*slp), KM_NOSLEEP)) == NULL) + return (ENOMEM); + + slp->sl_wq = WR(rq); + cv_init(&slp->sl_cv, NULL, CV_DRIVER, NULL); + mutex_init(&slp->sl_mutex, NULL, MUTEX_DRIVER, NULL); + cv_init(&slp->sl_ctl_cv, NULL, CV_DRIVER, NULL); + mutex_init(&slp->sl_ctl_mutex, NULL, MUTEX_DRIVER, NULL); + slp->sl_pending_prim = DL_PRIM_INVAL; + rq->q_ptr = WR(rq)->q_ptr = slp; + qprocson(rq); + return (0); + } + + /* + * Regular device open of a softmac DLPI node. We modify + * the queues' q_qinfo pointer such that all future STREAMS + * operations will go through dld's entry points (including + * dld_close()). + */ + rq->q_qinfo = &softmac_dld_r_qinit; + WR(rq)->q_qinfo = &softmac_dld_w_qinit; + return (dld_open(rq, devp, flag, sflag, credp)); +} +/* + * STREAMS close routine of the softmac module instance: turn off queue + * processing, clear the pointers kept in the softmac_lower_t, then + * destroy its locks and condition variables and free it. Always + * returns 0. + */ +static int +softmac_close(queue_t *rq) +{ + softmac_lower_t *slp = rq->q_ptr; + + /* + * Call the appropriate delete routine depending on whether this is + * a module or device. + */ + ASSERT(WR(rq)->q_next != NULL); + + qprocsoff(rq); + + slp->sl_softmac = NULL; + slp->sl_lh = NULL; + + /* + * slp->sl_handle could be non-NULL if it is in the aggregation. 
+ */ + slp->sl_handle = (mac_resource_handle_t)NULL; + + ASSERT(slp->sl_ack_mp == NULL); + ASSERT(slp->sl_ctl_inprogress == B_FALSE); + ASSERT(slp->sl_pending_prim == DL_PRIM_INVAL); + ASSERT(slp->sl_pending_ioctl == B_FALSE); + + cv_destroy(&slp->sl_cv); + mutex_destroy(&slp->sl_mutex); + cv_destroy(&slp->sl_ctl_cv); + mutex_destroy(&slp->sl_ctl_mutex); + + kmem_free(slp, sizeof (*slp)); + return (0); +} +/* + * Read-side put procedure of the softmac module. M_DATA is dropped + * while no softmac is attached to the lower stream; otherwise it is + * passed to mac_rx() when the data block is unshared (DB_REF == 1) and + * to softmac_rput_process_data() when it is shared. Truncated + * M_PROTO/M_PCPROTO messages and DL_UNITDATA_IND are freed. Everything + * else is handed to softmac_rput_process_notdata(). + */ +static void +softmac_rput(queue_t *rq, mblk_t *mp) +{ + softmac_lower_t *slp = rq->q_ptr; + union DL_primitives *dlp; + + /* + * This is the softmac module. + */ + ASSERT(WR(rq)->q_next != NULL); + ASSERT((mp->b_next == NULL) && (mp->b_prev == NULL)); + + switch (DB_TYPE(mp)) { + case M_DATA: + /* + * Some drivers start to send up packets even if not in the + * DL_IDLE state, where sl_softmac is not set yet. Drop the + * packet in this case. + */ + if (slp->sl_softmac == NULL) { + freemsg(mp); + return; + } + + /* + * This is the most common case. + */ + if (DB_REF(mp) == 1) { + ASSERT(slp->sl_softmac != NULL); + /* + * We don't need any locks to protect sl_handle + * because ip_input() can tolerate if sl_handle + * is reset to NULL when DL_CAPAB_POLL is + * disabled. 
+ */ + mac_rx(slp->sl_softmac->smac_mh, slp->sl_handle, mp); + return; + } else { + softmac_rput_process_data(slp, mp); + } + break; + case M_PROTO: + case M_PCPROTO: + if (MBLKL(mp) < sizeof (dlp->dl_primitive)) { + freemsg(mp); + break; + } + dlp = (union DL_primitives *)mp->b_rptr; + if (dlp->dl_primitive == DL_UNITDATA_IND) { + cmn_err(CE_WARN, "got unexpected %s message", + dl_primstr(DL_UNITDATA_IND)); + freemsg(mp); + break; + } + /*FALLTHROUGH*/ + default: + softmac_rput_process_notdata(rq, mp); + break; + } +} +/* + * Read-side service procedure; it has nothing to do. + */ +/* ARGSUSED */ +static void +softmac_rsrv(queue_t *rq) +{ +} +/* + * Write-side put procedure of the softmac module. The only message + * acted upon is the SMAC_IOC_START ioctl, which stores this stream's + * softmac_lower_t pointer in the ioctl payload and acks it. A payload + * of the wrong size and any other ioctl command are nak'ed with EINVAL; + * all other message types are freed. + */ +static void +softmac_wput(queue_t *wq, mblk_t *mp) +{ + /* + * This is the softmac module + */ + ASSERT(wq->q_next != NULL); + + switch (DB_TYPE(mp)) { + case M_IOCTL: { + struct iocblk *ioc = (struct iocblk *)mp->b_rptr; + + switch (ioc->ioc_cmd) { + case SMAC_IOC_START: { + softmac_lower_t *slp = wq->q_ptr; + smac_ioc_start_t *arg; + + if (ioc->ioc_count != sizeof (*arg)) { + miocnak(wq, mp, 0, EINVAL); + break; + } + + /* + * Assign the devname and perstream handle of the + * specific lower stream and return it as a part + * of the ioctl. + */ + arg = (smac_ioc_start_t *)mp->b_cont->b_rptr; + arg->si_slp = slp; + + miocack(wq, mp, sizeof (*arg), 0); + break; + } + default: + miocnak(wq, mp, 0, EINVAL); + break; + } + break; + } + default: + freemsg(mp); + break; + } +} +/* + * Write-side service procedure of the softmac module: calls + * mac_tx_update() when a softmac is attached to this lower stream. + */ +static void +softmac_wsrv(queue_t *wq) +{ + softmac_lower_t *slp = wq->q_ptr; + + /* + * This is the softmac module + */ + ASSERT(wq->q_next != NULL); + + /* + * Inform that the tx resource is available; mac_tx_update() will + * inform all the upper streams sharing this lower stream. 
+ */ + if (slp->sl_softmac != NULL) + mac_tx_update(slp->sl_softmac->smac_mh); +} +/* + * Driver attach entry point. Only DDI_ATTACH is accepted; the dip is + * remembered in softmac_dip. + */ +static int +softmac_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + ASSERT(ddi_get_instance(dip) == 0); + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + softmac_dip = dip; + + return (DDI_SUCCESS); +} +/* + * Driver detach entry point. Only DDI_DETACH is accepted; softmac_dip + * is cleared. + */ +/* ARGSUSED */ +static int +softmac_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + softmac_dip = NULL; + return (DDI_SUCCESS); +} +/* + * Driver getinfo entry point. DDI_INFO_DEVT2DEVINFO returns softmac_dip + * when it is set; DDI_INFO_DEVT2INSTANCE always stores NULL (instance 0) + * in *result. Anything else fails. + */ +/* ARGSUSED */ +static int +softmac_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + if (softmac_dip != NULL) { + *result = softmac_dip; + return (DDI_SUCCESS); + } + break; + + case DDI_INFO_DEVT2INSTANCE: + *result = NULL; + return (DDI_SUCCESS); + + } + + return (DDI_FAILURE); +} diff --git a/usr/src/uts/common/io/softmac/softmac_main.c b/usr/src/uts/common/io/softmac/softmac_main.c new file mode 100644 index 0000000000..8a218c53fb --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac_main.c @@ -0,0 +1,1192 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The softmac driver is used to "unify" non-GLDv3 drivers to the GLDv3 + * framework. It also creates the kernel datalink structure for each + * physical network device. + * + * Specifically, a softmac will be created for each physical network device + * (dip) during the device's post-attach process. When this softmac is + * created, the following will also be done: + * - create the device's <link name, linkid> mapping; + * - register the mac if this is a non-GLDv3 device and the media type is + * supported by the GLDv3 framework; + * - create the kernel data-link structure for this physical device; + * + * This softmac will be destroyed during the device's pre-detach process, + * and all the above will be undone. + */ + +#include <sys/types.h> +#include <sys/file.h> +#include <sys/cred.h> +#include <sys/dlpi.h> +#include <sys/sunndi.h> +#include <sys/modhash.h> +#include <sys/stropts.h> +#include <sys/sysmacros.h> +#include <sys/vlan.h> +#include <sys/softmac_impl.h> +#include <sys/softmac.h> +#include <sys/dls.h> + +/* + * Softmac hash table including softmacs for both style-2 and style-1 devices. 
+ */ +static krwlock_t softmac_hash_lock; +static mod_hash_t *softmac_hash; + +#define SOFTMAC_HASHSZ 64 + +static void softmac_mac_register(void *); +static int softmac_create_datalink(softmac_t *); +static int softmac_m_start(void *); +static void softmac_m_stop(void *); +static int softmac_m_open(void *); +static void softmac_m_close(void *); +static boolean_t softmac_m_getcapab(void *, mac_capab_t, void *); + +#define SOFTMAC_M_CALLBACK_FLAGS \ + (MC_RESOURCES | MC_IOCTL | MC_GETCAPAB | MC_OPEN | MC_CLOSE) + +static mac_callbacks_t softmac_m_callbacks = { + SOFTMAC_M_CALLBACK_FLAGS, + softmac_m_stat, + softmac_m_start, + softmac_m_stop, + softmac_m_promisc, + softmac_m_multicst, + softmac_m_unicst, + softmac_m_tx, + softmac_m_resources, + softmac_m_ioctl, + softmac_m_getcapab, + softmac_m_open, + softmac_m_close +}; +/* + * Create the global softmac hash table (string keys, SOFTMAC_HASHSZ + * buckets, no key or value destructors) and initialize + * softmac_hash_lock. + */ +void +softmac_init() +{ + softmac_hash = mod_hash_create_extended("softmac_hash", + SOFTMAC_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, + mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); + + rw_init(&softmac_hash_lock, NULL, RW_DEFAULT, NULL); +} +/* + * Undo softmac_init(): destroy softmac_hash_lock and the hash table. + */ +void +softmac_fini() +{ + rw_destroy(&softmac_hash_lock); + mod_hash_destroy_hash(softmac_hash); +} +/* + * mod_hash_walk() callback used by softmac_busy(): records that an entry + * was seen in the boolean_t that arg points to and terminates the walk. + */ +/* ARGSUSED */ +static uint_t +softmac_exist(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + boolean_t *pexist = arg; + + *pexist = B_TRUE; + return (MH_WALK_TERMINATE); +} +/* + * Return B_TRUE if the softmac hash table holds at least one entry. The + * table is walked with softmac_hash_lock held as reader. + */ +boolean_t +softmac_busy() +{ + boolean_t exist = B_FALSE; + + rw_enter(&softmac_hash_lock, RW_READER); + mod_hash_walk(softmac_hash, softmac_exist, &exist); + rw_exit(&softmac_hash_lock); + return (exist); +} + +/* + * This function is called for each minor node during the post-attach of + * each DDI_NT_NET device instance. Note that it is possible that a device + * instance has two minor nodes (DLPI style-1 and style-2), so that for that + * specific device, softmac_create() could be called twice. 
+ * + * A softmac_t is used to track each DDI_NT_NET device, and a softmac_dev_t + * is created to track each minor node. + * + * For each minor node of a legacy device, a taskq is started to finish + * softmac_mac_register(), which will finish the rest of work (see comments + * above softmac_mac_register()). + */ +int +softmac_create(dev_info_t *dip, dev_t dev) +{ + char devname[MAXNAMELEN]; + softmac_t *softmac; + softmac_dev_t *softmac_dev = NULL; + datalink_id_t linkid; + int index; + int ppa, err = 0; + mac_handle_t mh; + + /* + * Force the softmac driver to be attached. + */ + if (i_ddi_attach_pseudo_node(SOFTMAC_DEV_NAME) == NULL) { + cmn_err(CE_WARN, "softmac_create:softmac attach fails"); + return (ENXIO); + } + + ppa = ddi_get_instance(dip); + (void) snprintf(devname, MAXNAMELEN, "%s%d", ddi_driver_name(dip), ppa); + + /* + * We expect legacy devices have at most two minor nodes - one style-1 + * and one style-2. + */ + if (!GLDV3_DRV(ddi_driver_major(dip)) && + i_ddi_minor_node_count(dip, DDI_NT_NET) > 2) { + cmn_err(CE_WARN, "%s has more than 2 minor nodes; unsupported", + devname); + return (ENOTSUP); + } + + /* + * Check whether the softmac for the specified device already exists + */ + rw_enter(&softmac_hash_lock, RW_WRITER); + if ((err = mod_hash_find(softmac_hash, (mod_hash_key_t)devname, + (mod_hash_val_t *)&softmac)) != 0) { + + softmac = kmem_zalloc(sizeof (softmac_t), KM_SLEEP); + mutex_init(&softmac->smac_mutex, NULL, MUTEX_DRIVER, NULL); + cv_init(&softmac->smac_cv, NULL, CV_DRIVER, NULL); + rw_init(&softmac->smac_lock, NULL, RW_DRIVER, NULL); + (void) strlcpy(softmac->smac_devname, devname, MAXNAMELEN); + + /* + * Insert the softmac into the hash table. 
+ */ + err = mod_hash_insert(softmac_hash, + (mod_hash_key_t)softmac->smac_devname, + (mod_hash_val_t)softmac); + ASSERT(err == 0); + } + + mutex_enter(&softmac->smac_mutex); + if (softmac->smac_attachok_cnt == 0) { + /* + * Initialize the softmac if this is the post-attach of the + * first minor node. + */ + softmac->smac_flags = 0; + softmac->smac_umajor = ddi_driver_major(dip); + softmac->smac_uppa = ppa; + + /* + * Note that for GLDv3 devices, we create devfs minor nodes + * for VLANs as well. Assume a GLDv3 driver on which only + * a VLAN is created. During the detachment of this device + * instance, the following would happen: + * a. the pre-detach callback softmac_destroy() succeeds. + * Because the physical link itself is not in use, + * softmac_destroy() succeeds and destroys softmac_t; + * b. the device detach fails in mac_unregister() because + * this MAC is still used by a VLAN. + * c. the post-attach callback is then called which leads + * us here. Note that ddi_minor_node_count() returns 3 + * (including the minor node of the VLAN). In that case, + * we must correct the minor node count to 2 as that is + * the count of minor nodes that go through post-attach. + */ + if (GLDV3_DRV(ddi_driver_major(dip))) { + softmac->smac_flags |= SOFTMAC_GLDV3; + softmac->smac_cnt = 2; + } else { + softmac->smac_cnt = + i_ddi_minor_node_count(dip, DDI_NT_NET); + } + } + + index = (getmajor(dev) == ddi_name_to_major("clone")); + if (softmac->smac_softmac[index] != NULL) { + /* + * This is possible if the post_attach() is called: + * + * a. after pre_detach() fails. + * + * b. for a new round of reattachment. Note that DACF will not + * call pre_detach() for successfully post_attached minor + * nodes even when the post-attach failed after all. + * + * Both seem to be defects in the DACF framework. To work + * around it and only clear the SOFTMAC_ATTACH_DONE flag for + * the b case, a smac_attached_left field is used to tell + * the two cases apart. 
+ */ + ASSERT(softmac->smac_attachok_cnt != 0); + + if (softmac->smac_attached_left != 0) + /* case a */ + softmac->smac_attached_left--; + else if (softmac->smac_attachok_cnt != softmac->smac_cnt) { + /* case b */ + softmac->smac_flags &= ~SOFTMAC_ATTACH_DONE; + } + mutex_exit(&softmac->smac_mutex); + rw_exit(&softmac_hash_lock); + return (0); + } + mutex_exit(&softmac->smac_mutex); + rw_exit(&softmac_hash_lock); + + /* + * Inform dlmgmtd of this link so that softmac_hold_device() is able + * to know the existence of this link. This could fail if dlmgmtd + * is not yet started. + */ + (void) dls_mgmt_create(devname, makedevice(ddi_driver_major(dip), + ppa + 1), DATALINK_CLASS_PHYS, DL_OTHER, B_TRUE, &linkid); + + /* + * No lock is needed to access this softmac pointer, as pre-detach and + * post-attach won't happen at the same time. + */ + mutex_enter(&softmac->smac_mutex); + + softmac_dev = kmem_zalloc(sizeof (softmac_dev_t), KM_SLEEP); + softmac_dev->sd_dev = dev; + softmac->smac_softmac[index] = softmac_dev; + + /* + * Continue to register the mac and create the datalink only when all + * the minor nodes are attached. + */ + if (++softmac->smac_attachok_cnt != softmac->smac_cnt) { + mutex_exit(&softmac->smac_mutex); + return (0); + } + + if (!GLDV3_DRV(ddi_driver_major(dip))) { + + /* + * Note that this function could be called as a result of + * an open() system call, and spec_open() already locked the + * snode (SLOCKED is set). Therefore, we must start a + * taskq to finish the rest of work to sidestep the risk + * that our ldi_open_by_dev() call would again try to hold + * the same lock. + * + * If all the minor nodes have been attached, start the taskq + * to finish the rest of the work. 
+ */ + ASSERT(softmac->smac_taskq == NULL); + softmac->smac_taskq = taskq_dispatch(system_taskq, + softmac_mac_register, softmac, TQ_SLEEP); + mutex_exit(&softmac->smac_mutex); + return (0); + } + + if ((err = mac_open(softmac->smac_devname, &mh)) != 0) + goto done; + + softmac->smac_media = (mac_info(mh))->mi_nativemedia; + softmac->smac_mh = mh; + + /* + * We can safely release the reference on the mac because + * this mac will only be unregistered and destroyed when + * the device detaches, and the softmac will be destroyed + * before then (in the pre-detach routine of the device). + */ + mac_close(mh); + + /* + * Create the GLDv3 datalink for this mac. + */ + err = softmac_create_datalink(softmac); + +done: + if (err != 0) { + softmac->smac_mh = NULL; + kmem_free(softmac_dev, sizeof (softmac_dev_t)); + softmac->smac_softmac[index] = NULL; + --softmac->smac_attachok_cnt; + } + ASSERT(!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)); + softmac->smac_flags |= SOFTMAC_ATTACH_DONE; + softmac->smac_attacherr = err; + cv_broadcast(&softmac->smac_cv); + mutex_exit(&softmac->smac_mutex); + return (err); +} +/* + * MAC getcapab callback. Returns B_FALSE unless the bit for cap is set + * in smac_capab_flags. For MAC_CAPAB_HCKSUM and MAC_CAPAB_LEGACY the + * structure behind cap_data is filled in; every other advertised + * capability simply yields B_TRUE. + */ +static boolean_t +softmac_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) +{ + softmac_t *softmac = arg; + + if (!(softmac->smac_capab_flags & cap)) + return (B_FALSE); + + switch (cap) { + case MAC_CAPAB_HCKSUM: { + uint32_t *txflags = cap_data; + + *txflags = softmac->smac_hcksum_txflags; + break; + } + case MAC_CAPAB_LEGACY: { + mac_capab_legacy_t *legacy = cap_data; + + legacy->ml_unsup_note = ~softmac->smac_notifications & + (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_SPEED); + legacy->ml_dev = makedevice(softmac->smac_umajor, + softmac->smac_uppa + 1); + break; + } + + /* + * For the capabilities below, there's nothing for us to fill in; + * simply return B_TRUE if we support it. 
+ */ + case MAC_CAPAB_NO_ZCOPY: + case MAC_CAPAB_POLL: + case MAC_CAPAB_NO_NATIVEVLAN: + default: + break; + } + return (B_TRUE); +} +/* + * Pass the device name, media type and SOFTMAC_NOSUPP state of this + * softmac to dls_mgmt_update() and, on success, hand the resulting link + * id back through linkidp. EEXIST is treated as a link name conflict: + * it is logged and dls_mgmt_destroy() is called on the returned link id. + * The dls_mgmt_update() result is returned unchanged. + */ +static int +softmac_update_info(softmac_t *softmac, datalink_id_t *linkidp) +{ + datalink_id_t linkid = DATALINK_INVALID_LINKID; + uint32_t media; + int err; + + if ((err = dls_mgmt_update(softmac->smac_devname, softmac->smac_media, + softmac->smac_flags & SOFTMAC_NOSUPP, &media, &linkid)) == 0) { + *linkidp = linkid; + } + + if (err == EEXIST) { + /* + * There is a link name conflict. Either: + * + * - An existing link with the same device name with a + * different media type from the given type. + * Mark this link back to persistent only; or + * + * - We cannot assign the "suggested" name because + * GLDv3 and therefore vanity naming is not supported + * for this link type. Delete this link's <link name, + * linkid> mapping. + */ + if (media != softmac->smac_media) { + cmn_err(CE_WARN, "%s device %s conflicts with " + "existing %s device %s.", + dl_mactypestr(softmac->smac_media), + softmac->smac_devname, dl_mactypestr(media), + softmac->smac_devname); + (void) dls_mgmt_destroy(linkid, B_FALSE); + } else { + cmn_err(CE_WARN, "link name %s is already in-use.", + softmac->smac_devname); + (void) dls_mgmt_destroy(linkid, B_TRUE); + } + + cmn_err(CE_WARN, "%s device might not be available " + "for use.", softmac->smac_devname); + cmn_err(CE_WARN, "See dladm(1M) for more information."); + } + + return (err); +} + +/* + * This function: + * 1. provides the link's media type to dlmgmtd. + * 2. creates the GLDv3 datalink if the media type is supported by GLDv3. + */ +static int +softmac_create_datalink(softmac_t *softmac) +{ + datalink_id_t linkid = DATALINK_INVALID_LINKID; + int err; + + ASSERT(MUTEX_HELD(&softmac->smac_mutex)); + + /* + * First provide the media type of the physical link to dlmgmtd. 
+ * + * If the new <linkname, linkid> mapping operation failed with EBADF + * or ENOENT, it might because the dlmgmtd was not started in time + * (e.g., diskless boot); ignore the failure and continue. The + * mapping will be recreated once the daemon has started. + */ + if (((err = softmac_update_info(softmac, &linkid)) != 0) && + (err != EBADF) && (err != ENOENT)) { + return (err); + } + + /* + * Create the GLDv3 datalink. + */ + if ((!(softmac->smac_flags & SOFTMAC_NOSUPP)) && + ((err = dls_devnet_create(softmac->smac_mh, linkid)) != 0)) { + cmn_err(CE_WARN, "dls_devnet_create failed for %s", + softmac->smac_devname); + return (err); + } + + if (linkid == DATALINK_INVALID_LINKID) + softmac->smac_flags |= SOFTMAC_NEED_RECREATE; + + return (0); +} + +/* + * This function is only called for legacy devices. It: + * 1. registers the MAC for the legacy devices whose media type is supported + * by the GLDv3 framework. + * 2. creates the GLDv3 datalink if the media type is supported by GLDv3. + * + * The outcome is stored in smac_attacherr; SOFTMAC_ATTACH_DONE is then set + * and smac_cv is broadcast so that waiters can proceed. + */ +static void +softmac_mac_register(void *arg) +{ + softmac_t *softmac = arg; + softmac_dev_t *softmac_dev; + dev_t dev; + ldi_handle_t lh = NULL; + ldi_ident_t li = NULL; + int index; + boolean_t native_vlan = B_FALSE; + int err; + + /* + * Note that we do not need any locks to access this softmac pointer, + * as softmac_destroy() will wait until this function is called. + */ + ASSERT(softmac != NULL); + + if ((err = ldi_ident_from_dip(softmac_dip, &li)) != 0) { + mutex_enter(&softmac->smac_mutex); + goto done; + } + + /* + * Determine whether this legacy device support VLANs by opening + * the style-2 device node (if it exists) and attaching to a VLAN + * PPA (1000 + ppa). 
+ */ + dev = makedevice(ddi_name_to_major("clone"), softmac->smac_umajor); + err = ldi_open_by_dev(&dev, OTYP_CHR, FREAD|FWRITE, kcred, &lh, li); + if (err == 0) { + if (dl_attach(lh, softmac->smac_uppa + 1 * 1000, NULL) == 0) + native_vlan = B_TRUE; + (void) ldi_close(lh, FREAD|FWRITE, kcred); + } + + err = EINVAL; + for (index = 0; index < 2; index++) { + dl_info_ack_t dlia; + dl_error_ack_t dlea; + uint32_t notes; + struct strioctl iocb; + uint32_t margin; + int rval; + + if ((softmac_dev = softmac->smac_softmac[index]) == NULL) + continue; + + softmac->smac_dev = dev = softmac_dev->sd_dev; + if (ldi_open_by_dev(&dev, OTYP_CHR, FREAD|FWRITE, kcred, &lh, + li) != 0) { + continue; + } + + /* + * Pop all the intermediate modules in order to negotiate + * capabilities correctly. + */ + while (ldi_ioctl(lh, I_POP, 0, FKIOCTL, kcred, &rval) == 0) + ; + + /* DLPI style-1 or DLPI style-2? */ + if ((rval = dl_info(lh, &dlia, NULL, NULL, &dlea)) != 0) { + if (rval == ENOTSUP) { + cmn_err(CE_NOTE, "softmac: received " + "DL_ERROR_ACK to DL_INFO_ACK; " + "DLPI errno 0x%x, UNIX errno %d", + dlea.dl_errno, dlea.dl_unix_errno); + } + (void) ldi_close(lh, FREAD|FWRITE, kcred); + continue; + } + + /* + * Currently only DL_ETHER has GLDv3 mac plugin support. + * For media types that GLDv3 does not support, create a + * link id for it. 
+ */ + if ((softmac->smac_media = dlia.dl_mac_type) != DL_ETHER) { + (void) ldi_close(lh, FREAD|FWRITE, kcred); + err = 0; + break; + } + + if ((dlia.dl_provider_style == DL_STYLE2) && + (dl_attach(lh, softmac->smac_uppa, NULL) != 0)) { + (void) ldi_close(lh, FREAD|FWRITE, kcred); + continue; + } + /* + * Hand dl_bind() the error-ack buffer so that the values + * logged below describe this request. Passing NULL here + * left dlea holding stale or uninitialized data. This + * assumes dl_bind() fills the buffer the way dl_info() + * does -- confirm against the dl_bind() prototype. + */ + if ((rval = dl_bind(lh, 0, &dlea)) != 0) { + if (rval == ENOTSUP) { + cmn_err(CE_NOTE, "softmac: received " + "DL_ERROR_ACK to DL_BIND_ACK; " + "DLPI errno 0x%x, UNIX errno %d", + dlea.dl_errno, dlea.dl_unix_errno); + } + (void) ldi_close(lh, FREAD|FWRITE, kcred); + continue; + } + + /* + * Call dl_info() after dl_bind() because some drivers only + * provide correct information (e.g. MAC address) once bound. + */ + softmac->smac_addrlen = sizeof (softmac->smac_unicst_addr); + if ((rval = dl_info(lh, &dlia, softmac->smac_unicst_addr, + &softmac->smac_addrlen, &dlea)) != 0) { + if (rval == ENOTSUP) { + cmn_err(CE_NOTE, "softmac: received " + "DL_ERROR_ACK to DL_INFO_ACK; " + "DLPI errno 0x%x, UNIX errno %d", + dlea.dl_errno, dlea.dl_unix_errno); + } + (void) ldi_close(lh, FREAD|FWRITE, kcred); + continue; + } + + softmac->smac_style = dlia.dl_provider_style; + softmac->smac_saplen = ABS(dlia.dl_sap_length); + softmac->smac_min_sdu = dlia.dl_min_sdu; + softmac->smac_max_sdu = dlia.dl_max_sdu; + + if ((softmac->smac_saplen != sizeof (uint16_t)) || + (softmac->smac_addrlen != ETHERADDRL) || + (dlia.dl_brdcst_addr_length != ETHERADDRL) || + (dlia.dl_brdcst_addr_offset == 0)) { + (void) ldi_close(lh, FREAD|FWRITE, kcred); + continue; + } + + /* + * Check other DLPI capabilities. Note that this must be after + * dl_bind() because some drivers return DL_ERROR_ACK if the + * stream is not bound. It is also before mac_register(), so + * we don't need any lock protection here. + * + * Softmac always supports POLL. 
+ */ + softmac->smac_capab_flags = + (MAC_CAPAB_POLL | MAC_CAPAB_NO_ZCOPY | MAC_CAPAB_LEGACY); + + softmac->smac_no_capability_req = B_FALSE; + if (softmac_fill_capab(lh, softmac) != 0) + softmac->smac_no_capability_req = B_TRUE; + + /* + * Check the margin of the underlying driver. + */ + margin = 0; + iocb.ic_cmd = DLIOCMARGININFO; + iocb.ic_timout = INFTIM; + iocb.ic_len = sizeof (margin); + iocb.ic_dp = (char *)&margin; + softmac->smac_margin = 0; + + if (ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, + &rval) == 0) { + softmac->smac_margin = margin; + } + + /* + * If the legacy driver doesn't support DLIOCMARGININFO, but + * it can support native VLAN, correct its margin value to 4. + */ + if (native_vlan) { + if (softmac->smac_margin == 0) + softmac->smac_margin = VLAN_TAGSZ; + } else { + softmac->smac_capab_flags |= MAC_CAPAB_NO_NATIVEVLAN; + } + + /* + * Not all drivers support DL_NOTIFY_REQ, so ignore ENOTSUP. + */ + softmac->smac_notifications = 0; + notes = DL_NOTE_PHYS_ADDR | DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; + switch (dl_notify(lh, &notes, NULL)) { + case 0: + softmac->smac_notifications = notes; + break; + case ENOTSUP: + break; + default: + (void) ldi_close(lh, FREAD|FWRITE, kcred); + continue; + } + + (void) ldi_close(lh, FREAD|FWRITE, kcred); + err = 0; + break; + } + ldi_ident_release(li); + + mutex_enter(&softmac->smac_mutex); + + if (err != 0) + goto done; + + if (softmac->smac_media != DL_ETHER) + softmac->smac_flags |= SOFTMAC_NOSUPP; + + /* + * Finally, we're ready to register ourselves with the MAC layer + * interface; if this succeeds, we're all ready to start() + */ + if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) { + mac_register_t *macp; + + if ((macp = mac_alloc(MAC_VERSION)) == NULL) { + err = ENOMEM; + goto done; + } + + macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; + macp->m_driver = softmac; + macp->m_dip = softmac_dip; + + macp->m_margin = softmac->smac_margin; + macp->m_src_addr = softmac->smac_unicst_addr; + macp->m_min_sdu = 
softmac->smac_min_sdu; + macp->m_max_sdu = softmac->smac_max_sdu; + macp->m_callbacks = &softmac_m_callbacks; + macp->m_instance = (uint_t)-1; + + err = mac_register(macp, &softmac->smac_mh); + mac_free(macp); + if (err != 0) { + cmn_err(CE_WARN, "mac_register failed for %s", + softmac->smac_devname); + goto done; + } + } + + /* + * Try to create the datalink for this softmac. + */ + if ((err = softmac_create_datalink(softmac)) != 0) { + if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) { + (void) mac_unregister(softmac->smac_mh); + softmac->smac_mh = NULL; + } + } + +done: + ASSERT(!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)); + softmac->smac_flags |= SOFTMAC_ATTACH_DONE; + softmac->smac_attacherr = err; + softmac->smac_taskq = NULL; + cv_broadcast(&softmac->smac_cv); + mutex_exit(&softmac->smac_mutex); +} +/* + * Pre-detach counterpart of softmac_create(), called for a minor node of + * the device. Returns EBUSY while the softmac is held (smac_hold_cnt) + * or while an earlier pre-detach failure is still outstanding + * (smac_attached_left). When all minor nodes are attached it waits for + * SOFTMAC_ATTACH_DONE, destroys the datalink and, for a registered + * legacy device, unregisters the MAC. On success the softmac_dev_t of + * this minor node is freed, and the softmac_t itself is removed from the + * hash and freed once the last minor node is gone. + */ +int +softmac_destroy(dev_info_t *dip, dev_t dev) +{ + char devname[MAXNAMELEN]; + softmac_t *softmac; + softmac_dev_t *softmac_dev; + int index; + int ppa, err; + datalink_id_t linkid; + + ppa = ddi_get_instance(dip); + (void) snprintf(devname, MAXNAMELEN, "%s%d", ddi_driver_name(dip), ppa); + + rw_enter(&softmac_hash_lock, RW_WRITER); + err = mod_hash_find(softmac_hash, (mod_hash_key_t)devname, + (mod_hash_val_t *)&softmac); + ASSERT(err == 0); + + mutex_enter(&softmac->smac_mutex); + + /* + * Fail the predetach routine if this softmac is in-use. + */ + if (softmac->smac_hold_cnt != 0) { + softmac->smac_attached_left = softmac->smac_attachok_cnt; + mutex_exit(&softmac->smac_mutex); + rw_exit(&softmac_hash_lock); + return (EBUSY); + } + + /* + * Even if the predetach of one minor node has already failed + * (smac_attached_left is not 0), the DACF framework will continue + * to call the predetach routines of the other minor nodes, + * so we fail these calls here. 
+ */ + if (softmac->smac_attached_left != 0) { + mutex_exit(&softmac->smac_mutex); + rw_exit(&softmac_hash_lock); + return (EBUSY); + } + + if (softmac->smac_attachok_cnt != softmac->smac_cnt) + goto done; + + /* + * This is the detach for the first minor node. Wait until all the + * minor nodes are attached. + */ + while (!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)) + cv_wait(&softmac->smac_cv, &softmac->smac_mutex); + + if (softmac->smac_mh != NULL) { + if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) { + if ((err = dls_devnet_destroy(softmac->smac_mh, + &linkid)) != 0) { + goto done; + } + } + /* + * If softmac_mac_register() succeeds in registering the mac + * of the legacy device, unregister it. + */ + if (!(softmac->smac_flags & (SOFTMAC_GLDV3 | SOFTMAC_NOSUPP))) { + if ((err = mac_unregister(softmac->smac_mh)) != 0) { + (void) dls_devnet_create(softmac->smac_mh, + linkid); + goto done; + } + } + softmac->smac_mh = NULL; + } + softmac->smac_flags &= ~SOFTMAC_ATTACH_DONE; + +done: + if (err == 0) { + /* + * Free softmac_dev + */ + index = (getmajor(dev) == ddi_name_to_major("clone")); + softmac_dev = softmac->smac_softmac[index]; + ASSERT(softmac_dev != NULL); + softmac->smac_softmac[index] = NULL; + kmem_free(softmac_dev, sizeof (softmac_dev_t)); + + if (--softmac->smac_attachok_cnt == 0) { + mod_hash_val_t hashval; + + err = mod_hash_remove(softmac_hash, + (mod_hash_key_t)devname, + (mod_hash_val_t *)&hashval); + ASSERT(err == 0); + + mutex_exit(&softmac->smac_mutex); + rw_exit(&softmac_hash_lock); + + ASSERT(softmac->smac_taskq == NULL); + ASSERT(!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)); + mutex_destroy(&softmac->smac_mutex); + cv_destroy(&softmac->smac_cv); + rw_destroy(&softmac->smac_lock); + kmem_free(softmac, sizeof (softmac_t)); + return (0); + } + } else { + softmac->smac_attached_left = softmac->smac_attachok_cnt; + } + + mutex_exit(&softmac->smac_mutex); + rw_exit(&softmac_hash_lock); + return (err); +} + +/* + * This function is called as the 
result of a newly started dlmgmtd daemon. + * + * We walk through every softmac that was created but failed to notify + * dlmgmtd about it (whose SOFTMAC_NEED_RECREATE flag is set). This occurs + * when softmacs are created before dlmgmtd is ready. For example, during + * diskless boot, a network device is used (and therefore attached) before + * the datalink-management service starts dlmgmtd. + */ +/* ARGSUSED */ +static uint_t +softmac_mac_recreate(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + softmac_t *softmac = (softmac_t *)val; + datalink_id_t linkid; + int err; + + ASSERT(RW_READ_HELD(&softmac_hash_lock)); + + /* + * Wait for softmac_create() and softmac_mac_register() to exit. + */ + mutex_enter(&softmac->smac_mutex); + while (!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)) + cv_wait(&softmac->smac_cv, &softmac->smac_mutex); + + if ((softmac->smac_attacherr != 0) || + !(softmac->smac_flags & SOFTMAC_NEED_RECREATE)) { + mutex_exit(&softmac->smac_mutex); + return (MH_WALK_CONTINUE); + } + + if (dls_mgmt_create(softmac->smac_devname, + makedevice(softmac->smac_umajor, softmac->smac_uppa + 1), + DATALINK_CLASS_PHYS, softmac->smac_media, B_TRUE, &linkid) != 0) { + mutex_exit(&softmac->smac_mutex); + return (MH_WALK_CONTINUE); + } + + if ((err = softmac_update_info(softmac, &linkid)) != 0) { + cmn_err(CE_WARN, "softmac: softmac_update_info() for %s " + "failed (%d)", softmac->smac_devname, err); + mutex_exit(&softmac->smac_mutex); + return (MH_WALK_CONTINUE); + } + + /* + * Create a link for this MAC. The link name will be the same + * as the MAC name. 
+ */ + if (!(softmac->smac_flags & SOFTMAC_NOSUPP)) { + err = dls_devnet_recreate(softmac->smac_mh, linkid); + if (err != 0) { + cmn_err(CE_WARN, "softmac: dls_devnet_recreate() for " + "%s (linkid %d) failed (%d)", + softmac->smac_devname, linkid, err); + } + } + + softmac->smac_flags &= ~SOFTMAC_NEED_RECREATE; + mutex_exit(&softmac->smac_mutex); + + return (MH_WALK_CONTINUE); +} + +/* + * See comments above softmac_mac_recreate(). + */ +void +softmac_recreate() +{ + /* + * Walk through the softmac_hash table. Request to create the + * [link name, linkid] mapping if we failed to do so. + */ + rw_enter(&softmac_hash_lock, RW_READER); + mod_hash_walk(softmac_hash, softmac_mac_recreate, NULL); + rw_exit(&softmac_hash_lock); +} +/* + * MAC start callback; there is nothing to do, so it always returns 0. + */ +/* ARGSUSED */ +static int +softmac_m_start(void *arg) +{ + return (0); +} +/* + * MAC stop callback; there is nothing to do. + */ +/* ARGSUSED */ +static void +softmac_m_stop(void *arg) +{ +} + +/* + * Set up the lower stream above the legacy device which is shared by + * GLDv3 MAC clients. Put the lower stream into DLIOCRAW mode to send + * and receive the raw data. Further, put the lower stream into + * DL_PROMISC_SAP mode to receive all packets of interest. + * + * On success 0 is returned, *slpp points at the lower-stream structure + * and the LDI handle stays open in slp->sl_lh. On failure the error is + * returned and the handle is closed. + */ +static int +softmac_lower_setup(softmac_t *softmac, softmac_lower_t **slpp) +{ + ldi_ident_t li; + dev_t dev; + ldi_handle_t lh = NULL; + softmac_lower_t *slp = NULL; + smac_ioc_start_t start_arg; + struct strioctl strioc; + uint32_t notifications; + int err, rval; + + if ((err = ldi_ident_from_dip(softmac_dip, &li)) != 0) + return (err); + + dev = softmac->smac_dev; + err = ldi_open_by_dev(&dev, OTYP_CHR, FREAD|FWRITE, kcred, &lh, li); + ldi_ident_release(li); + if (err != 0) + goto done; + + /* + * Pop all the intermediate modules. The autopushed modules will + * be pushed when the softmac node is opened. 
+ */ + while (ldi_ioctl(lh, I_POP, 0, FKIOCTL, kcred, &rval) == 0) + ; + + if ((softmac->smac_style == DL_STYLE2) && + ((err = dl_attach(lh, softmac->smac_uppa, NULL)) != 0)) { + goto done; + } + + /* + * Put the lower stream into DLIOCRAW mode to send/receive raw data. + */ + if ((err = ldi_ioctl(lh, DLIOCRAW, 0, FKIOCTL, kcred, &rval)) != 0) + goto done; + + /* + * Then push the softmac shim layer atop the lower stream. + */ + if ((err = ldi_ioctl(lh, I_PUSH, (intptr_t)SOFTMAC_DEV_NAME, FKIOCTL, + kcred, &rval)) != 0) { + goto done; + } + + /* + * Send the ioctl to get the slp pointer. + */ + strioc.ic_cmd = SMAC_IOC_START; + strioc.ic_timout = INFTIM; + strioc.ic_len = sizeof (start_arg); + strioc.ic_dp = (char *)&start_arg; + + if ((err = ldi_ioctl(lh, I_STR, (intptr_t)&strioc, FKIOCTL, + kcred, &rval)) != 0) { + goto done; + } + slp = start_arg.si_slp; + slp->sl_lh = lh; + slp->sl_softmac = softmac; + *slpp = slp; + + /* + * Bind to SAP 2 on token ring, 0 on other interface types. + * (SAP 0 has special significance on token ring). + * Note that the receive-side packets could come anytime after bind. + */ + if (softmac->smac_media == DL_TPR) + err = softmac_send_bind_req(slp, 2); + else + err = softmac_send_bind_req(slp, 0); + if (err != 0) + goto done; + + /* + * Put the lower stream into DL_PROMISC_SAP mode to receive all + * packets of interest. + * + * Some drivers (e.g. the old legacy eri driver) incorrectly pass up + * packets to DL_PROMISC_SAP stream when the lower stream is not bound, + * so we send DL_PROMISON_REQ after DL_BIND_REQ. + */ + if ((err = softmac_send_promisc_req(slp, DL_PROMISC_SAP, B_TRUE)) != 0) + goto done; + + /* + * Enable the capabilities the underlying driver claims to support. + * Some drivers require this to be called after the stream is bound. + */ + if ((err = softmac_capab_enable(slp)) != 0) + goto done; + + /* + * Send the DL_NOTIFY_REQ to enable certain DL_NOTIFY_IND. + * We don't have to wait for the ack. 
+ */ + notifications = DL_NOTE_PHYS_ADDR | DL_NOTE_LINK_UP | + DL_NOTE_LINK_DOWN | DL_NOTE_PROMISC_ON_PHYS | + DL_NOTE_PROMISC_OFF_PHYS; + + (void) softmac_send_notify_req(slp, + (notifications & softmac->smac_notifications)); + +done: + if (err != 0) + (void) ldi_close(lh, FREAD|FWRITE, kcred); + return (err); +} +/* + * MAC open callback. Returns 0 at once when the softmac is already + * SOFTMAC_READY; otherwise builds the lower stream with + * softmac_lower_setup(), records it in smac_lower and moves the softmac + * to SOFTMAC_READY. A softmac_lower_setup() failure is returned as is. + */ +static int +softmac_m_open(void *arg) +{ + softmac_t *softmac = arg; + softmac_lower_t *slp; + int err; + + rw_enter(&softmac->smac_lock, RW_READER); + if (softmac->smac_state == SOFTMAC_READY) + goto done; + rw_exit(&softmac->smac_lock); + + if ((err = softmac_lower_setup(softmac, &slp)) != 0) + return (err); + + rw_enter(&softmac->smac_lock, RW_WRITER); + ASSERT(softmac->smac_state == SOFTMAC_INITIALIZED); + softmac->smac_lower = slp; + softmac->smac_state = SOFTMAC_READY; +done: + rw_exit(&softmac->smac_lock); + return (0); +} +/* + * MAC close callback: with smac_lock held as writer, close the LDI + * handle of the lower stream, clear smac_lower and move the softmac + * back to SOFTMAC_INITIALIZED. + */ +static void +softmac_m_close(void *arg) +{ + softmac_t *softmac = arg; + softmac_lower_t *slp; + + rw_enter(&softmac->smac_lock, RW_WRITER); + slp = softmac->smac_lower; + ASSERT(slp != NULL); + + /* + * Note that slp is destroyed when lh is closed. + */ + (void) ldi_close(slp->sl_lh, FREAD|FWRITE, kcred); + softmac->smac_state = SOFTMAC_INITIALIZED; + softmac->smac_lower = NULL; + rw_exit(&softmac->smac_lock); +} + +int +softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp) +{ + dev_info_t *dip; + char devname[MAXNAMELEN]; + softmac_t *softmac; + int ppa, err; + + if ((ppa = getminor(dev) - 1) > 1000) + return (ENOENT); + + /* + * First try to hold this device instance to force the MAC + * to be registered. + */ + if ((dip = ddi_hold_devi_by_instance(getmajor(dev), ppa, 0)) == NULL) + return (ENOENT); + + if ((ddi_driver_major(dip) != getmajor(dev)) || + !NETWORK_DRV(getmajor(dev))) { + ddi_release_devi(dip); + return (ENOENT); + } + + /* + * This is a network device; wait for its softmac to be registered. 
+ */ + (void) snprintf(devname, MAXNAMELEN, "%s%d", ddi_driver_name(dip), ppa); +again: + rw_enter(&softmac_hash_lock, RW_READER); + + if (mod_hash_find(softmac_hash, (mod_hash_key_t)devname, + (mod_hash_val_t *)&softmac) != 0) { + /* + * This is rare but possible. It could happen when pre-detach + * routine of the device succeeds. But the softmac will then + * be recreated when device fails to detach (as this device + * is held). + */ + rw_exit(&softmac_hash_lock); + goto again; + } + + /* + * Bump smac_hold_cnt to prevent device detach. + */ + mutex_enter(&softmac->smac_mutex); + softmac->smac_hold_cnt++; + mutex_exit(&softmac->smac_mutex); + + rw_exit(&softmac_hash_lock); + + /* + * Wait till the device is fully attached. + */ + mutex_enter(&softmac->smac_mutex); + while (!(softmac->smac_flags & SOFTMAC_ATTACH_DONE)) + cv_wait(&softmac->smac_cv, &softmac->smac_mutex); + + if ((err = softmac->smac_attacherr) == 0) { + /* + * If softmac is successfully attached, set smac_udip + * which is used in softmac_rele_device(). 
+ */ + ASSERT(softmac->smac_udip == NULL || + softmac->smac_udip == dip); + softmac->smac_udip = dip; + *ddhp = (dls_dev_handle_t)softmac; + } + mutex_exit(&softmac->smac_mutex); + + if (err != 0) + softmac_rele_device((dls_dev_handle_t)softmac); + + return (err); +} + +void +softmac_rele_device(dls_dev_handle_t ddh) +{ + softmac_t *softmac; + dev_info_t *dip; + + if (ddh == NULL) + return; + + softmac = (softmac_t *)ddh; + mutex_enter(&softmac->smac_mutex); + dip = softmac->smac_udip; + if (--softmac->smac_hold_cnt == 0) + softmac->smac_udip = NULL; + mutex_exit(&softmac->smac_mutex); + + ddi_release_devi(dip); +} diff --git a/usr/src/uts/common/io/softmac/softmac_pkt.c b/usr/src/uts/common/io/softmac/softmac_pkt.c new file mode 100644 index 0000000000..8848dc755a --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac_pkt.c @@ -0,0 +1,320 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/strsubr.h> +#include <inet/led.h> +#include <sys/softmac_impl.h> + +/* + * Macro to check whether the write-queue of the lower stream is full. + * + * Because softmac is pushed right above the underlying device and + * _I_INSERT/_I_REMOVE is not processed in the lower stream, it is + * safe to directly access the q_next pointer. + */ +#define CANPUTNEXT(q) \ + (!((q)->q_next->q_nfsrv->q_flag & QFULL) || canput((q)->q_next)) + +mblk_t * +softmac_m_tx(void *arg, mblk_t *mp) +{ + queue_t *wq = ((softmac_t *)arg)->smac_lower->sl_wq; + + /* + * Optimize for the most common case. + */ + if (mp->b_cont == NULL) { + if (!CANPUTNEXT(wq)) + return (mp); + + mp->b_flag |= MSGNOLOOP; + putnext(wq, mp); + return (NULL); + } + + while (mp != NULL) { + mblk_t *next = mp->b_next; + + if (!CANPUTNEXT(wq)) + break; + mp->b_next = NULL; + mp->b_flag |= MSGNOLOOP; + putnext(wq, mp); + mp = next; + } + return (mp); +} + +/*ARGSUSED*/ +static void +softmac_blank(void *arg, time_t ticks, uint_t count) +{ +} + +void +softmac_m_resources(void *arg) +{ + softmac_t *softmac = arg; + softmac_lower_t *slp = softmac->smac_lower; + mac_rx_fifo_t mrf; + + ASSERT((softmac->smac_state == SOFTMAC_READY) && (slp != NULL)); + + /* + * Register rx resources and save resource handle for future reference. + * Note that the mac_resources() function must be called when the lower + * stream is plumbed. + */ + + mutex_enter(&slp->sl_mutex); + + mrf.mrf_type = MAC_RX_FIFO; + mrf.mrf_blank = softmac_blank; + mrf.mrf_arg = slp; + mrf.mrf_normal_blank_time = SOFTMAC_BLANK_TICKS; + mrf.mrf_normal_pkt_count = SOFTMAC_BLANK_PKT_COUNT; + + slp->sl_handle = + mac_resource_add(softmac->smac_mh, (mac_resource_t *)&mrf); + + mutex_exit(&slp->sl_mutex); +} + +void +softmac_rput_process_data(softmac_lower_t *slp, mblk_t *mp) +{ + /* + * When packets arrive, the softmac might not be fully started. 
+ */ + ASSERT((slp->sl_softmac != NULL)); + ASSERT((mp->b_next == NULL) && (mp->b_prev == NULL)); + + if (DB_REF(mp) > 1) { + mblk_t *tmp; + + if ((tmp = copymsg(mp)) == NULL) { + cmn_err(CE_WARN, "softmac_rput_process_data: " + "copymsg failed"); + goto failed; + } + freemsg(mp); + mp = tmp; + } + + mac_rx(slp->sl_softmac->smac_mh, slp->sl_handle, mp); + return; + +failed: + freemsg(mp); +} + +#define ACKTIMEOUT (10 * hz) + +/* + * Serialize control message processing. + */ +static void +softmac_serialize_enter(softmac_lower_t *slp) +{ + mutex_enter(&slp->sl_ctl_mutex); + while (slp->sl_ctl_inprogress) + cv_wait(&slp->sl_ctl_cv, &slp->sl_ctl_mutex); + + ASSERT(!slp->sl_ctl_inprogress); + ASSERT(!slp->sl_pending_ioctl); + ASSERT(slp->sl_pending_prim == DL_PRIM_INVAL); + + slp->sl_ctl_inprogress = B_TRUE; + mutex_exit(&slp->sl_ctl_mutex); +} + +static void +softmac_serialize_exit(softmac_lower_t *slp) +{ + mutex_enter(&slp->sl_ctl_mutex); + + ASSERT(slp->sl_ctl_inprogress); + ASSERT(!slp->sl_pending_ioctl); + ASSERT(slp->sl_pending_prim == DL_PRIM_INVAL); + + slp->sl_ctl_inprogress = B_FALSE; + cv_broadcast(&slp->sl_ctl_cv); + mutex_exit(&slp->sl_ctl_mutex); +} + +static int +dlpi_get_errno(t_uscalar_t error, t_uscalar_t unix_errno) +{ + return (error == DL_SYSERR ? unix_errno : EINVAL); +} + +static int +softmac_output(softmac_lower_t *slp, mblk_t *mp, t_uscalar_t dl_prim, + t_uscalar_t ack, mblk_t **mpp) +{ + union DL_primitives *dlp; + int err = 0; + + softmac_serialize_enter(slp); + + /* + * Record the pending DLPI primitive. 
+ */ + mutex_enter(&slp->sl_mutex); + slp->sl_pending_prim = dl_prim; + mutex_exit(&slp->sl_mutex); + + putnext(slp->sl_wq, mp); + + mutex_enter(&slp->sl_mutex); + while (slp->sl_pending_prim != DL_PRIM_INVAL) { + if (cv_timedwait(&slp->sl_cv, &slp->sl_mutex, + lbolt + ACKTIMEOUT) == -1) + break; + } + + mp = slp->sl_ack_mp; + slp->sl_ack_mp = NULL; + + /* + * If we timed out, sl_ack_mp will still be NULL, but sl_pending_prim + * won't be set to DL_PRIM_INVAL. + */ + ASSERT(mp != NULL || slp->sl_pending_prim != DL_PRIM_INVAL); + + slp->sl_pending_prim = DL_PRIM_INVAL; + mutex_exit(&slp->sl_mutex); + + if (mp != NULL) { + dlp = (union DL_primitives *)mp->b_rptr; + + if (dlp->dl_primitive == DL_ERROR_ACK) { + err = dlpi_get_errno(dlp->error_ack.dl_errno, + dlp->error_ack.dl_unix_errno); + } else { + ASSERT(dlp->dl_primitive == ack); + } + } else { + err = ENOMSG; + } + + if (mpp != NULL) + *mpp = mp; + else + freemsg(mp); + + softmac_serialize_exit(slp); + return (err); +} + +void +softmac_ioctl_tx(softmac_lower_t *slp, mblk_t *mp, mblk_t **mpp) +{ + softmac_serialize_enter(slp); + + /* + * Record that ioctl processing is currently in progress. 
+ */ + mutex_enter(&slp->sl_mutex); + slp->sl_pending_ioctl = B_TRUE; + mutex_exit(&slp->sl_mutex); + + putnext(slp->sl_wq, mp); + + mutex_enter(&slp->sl_mutex); + while (slp->sl_pending_ioctl) + cv_wait(&slp->sl_cv, &slp->sl_mutex); + mp = slp->sl_ack_mp; + slp->sl_ack_mp = NULL; + mutex_exit(&slp->sl_mutex); + + ASSERT(mpp != NULL && mp != NULL); + *mpp = mp; + + softmac_serialize_exit(slp); +} + +static int +softmac_mexchange_error_ack(mblk_t **mpp, t_uscalar_t error_primitive, + t_uscalar_t error, t_uscalar_t unix_errno) +{ + union DL_primitives *dlp; + + if ((*mpp = mexchange(NULL, *mpp, sizeof (dl_error_ack_t), M_PCPROTO, + DL_ERROR_ACK)) == NULL) + return (ENOMEM); + + dlp = (union DL_primitives *)(*mpp)->b_rptr; + dlp->error_ack.dl_error_primitive = error_primitive; + dlp->error_ack.dl_errno = error; + dlp->error_ack.dl_unix_errno = unix_errno; + + return (0); +} + +int +softmac_proto_tx(softmac_lower_t *slp, mblk_t *mp, mblk_t **mpp) +{ + int err = 0; + t_uscalar_t dl_prim; + + dl_prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; + + ASSERT(slp->sl_softmac != NULL); + + switch (dl_prim) { + case DL_ENABMULTI_REQ: + case DL_DISABMULTI_REQ: + case DL_SET_PHYS_ADDR_REQ: + case DL_UNBIND_REQ: + case DL_UDQOS_REQ: + case DL_PROMISCON_REQ: + case DL_PROMISCOFF_REQ: + err = softmac_output(slp, mp, dl_prim, DL_OK_ACK, mpp); + break; + case DL_BIND_REQ: + err = softmac_output(slp, mp, dl_prim, DL_BIND_ACK, mpp); + break; + case DL_NOTIFY_REQ: + err = softmac_output(slp, mp, dl_prim, DL_NOTIFY_ACK, mpp); + break; + case DL_CONTROL_REQ: + err = softmac_output(slp, mp, dl_prim, DL_CONTROL_ACK, mpp); + break; + case DL_CAPABILITY_REQ: + err = softmac_output(slp, mp, dl_prim, DL_CAPABILITY_ACK, mpp); + break; + default: + if (mpp != NULL) { + *mpp = mp; + err = softmac_mexchange_error_ack(mpp, dl_prim, + DL_UNSUPPORTED, 0); + } + break; + } + return (err); +} diff --git a/usr/src/uts/common/io/softmac/softmac_stat.c b/usr/src/uts/common/io/softmac/softmac_stat.c 
new file mode 100644 index 0000000000..78b5306c86 --- /dev/null +++ b/usr/src/uts/common/io/softmac/softmac_stat.c @@ -0,0 +1,270 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/kstat.h> +#include <sys/mac.h> +#include <sys/dls.h> +#include <sys/softmac_impl.h> + +typedef struct i_softmac_stat_info_s { + uint_t ssi_stat; + char *ssi_name; + char *ssi_alias; +} i_softmac_stat_info_t; + +/* + * Must be the same order as mac_driver_stat. 
+ */ +static i_softmac_stat_info_t i_softmac_driver_si[] = { + { MAC_STAT_IFSPEED, "ifspeed", "link_speed" }, + { MAC_STAT_MULTIRCV, "multircv", NULL }, + { MAC_STAT_BRDCSTRCV, "brdcstrcv", NULL }, + { MAC_STAT_MULTIXMT, "multixmt", NULL }, + { MAC_STAT_BRDCSTXMT, "brdcstxmt", NULL }, + { MAC_STAT_NORCVBUF, "norcvbuf", "rx_no_buf" }, + { MAC_STAT_IERRORS, "ierrors", NULL }, + { MAC_STAT_UNKNOWNS, "unknowns", NULL }, + { MAC_STAT_NOXMTBUF, "noxmtbuf", "No Txpkt " }, + { MAC_STAT_OERRORS, "oerrors", NULL }, + { MAC_STAT_COLLISIONS, "collisions", NULL }, + { MAC_STAT_RBYTES, "rbytes64", "rbytes" }, + { MAC_STAT_IPACKETS, "ipackets64", "ipackets" }, + { MAC_STAT_OBYTES, "obytes64", "obytes" }, + { MAC_STAT_OPACKETS, "opackets64", "opackets" }, + { MAC_STAT_UNDERFLOWS, "uflo", NULL }, + { MAC_STAT_OVERFLOWS, "oflo", NULL } +}; + +/* + * Must be the same order as ether_stat. + */ +static i_softmac_stat_info_t i_softmac_ether_si[] = { + { ETHER_STAT_ALIGN_ERRORS, "align_errors", + "alignment_err" }, + { ETHER_STAT_FCS_ERRORS, "fcs_errors", "crc_err" }, + { ETHER_STAT_FIRST_COLLISIONS, "first_collisions", NULL }, + { ETHER_STAT_MULTI_COLLISIONS, "multi_collisions", NULL }, + { ETHER_STAT_SQE_ERRORS, "sqe_errors", NULL }, + { ETHER_STAT_DEFER_XMTS, "defer_xmts", NULL }, + { ETHER_STAT_TX_LATE_COLLISIONS, "tx_late_collisions", + "late_collisions" }, + { ETHER_STAT_EX_COLLISIONS, "ex_collisions", + "excessive_collisions" }, + { ETHER_STAT_MACXMT_ERRORS, "macxmt_errors", NULL }, + { ETHER_STAT_CARRIER_ERRORS, "carrier_errors", NULL }, + { ETHER_STAT_TOOLONG_ERRORS, "toolong_errors", "length_err" }, + { ETHER_STAT_MACRCV_ERRORS, "macrcv_errors", + "Rx Error Count" }, + + { ETHER_STAT_XCVR_ADDR, "xcvr_addr", NULL }, + { ETHER_STAT_XCVR_ID, "xcvr_id", NULL }, + { ETHER_STAT_XCVR_INUSE, "xcvr_inuse", NULL }, + + { ETHER_STAT_CAP_1000FDX, "cap_1000fdx", NULL }, + { ETHER_STAT_CAP_1000HDX, "cap_1000hdx", NULL }, + { ETHER_STAT_CAP_100FDX, "cap_100fdx", NULL }, + { 
ETHER_STAT_CAP_100HDX, "cap_100hdx", NULL }, + { ETHER_STAT_CAP_10FDX, "cap_10fdx", NULL }, + { ETHER_STAT_CAP_10HDX, "cap_10hdx", NULL }, + { ETHER_STAT_CAP_ASMPAUSE, "cap_asmpause", NULL }, + { ETHER_STAT_CAP_PAUSE, "cap_pause", NULL }, + { ETHER_STAT_CAP_AUTONEG, "cap_autoneg", NULL }, + + { ETHER_STAT_ADV_CAP_1000FDX, "adv_cap_1000fdx", NULL }, + { ETHER_STAT_ADV_CAP_1000HDX, "adv_cap_1000hdx", NULL }, + { ETHER_STAT_ADV_CAP_100FDX, "adv_cap_100fdx", NULL }, + { ETHER_STAT_ADV_CAP_100HDX, "adv_cap_100hdx", NULL }, + { ETHER_STAT_ADV_CAP_10FDX, "adv_cap_10fdx", NULL }, + { ETHER_STAT_ADV_CAP_10HDX, "adv_cap_10hdx", NULL }, + { ETHER_STAT_ADV_CAP_ASMPAUSE, "adv_cap_asmpause", NULL }, + { ETHER_STAT_ADV_CAP_PAUSE, "adv_cap_pause", NULL }, + { ETHER_STAT_ADV_CAP_AUTONEG, "adv_cap_autoneg", NULL }, + + { ETHER_STAT_LP_CAP_1000FDX, "lp_cap_1000fdx", NULL }, + { ETHER_STAT_LP_CAP_1000HDX, "lp_cap_1000hdx", NULL }, + { ETHER_STAT_LP_CAP_100FDX, "lp_cap_100fdx", NULL }, + { ETHER_STAT_LP_CAP_100HDX, "lp_cap_100hdx", NULL }, + { ETHER_STAT_LP_CAP_10FDX, "lp_cap_10fdx", NULL }, + { ETHER_STAT_LP_CAP_10HDX, "lp_cap_10hdx", NULL }, + { ETHER_STAT_LP_CAP_ASMPAUSE, "lp_cap_asmpause", NULL }, + { ETHER_STAT_LP_CAP_PAUSE, "lp_cap_pause", NULL }, + { ETHER_STAT_LP_CAP_AUTONEG, "lp_cap_autoneg", NULL }, + + { ETHER_STAT_LINK_ASMPAUSE, "link_asmpause", NULL }, + { ETHER_STAT_LINK_PAUSE, "link_pause", NULL }, + { ETHER_STAT_LINK_AUTONEG, "link_autoneg", NULL }, + { ETHER_STAT_LINK_DUPLEX, "link_duplex", "duplex" }, + + { ETHER_STAT_TOOSHORT_ERRORS, "runt_errors", NULL }, + { ETHER_STAT_CAP_REMFAULT, "cap_rem_fault", NULL }, + { ETHER_STAT_ADV_REMFAULT, "adv_rem_fault", NULL }, + { ETHER_STAT_LP_REMFAULT, "lp_rem_fault", NULL }, + + { ETHER_STAT_JABBER_ERRORS, "jabber_errors", NULL }, + { ETHER_STAT_CAP_100T4, "cap_100T4", NULL }, + { ETHER_STAT_ADV_CAP_100T4, "adv_cap_100T4", NULL }, + { ETHER_STAT_LP_CAP_100T4, "lp_cap_100T4", NULL } +}; + +static kstat_t 
*softmac_hold_dev_kstat(softmac_t *); +static void softmac_rele_dev_kstat(kstat_t *); +static int softmac_get_kstat(kstat_t *, char *, uint64_t *); + +static kstat_t * +softmac_hold_dev_kstat(softmac_t *softmac) +{ + char drv[MAXLINKNAMELEN]; + uint_t ppa; + kstat_t *ksp; + + if (ddi_parse(softmac->smac_devname, drv, &ppa) != DDI_SUCCESS) + return (NULL); + + /* + * Find the kstat by the module name and the instance number. + */ + ksp = kstat_hold_byname(drv, ppa, softmac->smac_devname, ALL_ZONES); + if (ksp != NULL) { + KSTAT_ENTER(ksp); + + if ((ksp->ks_data != NULL) && + (ksp->ks_type == KSTAT_TYPE_NAMED)) { + /* + * Update the kstat to get the latest statistics. + */ + if (KSTAT_UPDATE(ksp, KSTAT_READ) == 0) + return (ksp); + } + + KSTAT_EXIT(ksp); + kstat_rele(ksp); + } + return (NULL); +} + +static void +softmac_rele_dev_kstat(kstat_t *ksp) +{ + KSTAT_EXIT(ksp); + kstat_rele(ksp); +} + +/* + * The kstat needs to be held when calling this function. + */ +static int +softmac_get_kstat(kstat_t *ksp, char *name, uint64_t *valp) +{ + kstat_named_t *knp; + int i; + int ret = ENOTSUP; + + if (name == NULL) + return (ret); + + /* + * Search the kstat with the given name. 
+ */ + for (i = 0, knp = KSTAT_NAMED_PTR(ksp); i < ksp->ks_ndata; i++, knp++) { + if (strcmp(knp->name, name) == 0) { + switch (knp->data_type) { + case KSTAT_DATA_INT32: + case KSTAT_DATA_UINT32: + *valp = (uint64_t)(knp->value.ui32); + ret = 0; + break; + case KSTAT_DATA_INT64: + case KSTAT_DATA_UINT64: + *valp = knp->value.ui64; + ret = 0; + break; +#ifdef _LP64 + case KSTAT_DATA_LONG: + case KSTAT_DATA_ULONG: + *valp = (uint64_t)knp->value.ul; + ret = 0; + break; +#endif + case KSTAT_DATA_CHAR: + if (strcmp(name, "duplex") != 0) + break; + if (strncmp(knp->value.c, "full", 4) == 0) + *valp = LINK_DUPLEX_FULL; + else if (strncmp(knp->value.c, "half", 4) == 0) + *valp = LINK_DUPLEX_HALF; + else + *valp = LINK_DUPLEX_UNKNOWN; + ret = 0; + break; + } + break; + } + } + + return (ret); +} + +int +softmac_m_stat(void *arg, uint_t stat, uint64_t *val) +{ + softmac_t *softmac = arg; + kstat_t *ksp; + uint_t index; + int ret; + + if ((ksp = softmac_hold_dev_kstat(softmac)) == NULL) + return (ENOTSUP); + + if (IS_MAC_STAT(stat)) { + index = stat - MAC_STAT_MIN; + if ((ret = softmac_get_kstat(ksp, + i_softmac_driver_si[index].ssi_name, val)) != 0) { + ret = softmac_get_kstat(ksp, + i_softmac_driver_si[index].ssi_alias, val); + } + } else { + ASSERT(IS_MACTYPE_STAT(stat)); + index = stat - MACTYPE_STAT_MIN; + + switch (softmac->smac_media) { + case DL_ETHER: + if ((ret = softmac_get_kstat(ksp, + i_softmac_ether_si[index].ssi_name, val)) != 0) { + ret = softmac_get_kstat(ksp, + i_softmac_ether_si[index].ssi_alias, val); + } + break; + default: + ret = ENOTSUP; + break; + } + } + + softmac_rele_dev_kstat(ksp); + return (ret); +} diff --git a/usr/src/uts/common/io/strplumb.c b/usr/src/uts/common/io/strplumb.c index 644d83c352..6f20d98a5a 100644 --- a/usr/src/uts/common/io/strplumb.c +++ b/usr/src/uts/common/io/strplumb.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ @@ -626,12 +626,8 @@ done: static uchar_t boot_macaddr[16]; static int boot_maclen; -static uchar_t *getmacaddr(dev_info_t *dip, int *maclen); +static uchar_t *getmacaddr(dev_info_t *dip, size_t *maclenp); static int matchmac(dev_info_t *dip, void *arg); -int dl_attach(ldi_handle_t lh, int unit); -int dl_bind(ldi_handle_t lh, uint_t sap, uint_t max_conn, - uint_t service, uint_t conn_mgmt); -int dl_phys_addr(ldi_handle_t lh, struct ether_addr *eaddr); #endif /* !_OBP */ @@ -705,7 +701,7 @@ matchmac(dev_info_t *dip, void *arg) char **devpathp = (char **)arg; char *model_str; uchar_t *macaddr; - int maclen; + size_t maclen; /* XXX Should use "device-type" per IEEE 1275 */ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, 0, @@ -750,40 +746,15 @@ matchmac(dev_info_t *dip, void *arg) } static uchar_t * -getmacaddr_gldv3(char *drv, int inst, int *maclenp) -{ - char ifname[16]; - mac_handle_t mh; - uchar_t *macaddr; - - (void) snprintf(ifname, sizeof (ifname), "%s%d", drv, inst); - if (mac_open(ifname, &mh) < 0) { - return (NULL); - } - *maclenp = sizeof (struct ether_addr); - macaddr = kmem_alloc(*maclenp, KM_SLEEP); - mac_unicst_get(mh, macaddr); - mac_close(mh); - - return (macaddr); -} - -static uchar_t * -getmacaddr(dev_info_t *dip, int *maclenp) +getmacaddr(dev_info_t *dip, size_t *maclenp) { int rc, ppa; ldi_ident_t li; ldi_handle_t lh; - char *drv_name = (char *)ddi_driver_name(dip); + const char *drv_name = ddi_driver_name(dip); char *clonepath; uchar_t *macaddr = NULL; - /* a simpler way to get mac address for GLDv3 drivers */ - if (GLDV3_DRV(ddi_name_to_major(drv_name))) { - return (getmacaddr_gldv3(drv_name, ddi_get_instance(dip), - maclenp)); - } - if (rc = ldi_ident_from_mod(&modlinkage, &li)) { cmn_err(CE_WARN, "getmacaddr: ldi_ident_from_mod failed: %d\n", rc); @@ -806,245 +777,27 @@ getmacaddr(dev_info_t *dip, int *maclenp) kmem_free(clonepath, MAXPATHLEN); ppa = i_ddi_devi_get_ppa(dip); - if 
((dl_attach(lh, ppa) != 0) || - (dl_bind(lh, ETHERTYPE_IP, 0, DL_CLDLS, 0) != 0)) { + if ((dl_attach(lh, ppa, NULL) != 0) || + (dl_bind(lh, ETHERTYPE_IP, NULL) != 0)) { (void) ldi_close(lh, FREAD|FWRITE, CRED()); cmn_err(CE_WARN, "getmacaddr: dl_attach/bind(%s%d) failed: %d\n", drv_name, ppa, rc); return (NULL); } - *maclenp = sizeof (struct ether_addr); - macaddr = kmem_alloc(*maclenp, KM_SLEEP); - if (dl_phys_addr(lh, (struct ether_addr *)macaddr) != 0) { - kmem_free(macaddr, *maclenp); + + *maclenp = ETHERADDRL; + macaddr = kmem_alloc(ETHERADDRL, KM_SLEEP); + if (dl_phys_addr(lh, macaddr, maclenp, NULL) != 0 || + *maclenp != ETHERADDRL) { + kmem_free(macaddr, ETHERADDRL); macaddr = NULL; *maclenp = 0; cmn_err(CE_WARN, - "getmacaddr: dl_macaddr(%s%d) failed: %d\n", + "getmacaddr: dl_phys_addr(%s%d) failed: %d\n", drv_name, ppa, rc); } (void) ldi_close(lh, FREAD|FWRITE, CRED()); return (macaddr); } - #endif /* !_OBP */ - -int -dl_attach(ldi_handle_t lh, int unit) -{ - dl_attach_req_t *attach_req; - union DL_primitives *dl_prim; - mblk_t *mp; - int error; - - if ((mp = allocb(sizeof (dl_attach_req_t), BPRI_MED)) == NULL) { - cmn_err(CE_WARN, "dl_attach: allocb failed"); - return (ENOSR); - } - mp->b_datap->db_type = M_PROTO; - mp->b_wptr += sizeof (dl_attach_req_t); - - attach_req = (dl_attach_req_t *)mp->b_rptr; - attach_req->dl_primitive = DL_ATTACH_REQ; - attach_req->dl_ppa = unit; - - (void) ldi_putmsg(lh, mp); - if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) { - cmn_err(CE_NOTE, "!dl_attach: ldi_getmsg failed: %d", error); - return (error); - } - - dl_prim = (union DL_primitives *)mp->b_rptr; - switch (dl_prim->dl_primitive) { - case DL_OK_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_ok_ack_t)) { - cmn_err(CE_NOTE, - "!dl_attach: DL_OK_ACK protocol error"); - break; - } - if (((dl_ok_ack_t *)dl_prim)->dl_correct_primitive != - DL_ATTACH_REQ) { - cmn_err(CE_NOTE, "!dl_attach: DL_OK_ACK rtnd prim %u", - ((dl_ok_ack_t 
*)dl_prim)->dl_correct_primitive); - break; - } - freemsg(mp); - return (0); - - case DL_ERROR_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_error_ack_t)) { - cmn_err(CE_NOTE, - "!dl_attach: DL_ERROR_ACK protocol error"); - break; - } - break; - - default: - cmn_err(CE_NOTE, "!dl_attach: bad ACK header %u", - dl_prim->dl_primitive); - break; - } - - /* - * Error return only. - */ - freemsg(mp); - return (-1); -} - -int -dl_bind(ldi_handle_t lh, uint_t sap, uint_t max_conn, uint_t service, - uint_t conn_mgmt) -{ - dl_bind_req_t *bind_req; - union DL_primitives *dl_prim; - mblk_t *mp; - int error; - - if ((mp = allocb(sizeof (dl_bind_req_t), BPRI_MED)) == NULL) { - cmn_err(CE_WARN, "dl_bind: allocb failed"); - return (ENOSR); - } - mp->b_datap->db_type = M_PROTO; - - bind_req = (dl_bind_req_t *)mp->b_wptr; - mp->b_wptr += sizeof (dl_bind_req_t); - bind_req->dl_primitive = DL_BIND_REQ; - bind_req->dl_sap = sap; - bind_req->dl_max_conind = max_conn; - bind_req->dl_service_mode = service; - bind_req->dl_conn_mgmt = conn_mgmt; - bind_req->dl_xidtest_flg = 0; - - (void) ldi_putmsg(lh, mp); - if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) { - cmn_err(CE_NOTE, "!dl_bind: ldi_getmsg failed: %d", error); - return (error); - } - - dl_prim = (union DL_primitives *)mp->b_rptr; - switch (dl_prim->dl_primitive) { - case DL_BIND_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_bind_ack_t)) { - cmn_err(CE_NOTE, - "!dl_bind: DL_BIND_ACK protocol error"); - break; - } - if (((dl_bind_ack_t *)dl_prim)->dl_sap != sap) { - cmn_err(CE_NOTE, "!dl_bind: DL_BIND_ACK bad sap %u", - ((dl_bind_ack_t *)dl_prim)->dl_sap); - break; - } - freemsg(mp); - return (0); - - case DL_ERROR_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_error_ack_t)) { - cmn_err(CE_NOTE, - "!dl_bind: DL_ERROR_ACK protocol error"); - break; - } - break; - - default: - cmn_err(CE_NOTE, "!dl_bind: bad ACK header %u", - dl_prim->dl_primitive); - break; - } - - /* - * Error return only. 
- */ - freemsg(mp); - return (-1); -} - -int -dl_phys_addr(ldi_handle_t lh, struct ether_addr *eaddr) -{ - dl_phys_addr_req_t *phys_addr_req; - dl_phys_addr_ack_t *phys_addr_ack; - union DL_primitives *dl_prim; - mblk_t *mp; - int error; - uchar_t *addrp; - timestruc_t tv; - - if ((mp = allocb(sizeof (dl_phys_addr_req_t), BPRI_MED)) == - (mblk_t *)NULL) { - cmn_err(CE_WARN, "dl_phys_addr: allocb failed"); - return (ENOSR); - } - mp->b_datap->db_type = M_PROTO; - mp->b_wptr += sizeof (dl_phys_addr_req_t); - - phys_addr_req = (dl_phys_addr_req_t *)mp->b_rptr; - phys_addr_req->dl_primitive = DL_PHYS_ADDR_REQ; - phys_addr_req->dl_addr_type = DL_CURR_PHYS_ADDR; - - /* - * In case some provider doesn't implement or nack the - * request just wait for 15 seconds. - */ - tv.tv_sec = 15; - tv.tv_nsec = 0; - - (void) ldi_putmsg(lh, mp); - error = ldi_getmsg(lh, &mp, &tv); - if (error == ETIME) { - cmn_err(CE_NOTE, "!dl_phys_addr: timed out"); - return (-1); - } else if (error != 0) { - cmn_err(CE_NOTE, "!dl_phys_addr: ldi_getmsg failed: %d", error); - return (error); - } - - dl_prim = (union DL_primitives *)mp->b_rptr; - switch (dl_prim->dl_primitive) { - case DL_PHYS_ADDR_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_phys_addr_ack_t)) { - cmn_err(CE_NOTE, "!dl_phys_addr: " - "DL_PHYS_ADDR_ACK protocol error"); - break; - } - phys_addr_ack = &dl_prim->physaddr_ack; - if (phys_addr_ack->dl_addr_length != sizeof (*eaddr)) { - cmn_err(CE_NOTE, - "!dl_phys_addr: DL_PHYS_ADDR_ACK bad len %u", - phys_addr_ack->dl_addr_length); - break; - } - if (phys_addr_ack->dl_addr_length + - phys_addr_ack->dl_addr_offset > (mp->b_wptr-mp->b_rptr)) { - cmn_err(CE_NOTE, - "!dl_phys_addr: DL_PHYS_ADDR_ACK bad len %u", - phys_addr_ack->dl_addr_length); - break; - } - addrp = mp->b_rptr + phys_addr_ack->dl_addr_offset; - bcopy(addrp, eaddr, sizeof (*eaddr)); - freemsg(mp); - return (0); - - case DL_ERROR_ACK: - if ((mp->b_wptr-mp->b_rptr) < sizeof (dl_error_ack_t)) { - cmn_err(CE_NOTE, - 
"!dl_phys_addr: DL_ERROR_ACK protocol error"); - break; - } - - break; - - default: - cmn_err(CE_NOTE, "!dl_phys_addr: bad ACK header %u", - dl_prim->dl_primitive); - break; - } - - /* - * Error return only. - */ - freemsg(mp); - return (-1); -} diff --git a/usr/src/uts/common/io/sundlpi.c b/usr/src/uts/common/io/sundlpi.c index 80c3a6d722..43d5db0e5e 100644 --- a/usr/src/uts/common/io/sundlpi.c +++ b/usr/src/uts/common/io/sundlpi.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -36,6 +36,10 @@ #include <sys/stream.h> #include <sys/strsun.h> #include <sys/dlpi.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/cmn_err.h> #define DLADDRL (80) @@ -195,3 +199,390 @@ dlnotifyack( dlp->notify_ack.dl_notifications = notifications; qreply(wq, mp); } + +static int +dl_op(ldi_handle_t lh, mblk_t **mpp, t_uscalar_t expprim, size_t minlen, + dl_error_ack_t *dleap, timestruc_t *tvp) +{ + int err; + size_t len; + mblk_t *mp = *mpp; + t_uscalar_t reqprim, ackprim, ackreqprim; + union DL_primitives *dlp; + + reqprim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; + + (void) ldi_putmsg(lh, mp); + + switch (err = ldi_getmsg(lh, &mp, tvp)) { + case 0: + break; + case ETIME: + cmn_err(CE_NOTE, "!dl_op: timed out waiting for %s to %s", + dl_primstr(reqprim), dl_primstr(expprim)); + return (ETIME); + default: + cmn_err(CE_NOTE, "!dl_op: ldi_getmsg() for %s failed: %d", + dl_primstr(expprim), err); + return (err); + } + + len = MBLKL(mp); + if (len < sizeof (t_uscalar_t)) { + cmn_err(CE_NOTE, "!dl_op: received runt DLPI message"); + freemsg(mp); + return (EBADMSG); + } + + dlp = (union DL_primitives *)mp->b_rptr; + ackprim = dlp->dl_primitive; + + if (ackprim == expprim) { + if (len < minlen) + goto runt; + + if (ackprim == DL_OK_ACK) { + if (dlp->ok_ack.dl_correct_primitive != 
reqprim) { + ackreqprim = dlp->ok_ack.dl_correct_primitive; + goto mixup; + } + } + *mpp = mp; + return (0); + } + + if (ackprim == DL_ERROR_ACK) { + if (len < DL_ERROR_ACK_SIZE) + goto runt; + + if (dlp->error_ack.dl_error_primitive != reqprim) { + ackreqprim = dlp->error_ack.dl_error_primitive; + goto mixup; + } + + /* + * Return a special error code (ENOTSUP) indicating that the + * provider has returned DL_ERROR_ACK. Callers that want more + * details can pass a non-NULL dleap. + */ + if (dleap != NULL) + *dleap = dlp->error_ack; + + freemsg(mp); + return (ENOTSUP); + } + + cmn_err(CE_NOTE, "!dl_op: expected %s but received %s", + dl_primstr(expprim), dl_primstr(ackprim)); + freemsg(mp); + return (EBADMSG); +runt: + cmn_err(CE_NOTE, "!dl_op: received runt %s", dl_primstr(ackprim)); + freemsg(mp); + return (EBADMSG); +mixup: + cmn_err(CE_NOTE, "!dl_op: received %s for %s instead of %s", + dl_primstr(ackprim), dl_primstr(ackreqprim), dl_primstr(reqprim)); + freemsg(mp); + return (EBADMSG); +} + +/* + * Send a DL_ATTACH_REQ for `ppa' over `lh' and wait for the response. + * + * Returns an errno; ENOTSUP indicates a DL_ERROR_ACK response (and the + * caller can get the contents by passing a non-NULL `dleap'). + */ +int +dl_attach(ldi_handle_t lh, int ppa, dl_error_ack_t *dleap) +{ + mblk_t *mp; + int err; + + mp = mexchange(NULL, NULL, DL_ATTACH_REQ_SIZE, M_PROTO, DL_ATTACH_REQ); + if (mp == NULL) + return (ENOMEM); + + ((dl_attach_req_t *)mp->b_rptr)->dl_ppa = ppa; + + err = dl_op(lh, &mp, DL_OK_ACK, DL_OK_ACK_SIZE, dleap, NULL); + if (err == 0) + freemsg(mp); + return (err); +} + +/* + * Send a DL_BIND_REQ for `sap' over `lh' and wait for the response. + * + * Returns an errno; ENOTSUP indicates a DL_ERROR_ACK response (and the + * caller can get the contents by passing a non-NULL `dleap'). 
+ */ +int +dl_bind(ldi_handle_t lh, uint_t sap, dl_error_ack_t *dleap) +{ + dl_bind_req_t *dlbrp; + dl_bind_ack_t *dlbap; + mblk_t *mp; + int err; + + mp = mexchange(NULL, NULL, DL_BIND_REQ_SIZE, M_PROTO, DL_BIND_REQ); + if (mp == NULL) + return (ENOMEM); + + dlbrp = (dl_bind_req_t *)mp->b_rptr; + dlbrp->dl_sap = sap; + dlbrp->dl_conn_mgmt = 0; + dlbrp->dl_max_conind = 0; + dlbrp->dl_xidtest_flg = 0; + dlbrp->dl_service_mode = DL_CLDLS; + + err = dl_op(lh, &mp, DL_BIND_ACK, DL_BIND_ACK_SIZE, dleap, NULL); + if (err == 0) { + dlbap = (dl_bind_ack_t *)mp->b_rptr; + if (dlbap->dl_sap != sap) { + cmn_err(CE_NOTE, "!dl_bind: DL_BIND_ACK: bad sap %u", + dlbap->dl_sap); + err = EPROTO; + } + freemsg(mp); + } + return (err); +} + +/* + * Send a DL_PHYS_ADDR_REQ over `lh' and wait for the response. The caller + * must set `*physlenp' to the size of `physaddr' (both of which must be + * non-NULL); upon success they will be updated to contain the actual physical + * address and length. + * + * Returns an errno; ENOTSUP indicates a DL_ERROR_ACK response (and the + * caller can get the contents by passing a non-NULL `dleap'). + */ +int +dl_phys_addr(ldi_handle_t lh, uchar_t *physaddr, size_t *physlenp, + dl_error_ack_t *dleap) +{ + dl_phys_addr_ack_t *dlpap; + mblk_t *mp; + int err; + t_uscalar_t paddrlen, paddroff; + timestruc_t tv; + + mp = mexchange(NULL, NULL, DL_PHYS_ADDR_REQ_SIZE, M_PROTO, + DL_PHYS_ADDR_REQ); + if (mp == NULL) + return (ENOMEM); + + ((dl_phys_addr_req_t *)mp->b_rptr)->dl_addr_type = DL_CURR_PHYS_ADDR; + + /* + * In case some provider doesn't implement or NAK the + * request, just wait for 15 seconds. 
+ */ + tv.tv_sec = 15; + tv.tv_nsec = 0; + + err = dl_op(lh, &mp, DL_PHYS_ADDR_ACK, DL_PHYS_ADDR_ACK_SIZE, dleap, + &tv); + if (err == 0) { + dlpap = (dl_phys_addr_ack_t *)mp->b_rptr; + paddrlen = dlpap->dl_addr_length; + paddroff = dlpap->dl_addr_offset; + if (paddroff == 0 || paddrlen == 0 || paddrlen > *physlenp || + !MBLKIN(mp, paddroff, paddrlen)) { + cmn_err(CE_NOTE, "!dl_phys_addr: DL_PHYS_ADDR_ACK: " + "bad length/offset %d/%d", paddrlen, paddroff); + err = EBADMSG; + } else { + bcopy(mp->b_rptr + paddroff, physaddr, paddrlen); + *physlenp = paddrlen; + } + freemsg(mp); + } + return (err); +} + +/* + * Send a DL_INFO_REQ over `lh' and wait for the response. The caller must + * pass a non-NULL `dliap', which upon success will contain the dl_info_ack_t + * from the provider. The caller may optionally get the provider's physical + * address by passing a non-NULL `physaddr' and setting `*physlenp' to its + * size; upon success they will be updated to contain the actual physical + * address and its length. + * + * Returns an errno; ENOTSUP indicates a DL_ERROR_ACK response (and the + * caller can get the contents by passing a non-NULL `dleap'). 
+ */ +int +dl_info(ldi_handle_t lh, dl_info_ack_t *dliap, uchar_t *physaddr, + size_t *physlenp, dl_error_ack_t *dleap) +{ + mblk_t *mp; + int err; + int addrlen, addroff; + + mp = mexchange(NULL, NULL, DL_INFO_REQ_SIZE, M_PCPROTO, DL_INFO_REQ); + if (mp == NULL) + return (ENOMEM); + + err = dl_op(lh, &mp, DL_INFO_ACK, DL_INFO_ACK_SIZE, dleap, NULL); + if (err != 0) + return (err); + + *dliap = *(dl_info_ack_t *)mp->b_rptr; + if (physaddr != NULL) { + addrlen = dliap->dl_addr_length - ABS(dliap->dl_sap_length); + addroff = dliap->dl_addr_offset; + if (addroff == 0 || addrlen <= 0 || addrlen > *physlenp || + !MBLKIN(mp, addroff, dliap->dl_addr_length)) { + cmn_err(CE_NOTE, "!dl_info: DL_INFO_ACK: " + "bad length/offset %d/%d", addrlen, addroff); + freemsg(mp); + return (EBADMSG); + } + + if (dliap->dl_sap_length > 0) + addroff += dliap->dl_sap_length; + bcopy(mp->b_rptr + addroff, physaddr, addrlen); + *physlenp = addrlen; + } + freemsg(mp); + return (err); +} + +/* + * Send a DL_NOTIFY_REQ over `lh' and wait for the response. The caller + * should set `notesp' to the set of notifications they wish to enable; + * upon success it will contain the notifications enabled by the provider. + * + * Returns an errno; ENOTSUP indicates a DL_ERROR_ACK response (and the + * caller can get the contents by passing a non-NULL `dleap'). 
+ */ +int +dl_notify(ldi_handle_t lh, uint32_t *notesp, dl_error_ack_t *dleap) +{ + mblk_t *mp; + int err; + + mp = mexchange(NULL, NULL, DL_NOTIFY_REQ_SIZE, M_PROTO, DL_NOTIFY_REQ); + if (mp == NULL) + return (ENOMEM); + + ((dl_notify_req_t *)mp->b_rptr)->dl_notifications = *notesp; + + err = dl_op(lh, &mp, DL_NOTIFY_ACK, DL_NOTIFY_ACK_SIZE, dleap, NULL); + if (err == 0) { + *notesp = ((dl_notify_ack_t *)mp->b_rptr)->dl_notifications; + freemsg(mp); + } + return (err); +} + +const char * +dl_primstr(t_uscalar_t prim) +{ + switch (prim) { + case DL_INFO_REQ: return ("DL_INFO_REQ"); + case DL_INFO_ACK: return ("DL_INFO_ACK"); + case DL_ATTACH_REQ: return ("DL_ATTACH_REQ"); + case DL_DETACH_REQ: return ("DL_DETACH_REQ"); + case DL_BIND_REQ: return ("DL_BIND_REQ"); + case DL_BIND_ACK: return ("DL_BIND_ACK"); + case DL_UNBIND_REQ: return ("DL_UNBIND_REQ"); + case DL_OK_ACK: return ("DL_OK_ACK"); + case DL_ERROR_ACK: return ("DL_ERROR_ACK"); + case DL_ENABMULTI_REQ: return ("DL_ENABMULTI_REQ"); + case DL_DISABMULTI_REQ: return ("DL_DISABMULTI_REQ"); + case DL_PROMISCON_REQ: return ("DL_PROMISCON_REQ"); + case DL_PROMISCOFF_REQ: return ("DL_PROMISCOFF_REQ"); + case DL_UNITDATA_REQ: return ("DL_UNITDATA_REQ"); + case DL_UNITDATA_IND: return ("DL_UNITDATA_IND"); + case DL_UDERROR_IND: return ("DL_UDERROR_IND"); + case DL_PHYS_ADDR_REQ: return ("DL_PHYS_ADDR_REQ"); + case DL_PHYS_ADDR_ACK: return ("DL_PHYS_ADDR_ACK"); + case DL_SET_PHYS_ADDR_REQ: return ("DL_SET_PHYS_ADDR_REQ"); + case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ"); + case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK"); + case DL_NOTIFY_IND: return ("DL_NOTIFY_IND"); + case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ"); + case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK"); + case DL_CONTROL_REQ: return ("DL_CONTROL_REQ"); + case DL_CONTROL_ACK: return ("DL_CONTROL_ACK"); + case DL_PASSIVE_REQ: return ("DL_PASSIVE_REQ"); + case DL_INTR_MODE_REQ: return ("DL_INTR_MODE_REQ"); + case DL_UDQOS_REQ: return 
("DL_UDQOS_REQ"); + default: return ("<unknown primitive>"); + } +} + +const char * +dl_errstr(t_uscalar_t err) +{ + switch (err) { + case DL_ACCESS: return ("DL_ACCESS"); + case DL_BADADDR: return ("DL_BADADDR"); + case DL_BADCORR: return ("DL_BADCORR"); + case DL_BADDATA: return ("DL_BADDATA"); + case DL_BADPPA: return ("DL_BADPPA"); + case DL_BADPRIM: return ("DL_BADPRIM"); + case DL_BADQOSPARAM: return ("DL_BADQOSPARAM"); + case DL_BADQOSTYPE: return ("DL_BADQOSTYPE"); + case DL_BADSAP: return ("DL_BADSAP"); + case DL_BADTOKEN: return ("DL_BADTOKEN"); + case DL_BOUND: return ("DL_BOUND"); + case DL_INITFAILED: return ("DL_INITFAILED"); + case DL_NOADDR: return ("DL_NOADDR"); + case DL_NOTINIT: return ("DL_NOTINIT"); + case DL_OUTSTATE: return ("DL_OUTSTATE"); + case DL_SYSERR: return ("DL_SYSERR"); + case DL_UNSUPPORTED: return ("DL_UNSUPPORTED"); + case DL_UNDELIVERABLE: return ("DL_UNDELIVERABLE"); + case DL_NOTSUPPORTED: return ("DL_NOTSUPPORTED "); + case DL_TOOMANY: return ("DL_TOOMANY"); + case DL_NOTENAB: return ("DL_NOTENAB"); + case DL_BUSY: return ("DL_BUSY"); + case DL_NOAUTO: return ("DL_NOAUTO"); + case DL_NOXIDAUTO: return ("DL_NOXIDAUTO"); + case DL_NOTESTAUTO: return ("DL_NOTESTAUTO"); + case DL_XIDAUTO: return ("DL_XIDAUTO"); + case DL_TESTAUTO: return ("DL_TESTAUTO"); + case DL_PENDING: return ("DL_PENDING"); + default: return ("<unknown error>"); + } +} + +const char * +dl_mactypestr(t_uscalar_t mactype) +{ + switch (mactype) { + case DL_CSMACD: return ("CSMA/CD"); + case DL_TPB: return ("Token Bus"); + case DL_TPR: return ("Token Ring"); + case DL_METRO: return ("Metro Net"); + case DL_ETHER: return ("Ethernet"); + case DL_HDLC: return ("HDLC"); + case DL_CHAR: return ("Sync Character"); + case DL_CTCA: return ("CTCA"); + case DL_FDDI: return ("FDDI"); + case DL_FRAME: return ("Frame Relay (LAPF)"); + case DL_MPFRAME: return ("MP Frame Relay"); + case DL_ASYNC: return ("Async Character"); + case DL_IPX25: return ("X.25 (Classic IP)"); + case 
DL_LOOP: return ("Software Loopback"); + case DL_FC: return ("Fiber Channel"); + case DL_ATM: return ("ATM"); + case DL_IPATM: return ("ATM (Classic IP)"); + case DL_X25: return ("X.25 (LAPB)"); + case DL_ISDN: return ("ISDN"); + case DL_HIPPI: return ("HIPPI"); + case DL_100VG: return ("100BaseVG Ethernet"); + case DL_100VGTPR: return ("100BaseVG Token Ring"); + case DL_ETH_CSMA: return ("Ethernet/IEEE 802.3"); + case DL_100BT: return ("100BaseT"); + case DL_IB: return ("Infiniband"); + case DL_IPV4: return ("IPv4 Tunnel"); + case DL_IPV6: return ("IPv6 Tunnel"); + case DL_WIFI: return ("IEEE 802.11"); + default: return ("<unknown mactype>"); + } +} diff --git a/usr/src/uts/common/io/vnic/vnic_ctl.c b/usr/src/uts/common/io/vnic/vnic_ctl.c index 863a519088..77aab2f7f1 100644 --- a/usr/src/uts/common/io/vnic/vnic_ctl.c +++ b/usr/src/uts/common/io/vnic/vnic_ctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -54,21 +54,16 @@ static int vnic_detach(dev_info_t *, ddi_detach_cmd_t); static int vnic_open(queue_t *, dev_t *, int, int, cred_t *); static int vnic_close(queue_t *); static void vnic_wput(queue_t *, mblk_t *); +static void vnic_ioctl(queue_t *, mblk_t *); -typedef struct vnic_taskq_args_s { - queue_t *tq_vnic_q; - mblk_t *tq_vnic_mp; - int tq_vnic_flag; -} vnic_taskq_args_t; - -static void vnic_ioc_create(vnic_taskq_args_t *); -static void vnic_ioc_modify(vnic_taskq_args_t *); -static void vnic_ioc_delete(vnic_taskq_args_t *); -static void vnic_ioc_info(vnic_taskq_args_t *); +static int vnic_ioc_create(mblk_t *, int); +static int vnic_ioc_modify(mblk_t *, int); +static int vnic_ioc_delete(mblk_t *, int); +static int vnic_ioc_info(mblk_t *, int); typedef struct ioc_cmd_s { int ic_cmd; - void (*ic_func)(vnic_taskq_args_t *); + int (*ic_func)(mblk_t *, int); } ioc_cmd_t; static ioc_cmd_t ioc_cmd[] = { @@ -176,6 +171,12 @@ vnic_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (ENOSR); /* + * The ioctl handling callback to process control ioctl + * messages; see comments above dld_ioctl() for details. + */ + dsp->ds_ioctl = vnic_ioctl; + + /* * The VNIC control node uses its own set of entry points. 
*/ WR(q)->q_qinfo = &vnic_w_ctl_qinit; @@ -193,21 +194,21 @@ vnic_close(queue_t *q) if (dsp->ds_type == DLD_CONTROL) { qprocsoff(q); + dld_finish_pending_task(dsp); + dsp->ds_ioctl = NULL; dld_str_destroy(dsp); return (0); } return (dld_close(q)); } -void +static void vnic_ioctl(queue_t *wq, mblk_t *mp) { /* LINTED alignment */ struct iocblk *iocp = (struct iocblk *)mp->b_rptr; - int i, err = 0; + int i, err = EINVAL; mblk_t *nmp; - void (*func)(); - vnic_taskq_args_t *taskq_args; if (mp->b_cont == NULL) { err = EINVAL; @@ -228,45 +229,29 @@ vnic_ioctl(queue_t *wq, mblk_t *mp) for (i = 0; i < IOC_CMD_SZ; i++) { if (iocp->ioc_cmd == ioc_cmd[i].ic_cmd) { - func = ioc_cmd[i].ic_func; + err = ioc_cmd[i].ic_func(mp, (int)iocp->ioc_flag); break; } } - if (i == IOC_CMD_SZ) { - freemsg(mp->b_cont); - err = EINVAL; - goto done; - } + if (err == 0) { + int len = 0; - taskq_args = kmem_zalloc(sizeof (vnic_taskq_args_t), KM_NOSLEEP); - if (taskq_args == NULL) { - freemsg(mp->b_cont); - err = ENOMEM; - goto done; - } - - taskq_args->tq_vnic_q = wq; - taskq_args->tq_vnic_mp = mp; - taskq_args->tq_vnic_flag = (int)iocp->ioc_flag; - if (taskq_dispatch(system_taskq, - func, taskq_args, TQ_NOSLEEP) == NULL) { - kmem_free(taskq_args, sizeof (vnic_taskq_args_t)); - freemsg(mp->b_cont); - err = ENOMEM; - goto done; + if (mp->b_cont != NULL) + len = MBLKL(mp->b_cont); + miocack(wq, mp, len, 0); + return; } done: - if (err != 0) - miocnak(wq, mp, 0, err); + miocnak(wq, mp, 0, err); } static void vnic_wput(queue_t *q, mblk_t *mp) { if (DB_TYPE(mp) == M_IOCTL) - vnic_ioctl(q, mp); + dld_ioctl(q, mp); else freemsg(mp); } @@ -368,39 +353,29 @@ vnic_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) /* * Process a VNICIOC_CREATE request. 
*/ -static void -vnic_ioc_create(vnic_taskq_args_t *taskq_args) +static int +vnic_ioc_create(mblk_t *mp, int mode) { STRUCT_HANDLE(vnic_ioc_create, create_arg); - queue_t *wq = taskq_args->tq_vnic_q; - mblk_t *mp = taskq_args->tq_vnic_mp; - int mode = taskq_args->tq_vnic_flag; int rc = 0; int mac_len; uchar_t mac_addr[MAXMACADDRLEN]; - uint_t vnic_id; - char dev_name[MAXNAMELEN + 1]; + datalink_id_t vnic_id, linkid; vnic_mac_addr_type_t mac_addr_type; - kmem_free(taskq_args, sizeof (vnic_taskq_args_t)); STRUCT_SET_HANDLE(create_arg, mode, (void *)mp->b_cont->b_rptr); - if (MBLKL(mp->b_cont) < STRUCT_SIZE(create_arg)) { - rc = EINVAL; - goto bail; - } + if (MBLKL(mp->b_cont) < STRUCT_SIZE(create_arg)) + return (EINVAL); /* - * VNIC id. For now it is specified by the user. Once we have - * vanity naming, we can pick a value for the user, and let - * the user assign a generic name to the VNIC (XXXND) + * VNIC link id */ vnic_id = STRUCT_FGET(create_arg, vc_vnic_id); /* - * Device name and number of the MAC port the VNIC is defined - * on top of. + * Linkid of the link the VNIC is defined on top of. 
*/ - bcopy(STRUCT_FGET(create_arg, vc_dev_name), dev_name, MAXNAMELEN); + linkid = STRUCT_FGET(create_arg, vc_link_id); /* MAC address */ mac_addr_type = STRUCT_FGET(create_arg, vc_mac_addr_type); @@ -412,41 +387,27 @@ vnic_ioc_create(vnic_taskq_args_t *taskq_args) MAXMACADDRLEN); break; default: - rc = ENOTSUP; - goto bail; + return (ENOTSUP); } - rc = vnic_dev_create(vnic_id, dev_name, mac_len, mac_addr); - -bail: - freemsg(mp->b_cont); - mp->b_cont = NULL; - if (rc != 0) - miocnak(wq, mp, 0, rc); - else - miocack(wq, mp, 0, 0); + rc = vnic_dev_create(vnic_id, linkid, mac_len, mac_addr); + return (rc); } -static void -vnic_ioc_modify(vnic_taskq_args_t *taskq_args) +static int +vnic_ioc_modify(mblk_t *mp, int mode) { STRUCT_HANDLE(vnic_ioc_modify, modify_arg); - queue_t *wq = taskq_args->tq_vnic_q; - mblk_t *mp = taskq_args->tq_vnic_mp; - int mode = taskq_args->tq_vnic_flag; int err = 0; - uint_t vnic_id; + datalink_id_t vnic_id; uint_t modify_mask; vnic_mac_addr_type_t mac_addr_type; uint_t mac_len; uchar_t mac_addr[MAXMACADDRLEN]; - kmem_free(taskq_args, sizeof (vnic_taskq_args_t)); STRUCT_SET_HANDLE(modify_arg, mode, (void *)mp->b_cont->b_rptr); - if (MBLKL(mp->b_cont) < STRUCT_SIZE(modify_arg)) { - err = EINVAL; - goto done; - } + if (MBLKL(mp->b_cont) < STRUCT_SIZE(modify_arg)) + return (EINVAL); vnic_id = STRUCT_FGET(modify_arg, vm_vnic_id); modify_mask = STRUCT_FGET(modify_arg, vm_modify_mask); @@ -460,42 +421,23 @@ vnic_ioc_modify(vnic_taskq_args_t *taskq_args) err = vnic_dev_modify(vnic_id, modify_mask, mac_addr_type, mac_len, mac_addr); -done: - freemsg(mp->b_cont); - mp->b_cont = NULL; - if (err != 0) - miocnak(wq, mp, 0, err); - else - miocack(wq, mp, 0, 0); + return (err); } -static void -vnic_ioc_delete(vnic_taskq_args_t *taskq_args) +static int +vnic_ioc_delete(mblk_t *mp, int mode) { STRUCT_HANDLE(vnic_ioc_delete, delete_arg); - queue_t *wq = taskq_args->tq_vnic_q; - mblk_t *mp = taskq_args->tq_vnic_mp; - int mode = taskq_args->tq_vnic_flag; - 
uint_t vnic_id; + datalink_id_t vnic_id; int err = 0; - kmem_free(taskq_args, sizeof (vnic_taskq_args_t)); STRUCT_SET_HANDLE(delete_arg, mode, (void *)mp->b_cont->b_rptr); - if (STRUCT_SIZE(delete_arg) > MBLKL(mp)) { - err = EINVAL; - goto fail; - } + if (STRUCT_SIZE(delete_arg) > MBLKL(mp)) + return (EINVAL); vnic_id = STRUCT_FGET(delete_arg, vd_vnic_id); err = vnic_dev_delete(vnic_id); - -fail: - freemsg(mp->b_cont); - mp->b_cont = NULL; - if (err != 0) - miocnak(wq, mp, 0, err); - else - miocack(wq, mp, 0, err); + return (err); } typedef struct vnic_ioc_info_state { @@ -504,8 +446,9 @@ typedef struct vnic_ioc_info_state { } vnic_ioc_info_state_t; static int -vnic_ioc_info_new_vnic(void *arg, uint32_t id, vnic_mac_addr_type_t addr_type, - uint_t mac_len, uint8_t *mac_addr, char *dev_name) +vnic_ioc_info_new_vnic(void *arg, datalink_id_t id, + vnic_mac_addr_type_t addr_type, uint_t mac_len, uint8_t *mac_addr, + datalink_id_t linkid) { vnic_ioc_info_state_t *state = arg; /*LINTED*/ @@ -515,10 +458,10 @@ vnic_ioc_info_new_vnic(void *arg, uint32_t id, vnic_mac_addr_type_t addr_type, return (ENOSPC); vn->vn_vnic_id = id; + vn->vn_link_id = linkid; vn->vn_mac_addr_type = addr_type; vn->vn_mac_len = mac_len; bcopy(mac_addr, &(vn->vn_mac_addr), mac_len); - bcopy(dev_name, &(vn->vn_dev_name), MAXNAMELEN); state->where += sizeof (*vn); state->bytes_left -= sizeof (*vn); @@ -526,22 +469,18 @@ vnic_ioc_info_new_vnic(void *arg, uint32_t id, vnic_mac_addr_type_t addr_type, return (0); } -static void -vnic_ioc_info(vnic_taskq_args_t *taskq_args) +/* ARGSUSED */ +static int +vnic_ioc_info(mblk_t *mp, int mode) { - queue_t *wq = taskq_args->tq_vnic_q; - mblk_t *mp = taskq_args->tq_vnic_mp; vnic_ioc_info_t *info_argp; int rc, len; - uint32_t nvnics, vnic_id; - char dev_name[MAXNAMELEN]; + uint32_t nvnics; + datalink_id_t vnic_id, linkid; vnic_ioc_info_state_t state; - kmem_free(taskq_args, sizeof (vnic_taskq_args_t)); - if ((len = MBLKL(mp->b_cont)) < sizeof (*info_argp)) { - rc = 
EINVAL; - goto bail; - } + if ((len = MBLKL(mp->b_cont)) < sizeof (*info_argp)) + return (EINVAL); /* LINTED alignment */ info_argp = (vnic_ioc_info_t *)mp->b_cont->b_rptr; @@ -552,22 +491,12 @@ vnic_ioc_info(vnic_taskq_args_t *taskq_args) * regarding all vnics currently defined. */ vnic_id = info_argp->vi_vnic_id; - if (info_argp->vi_dev_name) - bcopy(info_argp->vi_dev_name, dev_name, MAXNAMELEN); + linkid = info_argp->vi_linkid; state.bytes_left = len - sizeof (vnic_ioc_info_t); state.where = (uchar_t *)(info_argp +1); - rc = vnic_info(&nvnics, vnic_id, dev_name, &state, + rc = vnic_info(&nvnics, vnic_id, linkid, &state, vnic_ioc_info_new_vnic); - -bail: - if (rc == 0) { - info_argp->vi_nvnics = nvnics; - miocack(wq, mp, len, 0); - } else { - freemsg(mp->b_cont); - mp->b_cont = NULL; - miocnak(wq, mp, 0, rc); - } + return (rc); } diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index 2433e499b0..676bcc1e6f 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -44,10 +44,8 @@ #include <sys/dlpi.h> #include <sys/mac.h> #include <sys/mac_ether.h> +#include <sys/dls.h> #include <sys/pattr.h> -#if 0 -#include <sys/vlan.h> -#endif #include <sys/vnic.h> #include <sys/vnic_impl.h> #include <sys/gld.h> @@ -105,8 +103,8 @@ static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* used by vnic_walker */ typedef struct vnic_info_state { - uint32_t vs_vnic_id; - char vs_dev_name[MAXNAMELEN]; + datalink_id_t vs_vnic_id; + datalink_id_t vs_linkid; boolean_t vs_vnic_found; vnic_info_new_vnic_fn_t vs_new_vnic_fn; void *vs_fn_arg; @@ -165,7 +163,7 @@ vnic_dev_init(void) vnic_hash = mod_hash_create_idhash("vnic_hash", VNIC_HASHSZ, mod_hash_null_valdtor); - vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash", + vnic_mac_hash = mod_hash_create_idhash("vnic_mac_hash", VNIC_MAC_HASHSZ, mod_hash_null_valdtor); rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); @@ -182,7 +180,7 @@ vnic_dev_fini(void) mutex_destroy(&vnic_mac_lock); rw_destroy(&vnic_lock); - mod_hash_destroy_strhash(vnic_mac_hash); + mod_hash_destroy_idhash(vnic_mac_hash); mod_hash_destroy_idhash(vnic_hash); kmem_cache_destroy(vnic_mac_cache); kmem_cache_destroy(vnic_cache); @@ -195,9 +193,8 @@ vnic_dev_count(void) } static int -vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) +vnic_mac_open(datalink_id_t linkid, vnic_mac_t **vmp) { - char *str_key; int err; vnic_mac_t *vnic_mac = NULL; const mac_info_t *mip; @@ -206,7 +203,7 @@ vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) mutex_enter(&vnic_mac_lock); - err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name, + err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&vnic_mac); if (err == 0) { /* this MAC is already opened, increment reference count */ @@ -217,12 +214,20 @@ vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) } vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP); - - if ((err = mac_open(dev_name, &vnic_mac->va_mh)) != 0) { + if ((err = 
mac_open_by_linkid(linkid, &vnic_mac->va_mh)) != 0) { vnic_mac->va_mh = NULL; goto bail; } + /* + * For now, we do not support VNICs over legacy drivers. This will + * soon be changed. + */ + if (mac_is_legacy(vnic_mac->va_mh)) { + err = ENOTSUP; + goto bail; + } + /* only ethernet support, for now */ mip = mac_info(vnic_mac->va_mh); if (mip->mi_media != DL_ETHER) { @@ -234,12 +239,10 @@ vnic_mac_open(const char *dev_name, vnic_mac_t **vmp) goto bail; } - (void) strcpy(vnic_mac->va_dev_name, dev_name); + vnic_mac->va_linkid = linkid; /* add entry to hash table */ - str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP); - (void) strcpy(str_key, dev_name); - err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key, + err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t)vnic_mac); ASSERT(err == 0); @@ -585,7 +588,7 @@ vnic_mac_free(vnic_mac_t *vnic_mac) mac_close(vnic_mac->va_mh); (void) mod_hash_remove(vnic_mac_hash, - (mod_hash_key_t)vnic_mac->va_dev_name, &val); + (mod_hash_key_t)(uintptr_t)vnic_mac->va_linkid, &val); ASSERT(vnic_mac == (vnic_mac_t *)val); kmem_cache_free(vnic_mac_cache, vnic_mac); @@ -678,8 +681,8 @@ vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr) * been used up. */ set_promisc: - err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); - if (err != 0) { + if ((err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, + MAC_DEVPROMISC)) != 0) { return (err); } @@ -719,7 +722,8 @@ vnic_remove_unicstaddr(vnic_t *vnic) * Returns 0 on success, an errno on failure. 
*/ int -vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) +vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, int mac_len, + uchar_t *mac_addr) { vnic_t *vnic = NULL; mac_register_t *mac; @@ -751,7 +755,7 @@ vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) } /* open underlying MAC */ - err = vnic_mac_open(dev_name, &vnic_mac); + err = vnic_mac_open(linkid, &vnic_mac); if (err != 0) { kmem_cache_free(vnic_cache, vnic); rw_exit(&vnic_lock); @@ -788,7 +792,7 @@ vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; mac->m_driver = vnic; mac->m_dip = vnic_get_dip(); - mac->m_instance = vnic_id; + mac->m_instance = (uint_t)-1; mac->m_src_addr = vnic->vn_addr; mac->m_callbacks = &vnic_m_callbacks; @@ -796,10 +800,29 @@ vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr) mac->m_min_sdu = lower_mac_info->mi_sdu_min; mac->m_max_sdu = lower_mac_info->mi_sdu_max; + /* + * As the current margin size of the underlying mac is used to + * determine the margin size of the VNIC itself, request the + * underlying mac not to change to a smaller margin size. 
+ */ + err = mac_margin_add(vnic_mac->va_mh, &(vnic->vn_margin), B_TRUE); + if (err != 0) + goto bail; + mac->m_margin = vnic->vn_margin; err = mac_register(mac, &vnic->vn_mh); mac_free(mac); - if (err != 0) + if (err != 0) { + VERIFY(mac_margin_remove(vnic_mac->va_mh, + vnic->vn_margin) == 0); goto bail; + } + + if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) { + VERIFY(mac_margin_remove(vnic_mac->va_mh, + vnic->vn_margin) == 0); + (void) mac_unregister(vnic->vn_mh); + goto bail; + } /* add new VNIC to hash table */ err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), @@ -880,7 +903,7 @@ bail_unlocked: */ /* ARGSUSED */ int -vnic_dev_modify(uint_t vnic_id, uint_t modify_mask, +vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) { vnic_t *vnic = NULL; @@ -910,11 +933,12 @@ vnic_dev_modify(uint_t vnic_id, uint_t modify_mask, } int -vnic_dev_delete(uint_t vnic_id) +vnic_dev_delete(datalink_id_t vnic_id) { vnic_t *vnic = NULL; mod_hash_val_t val; vnic_flow_t *flent; + datalink_id_t tmpid; int rc; vnic_mac_t *vnic_mac; @@ -926,6 +950,13 @@ vnic_dev_delete(uint_t vnic_id) return (ENOENT); } + if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid)) != 0) { + rw_exit(&vnic_lock); + return (rc); + } + + ASSERT(vnic_id == tmpid); + /* * We cannot unregister the MAC yet. Unregistering would * free up mac_impl_t which should not happen at this time. @@ -935,6 +966,7 @@ vnic_dev_delete(uint_t vnic_id) * new claims on mac_impl_t. 
*/ if (mac_disable(vnic->vn_mh) != 0) { + (void) dls_devnet_create(vnic->vn_mh, vnic_id); rw_exit(&vnic_lock); return (EBUSY); } @@ -955,6 +987,8 @@ vnic_dev_delete(uint_t vnic_id) vnic_classifier_flow_destroy(flent); } + rc = mac_margin_remove(vnic->vn_vnic_mac->va_mh, vnic->vn_margin); + ASSERT(rc == 0); rc = mac_unregister(vnic->vn_mh); ASSERT(rc == 0); (void) vnic_remove_unicstaddr(vnic); @@ -1329,8 +1363,8 @@ vnic_m_unicst(void *arg, const uint8_t *mac_addr) } int -vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg, - vnic_info_new_vnic_fn_t new_vnic_fn) +vnic_info(uint_t *nvnics, datalink_id_t vnic_id, datalink_id_t linkid, + void *fn_arg, vnic_info_new_vnic_fn_t new_vnic_fn) { vnic_info_state_t state; int rc = 0; @@ -1341,13 +1375,13 @@ vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg, bzero(&state, sizeof (state)); state.vs_vnic_id = vnic_id; - bcopy(state.vs_dev_name, dev_name, MAXNAMELEN); + state.vs_linkid = linkid; state.vs_new_vnic_fn = new_vnic_fn; state.vs_fn_arg = fn_arg; mod_hash_walk(vnic_hash, vnic_info_walker, &state); - if ((rc = state.vs_rc) == 0 && vnic_id != 0 && + if ((rc = state.vs_rc) == 0 && vnic_id != DATALINK_ALL_LINKID && !state.vs_vnic_found) rc = ENOENT; @@ -1371,14 +1405,16 @@ vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) vnic = (vnic_t *)val; - if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id) + if (state->vs_vnic_id != DATALINK_ALL_LINKID && + vnic->vn_id != state->vs_vnic_id) { goto bail; + } state->vs_vnic_found = B_TRUE; state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, - vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name); + vnic->vn_addr, vnic->vn_vnic_mac->va_linkid); bail: return ((state->vs_rc == 0) ? 
MH_WALK_CONTINUE : MH_WALK_TERMINATE); } @@ -1473,9 +1509,10 @@ vnic_promisc_set(vnic_t *vnic, boolean_t on) return (0); if (on) { - r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC); - if (r != 0) + if ((r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, + MAC_DEVPROMISC)) != 0) { return (r); + } rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); vnic->vn_promisc_next = vnic_mac->va_promisc; diff --git a/usr/src/uts/common/io/xge/drv/xgell.c b/usr/src/uts/common/io/xge/drv/xgell.c index b236c7eefc..f3b1ef1295 100644 --- a/usr/src/uts/common/io/xge/drv/xgell.c +++ b/usr/src/uts/common/io/xge/drv/xgell.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2236,6 +2236,7 @@ xgell_device_register(xgelldev_t *lldev, xgell_config_t *config) macp->m_callbacks = &xgell_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = hldev->config.mtu; + macp->m_margin = VLAN_TAGSZ; /* * Finally, we're ready to register ourselves with the Nemo * interface; if this succeeds, we're all ready to start() diff --git a/usr/src/uts/common/os/dacf.c b/usr/src/uts/common/os/dacf.c index 91e34dabbf..1ab2afa986 100644 --- a/usr/src/uts/common/os/dacf.c +++ b/usr/src/uts/common/os/dacf.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -1181,6 +1180,19 @@ dacf_minor_number(dacf_infohdl_t info_hdl) } /* + * dacf_get_dev() + * given a dacf_infohdl_t, obtain the dev_t of the instance being + * configured. + */ +dev_t +dacf_get_dev(dacf_infohdl_t info_hdl) +{ + struct ddi_minor_data *dmdp = (struct ddi_minor_data *)info_hdl; + + return (dmdp->ddm_dev); +} + +/* * dacf_driver_name() * given a dacf_infohdl_t, obtain the device driver name of the device * instance being configured. @@ -1269,7 +1281,7 @@ dacf_retrieve_info(dacf_infohdl_t info_hdl) void *data; if (mod_hash_find(dacf_info_hash, (mod_hash_key_t)dmdp, - (mod_hash_val_t *)&data) != 0) { + (mod_hash_val_t *)&data) != 0) { return (NULL); } diff --git a/usr/src/uts/common/os/dacf_clnt.c b/usr/src/uts/common/os/dacf_clnt.c index 58c57b5224..e40509d33b 100644 --- a/usr/src/uts/common/os/dacf_clnt.c +++ b/usr/src/uts/common/os/dacf_clnt.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,8 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 1999 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -75,7 +74,10 @@ dacfc_match_create_minor(char *name, char *node_type, dev_info_t *dip, char *dev_path, *dev_pathp, *drv_mname = NULL; dacf_rsrvlist_t *pa_rsrv, *pd_rsrv; - if (flag & CLONE_DEV) { + /* + * Check the dacf rule for non-clone devices or for network devices. 
+ */ + if ((flag & CLONE_DEV) && (strcmp(node_type, DDI_NT_NET) != 0)) { return; } diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c index 48ce29785d..bfc829934f 100644 --- a/usr/src/uts/common/os/devcfg.c +++ b/usr/src/uts/common/os/devcfg.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -6637,6 +6637,20 @@ i_ddi_devs_attached(major_t major) return (error); } +int +i_ddi_minor_node_count(dev_info_t *ddip, const char *node_type) +{ + struct ddi_minor_data *dp; + int count = 0; + + mutex_enter(&(DEVI(ddip)->devi_lock)); + for (dp = DEVI(ddip)->devi_minor; dp != NULL; dp = dp->next) + if (strcmp(dp->ddm_node_type, node_type) == 0) + count++; + mutex_exit(&(DEVI(ddip)->devi_lock)); + return (count); +} + /* * ddi_hold_installed_driver configures and attaches all * instances of the specified driver. To accomplish this diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index 2bbbca12b6..d80fa67f56 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -77,9 +77,9 @@ #include <sys/sunldi_impl.h> #include <sys/autoconf.h> #include <sys/policy.h> +#include <sys/dld.h> #include <sys/zone.h> - /* * This define helps improve the readability of streams code while * still maintaining a very old streams performance enhancement. The @@ -239,8 +239,9 @@ stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp) struct stdata *stp; queue_t *qp; int s; - dev_t dummydev; + dev_t dummydev, savedev; struct autopush *ap; + struct dlautopush dlap; int error = 0; ssize_t rmin, rmax; int cloneopen; @@ -439,6 +440,7 @@ ckreturn: /* * Open driver and create stream to it (via qattach). 
*/ + savedev = *devp; cloneopen = (getmajor(*devp) == clone_major); if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) { mutex_enter(&vp->v_lock); @@ -476,15 +478,53 @@ ckreturn: " streams module"); } + if (!NETWORK_DRV(major)) { + savedev = *devp; + } else { + /* + * For network devices, process differently based on the + * return value from dld_autopush(): + * + * 0: the passed-in device points to a GLDv3 datalink with + * per-link autopush configuration; use that configuration + * and ignore any per-driver autopush configuration. + * + * 1: the passed-in device points to a physical GLDv3 + * datalink without per-link autopush configuration. The + * passed in device was changed to refer to the actual + * physical device (if it's not already); we use that new + * device to look up any per-driver autopush configuration. + * + * -1: neither of the above cases applied; use the initial + * device to look up any per-driver autopush configuration. + */ + switch (dld_autopush(&savedev, &dlap)) { + case 0: + zoneid = crgetzoneid(crp); + for (s = 0; s < dlap.dap_npush; s++) { + error = push_mod(qp, &dummydev, stp, + dlap.dap_aplist[s], dlap.dap_anchor, crp, + zoneid); + if (error != 0) + break; + } + goto opendone; + case 1: + break; + case -1: + savedev = *devp; + break; + } + } /* - * Check for autopush. Start with the global zone. If not found - * check in the local zone. + * Find the autopush configuration based on "savedev". Start with the + * global zone. If not found check in the local zone. 
*/ zoneid = GLOBAL_ZONEID; retryap: ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))-> netstack_str; - if ((ap = sad_ap_find_by_dev(*devp, ss)) == NULL) { + if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) { netstack_rele(ss->ss_netstack); if (zoneid == GLOBAL_ZONEID) { /* @@ -507,6 +547,8 @@ retryap: sad_ap_rele(ap, ss); netstack_rele(ss->ss_netstack); +opendone: + /* * let specfs know that open failed part way through */ @@ -516,8 +558,6 @@ retryap: mutex_exit(&stp->sd_lock); } -opendone: - /* * Wake up others that are waiting for stream to be created. */ diff --git a/usr/src/uts/common/os/swapgeneric.c b/usr/src/uts/common/os/swapgeneric.c index 33ce90b489..d4217c77f3 100644 --- a/usr/src/uts/common/os/swapgeneric.c +++ b/usr/src/uts/common/os/swapgeneric.c @@ -20,7 +20,7 @@ */ /* ONC_PLUS EXTRACT START */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -360,7 +360,6 @@ loadrootmodules(void) if ((err = load_boot_driver(this)) != 0) { cmn_err(CE_WARN, "Cannot load drv/%s", this); return (err); - /* NOTREACHED */ } } /* @@ -371,7 +370,6 @@ loadrootmodules(void) if ((err = load_boot_platform_modules(this)) != 0) { cmn_err(CE_WARN, "Cannot load drv/%s", this); return (err); - /* NOTREACHED */ } } @@ -407,7 +405,6 @@ loop: if (err != 0) { cmn_err(CE_CONT, "Cannot load drivers for %s\n", name); goto out; - /* NOTREACHED */ } /* @@ -474,34 +471,38 @@ loop: if (strncmp(rootfs.bo_fstype, "nfs", 3) == 0) { ++netboot; + /* + * Preload (load-only, no init) the dacf module. We cannot + * init the module because one of its requisite modules is + * dld whose _init function will call taskq_create(), which + * will panic the system at this point. 
+ */ + if ((err = modloadonly("dacf", "net_dacf")) < 0) { + cmn_err(CE_CONT, "Cannot load dacf/net_dacf\n"); + goto out; + } if ((err = modload("misc", "tlimod")) < 0) { cmn_err(CE_CONT, "Cannot load misc/tlimod\n"); goto out; - /* NOTREACHED */ } if ((err = modload("strmod", "rpcmod")) < 0) { cmn_err(CE_CONT, "Cannot load strmod/rpcmod\n"); goto out; - /* NOTREACHED */ } if ((err = modload("misc", "nfs_dlboot")) < 0) { cmn_err(CE_CONT, "Cannot load misc/nfs_dlboot\n"); goto out; - /* NOTREACHED */ } if ((err = modload("mac", "mac_ether")) < 0) { cmn_err(CE_CONT, "Cannot load mac/mac_ether\n"); goto out; - /* NOTREACHED */ } if ((err = modload("misc", "strplumb")) < 0) { cmn_err(CE_CONT, "Cannot load misc/strplumb\n"); goto out; - /* NOTREACHED */ } if ((err = strplumb_load()) < 0) { goto out; - /* NOTREACHED */ } } diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 75354330ef..2721748a7c 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -6149,3 +6149,26 @@ zone_find_by_id_nolock(zoneid_t zoneid) mutex_exit(&zonehash_lock); return (zone); } + +/* + * Walk the datalinks for a given zone + */ +int +zone_datalink_walk(zoneid_t zoneid, int (*cb)(const char *, void *), void *data) +{ + zone_t *zone; + struct dlnamelist *dlnl; + int ret = 0; + + if ((zone = zone_find_by_id(zoneid)) == NULL) + return (ENOENT); + + mutex_enter(&zone->zone_lock); + for (dlnl = zone->zone_dl_list; dlnl != NULL; dlnl = dlnl->dlnl_next) { + if ((ret = (*cb)(dlnl->dlnl_name, data)) != 0) + break; + } + mutex_exit(&zone->zone_lock); + zone_rele(zone); + return (ret); +} diff --git a/usr/src/uts/common/sys/aggr.h b/usr/src/uts/common/sys/aggr.h index 1b6f14b5ce..c89cca4d46 100644 --- a/usr/src/uts/common/sys/aggr.h +++ b/usr/src/uts/common/sys/aggr.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -30,6 +30,8 @@ #include <sys/types.h> #include <sys/ethernet.h> +#include <sys/mac.h> +#include <sys/dls.h> #include <sys/param.h> #ifdef __cplusplus @@ -74,6 +76,13 @@ typedef enum { #define AGGR_MAX_PORTS 256 /* + * The largest configurable aggregation key. Because by default the key is + * used as the DLPI device PPA and default VLAN PPA's are calculated as + * ((1000 * vid) + PPA), the largest key can't be > 999. + */ +#define AGGR_MAX_KEY 999 + +/* * LACP port state. */ typedef union { @@ -107,31 +116,37 @@ typedef union { /* one of the ports of a link aggregation group */ typedef struct laioc_port { - char lp_devname[MAXNAMELEN + 1]; + datalink_id_t lp_linkid; } laioc_port_t; #define LAIOC_CREATE LAIOC(1) typedef struct laioc_create { + datalink_id_t lc_linkid; uint32_t lc_key; uint32_t lc_nports; uint32_t lc_policy; uchar_t lc_mac[ETHERADDRL]; - boolean_t lc_mac_fixed; aggr_lacp_mode_t lc_lacp_mode; aggr_lacp_timer_t lc_lacp_timer; + uint32_t lc_mac_fixed : 1, + lc_force : 1, + lc_pad_bits : 30; } laioc_create_t; #ifdef _SYSCALL32 typedef struct laioc_create32 { + datalink_id_t lc_linkid; uint32_t lc_key; uint32_t lc_nports; uint32_t lc_policy; uchar_t lc_mac[ETHERADDRL]; - boolean_t lc_mac_fixed; aggr_lacp_mode_t lc_lacp_mode; aggr_lacp_timer_t lc_lacp_timer; + uint32_t lc_mac_fixed : 1, + lc_force : 1, + lc_pad_bits : 30; } laioc_create32_t; #endif /* _SYSCALL32 */ @@ -139,13 +154,13 @@ typedef struct laioc_create32 { #define LAIOC_DELETE LAIOC(2) typedef struct laioc_delete { - uint32_t ld_key; + datalink_id_t ld_linkid; } laioc_delete_t; #ifdef _SYSCALL32 typedef struct laioc_delete32 { - uint32_t ld_key; + datalink_id_t ld_linkid; } laioc_delete32_t; #endif /* _SYSCALL32 */ @@ -165,16 +180,18 @@ typedef enum aggr_link_state { } aggr_link_state_t; typedef struct laioc_info_port { - char lp_devname[MAXNAMELEN + 1]; + datalink_id_t lp_linkid; uchar_t lp_mac[ETHERADDRL]; aggr_port_state_t lp_state; aggr_lacp_state_t 
lp_lacp_state; } laioc_info_port_t; typedef struct laioc_info_group { + datalink_id_t lg_linkid; uint32_t lg_key; uchar_t lg_mac[ETHERADDRL]; boolean_t lg_mac_fixed; + boolean_t lg_force; uint32_t lg_policy; uint32_t lg_nports; aggr_lacp_mode_t lg_lacp_mode; @@ -182,23 +199,25 @@ typedef struct laioc_info_group { } laioc_info_group_t; typedef struct laioc_info { - uint32_t li_ngroups; - uint32_t li_group_key; /* 0 returns all */ + /* Must not be DLADM_INVALID_LINKID */ + datalink_id_t li_group_linkid; } laioc_info_t; #define LAIOC_ADD LAIOC(4) #define LAIOC_REMOVE LAIOC(5) typedef struct laioc_add_rem { - uint32_t la_key; + datalink_id_t la_linkid; uint32_t la_nports; + uint32_t la_force; } laioc_add_rem_t; #ifdef _SYSCALL32 typedef struct laioc_add_rem32 { - uint32_t la_key; + datalink_id_t la_linkid; uint32_t la_nports; + uint32_t la_force; } laioc_add_rem32_t; #endif /* _SYSCALL32 */ @@ -211,7 +230,7 @@ typedef struct laioc_add_rem32 { #define LAIOC_MODIFY_LACP_TIMER 0x08 typedef struct laioc_modify { - uint32_t lu_key; + datalink_id_t lu_linkid; uint8_t lu_modify_mask; uint32_t lu_policy; uchar_t lu_mac[ETHERADDRL]; @@ -223,7 +242,7 @@ typedef struct laioc_modify { #ifdef _SYSCALL32 typedef struct laioc_modify32 { - uint32_t lu_key; + datalink_id_t lu_linkid; uint8_t lu_modify_mask; uint32_t lu_policy; uchar_t lu_mac[ETHERADDRL]; diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index e114624a06..7bc12b3d31 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -55,7 +55,7 @@ extern "C" { typedef struct aggr_port_s { struct aggr_port_s *lp_next; struct aggr_grp_s *lp_grp; /* back ptr to group */ - char lp_devname[MAXNAMELEN + 1]; + datalink_id_t lp_linkid; uint16_t lp_portid; uint8_t lp_addr[ETHERADDRL]; /* port MAC address */ uint32_t lp_refs; /* refcount */ @@ -64,7 +64,8 @@ typedef struct aggr_port_s { lp_tx_enabled : 1, lp_collector_enabled : 1, lp_promisc_on : 1, - lp_pad_bits : 28; + lp_no_link_update : 1, + lp_pad_bits : 27; uint32_t lp_closing; mac_handle_t lp_mh; const mac_info_t *lp_mip; @@ -80,6 +81,7 @@ typedef struct aggr_port_s { aggr_lacp_port_t lp_lacp; /* LACP state */ lacp_stats_t lp_lacp_stats; const mac_txinfo_t *lp_txinfo; + uint32_t lp_margin; } aggr_port_t; /* @@ -100,6 +102,7 @@ typedef struct aggr_port_s { */ typedef struct aggr_grp_s { krwlock_t lg_lock; + datalink_id_t lg_linkid; uint16_t lg_key; /* key (group port number) */ uint32_t lg_refs; /* refcount */ uint16_t lg_nports; /* number of MAC ports */ @@ -110,7 +113,10 @@ typedef struct aggr_grp_s { lg_started : 1, /* group started? */ lg_promisc : 1, /* in promiscuous mode? 
*/ lg_gldv3_polling : 1, - lg_pad_bits : 11; + lg_zcopy : 1, + lg_vlan : 1, + lg_force : 1, + lg_pad_bits : 8; aggr_port_t *lg_ports; /* list of configured ports */ aggr_port_t *lg_mac_addr_port; mac_handle_t lg_mh; @@ -129,6 +135,7 @@ typedef struct aggr_grp_s { Agg_t aggr; /* 802.3ad data */ uint32_t lg_hcksum_txflags; uint_t lg_max_sdu; + uint32_t lg_margin; } aggr_grp_t; #define AGGR_LACP_LOCK(grp) mutex_enter(&(grp)->aggr.gl_lock); @@ -162,36 +169,39 @@ typedef struct aggr_grp_s { extern dev_info_t *aggr_dip; extern void aggr_ioctl(queue_t *, mblk_t *); -typedef int (*aggr_grp_info_new_grp_fn_t)(void *, uint32_t, uchar_t *, - boolean_t, uint32_t, uint32_t, aggr_lacp_mode_t, aggr_lacp_timer_t); -typedef int (*aggr_grp_info_new_port_fn_t)(void *, char *, uchar_t *, +typedef int (*aggr_grp_info_new_grp_fn_t)(void *, datalink_id_t, uint32_t, + uchar_t *, boolean_t, boolean_t, uint32_t, uint32_t, aggr_lacp_mode_t, + aggr_lacp_timer_t); +typedef int (*aggr_grp_info_new_port_fn_t)(void *, datalink_id_t, uchar_t *, aggr_port_state_t, aggr_lacp_state_t *); extern void aggr_grp_init(void); extern void aggr_grp_fini(void); -extern int aggr_grp_create(uint32_t, uint_t, laioc_port_t *, uint32_t, - boolean_t, uchar_t *, aggr_lacp_mode_t, aggr_lacp_timer_t); -extern int aggr_grp_delete(uint32_t); +extern int aggr_grp_create(datalink_id_t, uint32_t, uint_t, laioc_port_t *, + uint32_t, boolean_t, boolean_t, uchar_t *, aggr_lacp_mode_t, + aggr_lacp_timer_t); +extern int aggr_grp_delete(datalink_id_t); extern void aggr_grp_free(aggr_grp_t *); -extern int aggr_grp_info(uint_t *, uint32_t, void *, - aggr_grp_info_new_grp_fn_t, aggr_grp_info_new_port_fn_t); +extern int aggr_grp_info(datalink_id_t, void *, aggr_grp_info_new_grp_fn_t, + aggr_grp_info_new_port_fn_t); extern void aggr_grp_notify(aggr_grp_t *, uint32_t); extern boolean_t aggr_grp_attach_port(aggr_grp_t *, aggr_port_t *); extern boolean_t aggr_grp_detach_port(aggr_grp_t *, aggr_port_t *); extern void 
aggr_grp_port_mac_changed(aggr_grp_t *, aggr_port_t *, boolean_t *, boolean_t *); -extern int aggr_grp_add_ports(uint32_t, uint_t, laioc_port_t *); -extern int aggr_grp_rem_ports(uint32_t, uint_t, laioc_port_t *); +extern int aggr_grp_add_ports(datalink_id_t, uint_t, boolean_t, + laioc_port_t *); +extern int aggr_grp_rem_ports(datalink_id_t, uint_t, laioc_port_t *); extern boolean_t aggr_grp_update_ports_mac(aggr_grp_t *); -extern int aggr_grp_modify(uint32_t, aggr_grp_t *, uint8_t, uint32_t, +extern int aggr_grp_modify(datalink_id_t, aggr_grp_t *, uint8_t, uint32_t, boolean_t, const uchar_t *, aggr_lacp_mode_t, aggr_lacp_timer_t); extern void aggr_grp_multicst_port(aggr_port_t *, boolean_t); extern uint_t aggr_grp_count(void); extern void aggr_port_init(void); extern void aggr_port_fini(void); -extern int aggr_port_create(const char *, aggr_port_t **); +extern int aggr_port_create(const datalink_id_t, boolean_t, aggr_port_t **); extern void aggr_port_delete(aggr_port_t *); extern void aggr_port_free(aggr_port_t *); extern int aggr_port_start(aggr_port_t *); diff --git a/usr/src/uts/common/sys/dacf.h b/usr/src/uts/common/sys/dacf.h index b014d98767..b6b7124870 100644 --- a/usr/src/uts/common/sys/dacf.h +++ b/usr/src/uts/common/sys/dacf.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1999-2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -76,6 +75,7 @@ extern struct dacfsw kmod_dacfsw; /* kernel provided module */ const char *dacf_minor_name(dacf_infohdl_t); minor_t dacf_minor_number(dacf_infohdl_t); +dev_t dacf_get_dev(dacf_infohdl_t); const char *dacf_driver_name(dacf_infohdl_t); dev_info_t *dacf_devinfo_node(dacf_infohdl_t); const char *dacf_get_arg(dacf_arghdl_t, char *); diff --git a/usr/src/uts/common/sys/ddi_implfuncs.h b/usr/src/uts/common/sys/ddi_implfuncs.h index 4aa213c1b2..1a9a84384a 100644 --- a/usr/src/uts/common/sys/ddi_implfuncs.h +++ b/usr/src/uts/common/sys/ddi_implfuncs.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -248,6 +248,7 @@ int i_ddi_attach_node_hierarchy(dev_info_t *); dev_info_t *i_ddi_attach_pseudo_node(char *); int i_ddi_attach_hw_nodes(char *); int i_ddi_devs_attached(major_t); +int i_ddi_minor_node_count(dev_info_t *, const char *); /* non-DDI functions: wrapper around mod_hold/rele_dev_by_major() */ struct dev_ops *ddi_hold_driver(major_t); diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index 71555d364b..8cc70e52f9 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -36,6 +36,8 @@ #include <sys/stream.h> #include <sys/mac.h> #include <sys/dls.h> +#include <sys/conf.h> +#include <sys/sad.h> #include <net/if.h> #ifdef __cplusplus @@ -47,9 +49,6 @@ extern "C" { */ #define DLD_INFO "Data-Link Driver v%I%" -#define DLD_MAX_PPA 999 -#define DLD_MAX_MINOR (DLD_MAX_PPA + 1) - /* * Options: To enable an option set the property name to a non-zero value * in kernel/drv/dld.conf. 
@@ -93,31 +92,33 @@ extern "C" { */ #define DLDIOC ('D' << 24 | 'L' << 16 | 'D' << 8) -#define DLDIOCATTR (DLDIOC | 0x03) +#define DLDIOC_ATTR (DLDIOC | 0x03) typedef struct dld_ioc_attr { - char dia_name[IFNAMSIZ]; - char dia_dev[MAXNAMELEN]; + datalink_id_t dia_linkid; uint_t dia_max_sdu; - uint16_t dia_vid; } dld_ioc_attr_t; -#define DLDIOCVLAN (DLDIOC | 0x04) - -typedef struct dld_ioc_vlan { - char div_name[IFNAMSIZ]; - uint_t div_count; -} dld_ioc_vlan_t; - -typedef struct dld_vlan_info { - char dvi_name[IFNAMSIZ]; -} dld_vlan_info_t; - -typedef struct dld_hold_vlan { - char dhv_name[IFNAMSIZ]; - zoneid_t dhv_zid; - boolean_t dhv_docheck; -} dld_hold_vlan_t; +#define DLDIOC_VLAN_ATTR (DLDIOC | 0x04) +typedef struct dld_ioc_vlan_attr { + datalink_id_t div_vlanid; + uint16_t div_vid; + datalink_id_t div_linkid; + boolean_t div_force; + boolean_t div_implicit; +} dld_ioc_vlan_attr_t; + +#define DLDIOC_PHYS_ATTR (DLDIOC | 0x05) +typedef struct dld_ioc_phys_attr { + datalink_id_t dip_linkid; + /* + * Whether this physical link supports vanity naming. Note that + * physical links whose media type is not supported by GLDv3 + * can not support vanity naming. + */ + boolean_t dip_novanity; + char dip_dev[MAXLINKNAMELEN]; +} dld_ioc_phys_attr_t; /* * Secure objects ioctls @@ -137,33 +138,90 @@ typedef struct dld_secobj { uint_t so_len; } dld_secobj_t; -#define DLDIOCSECOBJSET (DLDIOC | 0x05) +#define DLDIOC_SECOBJ_SET (DLDIOC | 0x06) typedef struct dld_ioc_secobj_set { dld_secobj_t ss_obj; uint_t ss_flags; } dld_ioc_secobj_set_t; -#define DLDIOCSECOBJGET (DLDIOC | 0x06) +#define DLDIOC_SECOBJ_GET (DLDIOC | 0x07) typedef struct dld_ioc_secobj_get { dld_secobj_t sg_obj; uint_t sg_count; } dld_ioc_secobj_get_t; -#define DLDIOCSECOBJUNSET (DLDIOC | 0x07) +/* + * The following two slots were used outside of ON, so don't reuse them. 
+ * + * #define DLDIOCHOLDVLAN (DLDIOC | 0x08) + * #define DLDIOCRELEVLAN (DLDIOC | 0x09) + */ + +#define DLDIOC_SECOBJ_UNSET (DLDIOC | 0x0a) typedef struct dld_ioc_secobj_unset { char su_name[DLD_SECOBJ_NAME_MAX]; } dld_ioc_secobj_unset_t; +#define DLDIOC_CREATE_VLAN (DLDIOC | 0x0b) +typedef struct dld_ioc_create_vlan { + datalink_id_t dic_vlanid; + datalink_id_t dic_linkid; + uint16_t dic_vid; + boolean_t dic_force; +} dld_ioc_create_vlan_t; + +#define DLDIOC_DELETE_VLAN (DLDIOC | 0x0c) +typedef struct dld_ioc_delete_vlan { + datalink_id_t did_linkid; +} dld_ioc_delete_vlan_t; + +#define DLDIOC_SETAUTOPUSH (DLDIOC | 0x0d) +#define DLDIOC_GETAUTOPUSH (DLDIOC | 0x0e) +#define DLDIOC_CLRAUTOPUSH (DLDIOC | 0x0f) +typedef struct dld_ioc_ap { + datalink_id_t dia_linkid; + uint_t dia_anchor; + uint_t dia_npush; + char dia_aplist[MAXAPUSH][FMNAMESZ+1]; +} dld_ioc_ap_t; + +#define DLDIOC_DOORSERVER (DLDIOC | 0x10) +typedef struct dld_ioc_door { + boolean_t did_start_door; +} dld_ioc_door_t; + +#define DLDIOC_RENAME (DLDIOC | 0x11) +typedef struct dld_ioc_rename { + datalink_id_t dir_linkid1; + datalink_id_t dir_linkid2; + char dir_link[MAXLINKNAMELEN]; +} dld_ioc_rename_t; + /* - * DLDIOCHOLDVLAN/DLDIOCRELEVLAN are added to support a "hold/release" - * operation on a VLAN. A hold will cause a VLAN to be created or the - * reference count will be increased, release will do the reverse. + * DLDIOC_SETZID sets the zoneid of a given link. It could cause a VLAN to be + * implicitly created. Note that we will hold a reference for the given link + * whenever it has a zoneid other than the global zone. 
*/ -#define DLDIOCHOLDVLAN (DLDIOC | 0x08) +#define DLDIOC_SETZID (DLDIOC | 0x12) +typedef struct dld_ioc_setzid { + char dis_link[MAXLINKNAMELEN]; + zoneid_t dis_zid; +} dld_ioc_setzid_t; + +#define DLDIOC_GETZID (DLDIOC | 0x13) +typedef struct dld_ioc_getzid { + datalink_id_t dig_linkid; + zoneid_t dig_zid; +} dld_ioc_getzid_t; -#define DLDIOCRELEVLAN (DLDIOC | 0x09) - -#define DLDIOCZIDGET (DLDIOC | 0x0a) +/* + * data-link autopush configuration. + */ +struct dlautopush { + uint_t dap_anchor; + uint_t dap_npush; + char dap_aplist[MAXAPUSH][FMNAMESZ+1]; +}; #ifdef _KERNEL int dld_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); @@ -173,6 +231,7 @@ void dld_wput(queue_t *, mblk_t *); void dld_wsrv(queue_t *); void dld_init_ops(struct dev_ops *, const char *); void dld_fini_ops(struct dev_ops *); +int dld_autopush(dev_t *, struct dlautopush *); #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h index 14f799dcd9..1df8c8e1cf 100644 --- a/usr/src/uts/common/sys/dld_impl.h +++ b/usr/src/uts/common/sys/dld_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +29,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/types.h> +#include <sys/conf.h> #include <sys/ethernet.h> #include <sys/stream.h> #include <sys/dlpi.h> @@ -56,6 +57,7 @@ typedef enum { } dld_passivestate_t; typedef struct dld_str dld_str_t; +typedef void (*dld_tx_t)(struct dld_str *, mblk_t *); /* * dld_str_t object definition. @@ -72,11 +74,6 @@ struct dld_str { minor_t ds_minor; /* - * PPA number this stream is attached to. - */ - t_uscalar_t ds_ppa; - - /* * Read/write queues for the stream which the object represents. 
*/ queue_t *ds_rq; @@ -200,21 +197,60 @@ struct dld_str { mblk_t *ds_tx_list_tail; uint_t ds_tx_cnt; uint_t ds_tx_msgcnt; + timeout_id_t ds_tx_qdepth_tid; boolean_t ds_tx_qbusy; + dld_tx_t ds_tx; + dld_tx_t ds_unitdata_tx; + kmutex_t ds_tx_lock; + kcondvar_t ds_tx_cv; + uint32_t ds_intx_cnt; + boolean_t ds_detaching; + + /* + * Pending control messages to be processed. + */ + mblk_t *ds_pending_head; + mblk_t *ds_pending_tail; + + taskqid_t ds_tid; + kmutex_t ds_disp_lock; + kcondvar_t ds_disp_cv; + boolean_t ds_closing; + /* - * Number of threads currently in dld. If there is a pending - * request, it is placed in ds_pending_req and the operation - * will finish when dld becomes single-threaded. + * Used to process ioctl message for control node. See comments + * above dld_ioctl(). */ - kmutex_t ds_thr_lock; - uint_t ds_thr; - uint_t ds_pending_cnt; - mblk_t *ds_pending_req; - task_func_t *ds_pending_op; - kcondvar_t ds_pending_cv; + void (*ds_ioctl)(queue_t *, mblk_t *); } dld_str; +#define DLD_TX_ENTER(dsp) { \ + mutex_enter(&(dsp)->ds_tx_lock); \ + (dsp)->ds_intx_cnt++; \ + mutex_exit(&(dsp)->ds_tx_lock); \ +} + +#define DLD_TX_EXIT(dsp) { \ + mutex_enter(&(dsp)->ds_tx_lock); \ + if ((--(dsp)->ds_intx_cnt == 0) && (dsp)->ds_detaching) \ + cv_signal(&(dsp)->ds_tx_cv); \ + mutex_exit(&(dsp)->ds_tx_lock); \ +} + +/* + * Quiesce the traffic. + */ +#define DLD_TX_QUIESCE(dsp) { \ + mutex_enter(&(dsp)->ds_tx_lock); \ + (dsp)->ds_tx = (dsp)->ds_unitdata_tx = NULL; \ + (dsp)->ds_detaching = B_TRUE; \ + while ((dsp)->ds_intx_cnt != 0) \ + cv_wait(&(dsp)->ds_tx_cv, &(dsp)->ds_tx_lock); \ + (dsp)->ds_detaching = B_FALSE; \ + mutex_exit(&(dsp)->ds_tx_lock); \ +} + /* * dld_str.c module. 
*/ @@ -232,22 +268,26 @@ extern void dld_str_rx_fastpath(void *, mac_resource_handle_t, mblk_t *, mac_header_info_t *); extern void dld_str_rx_unitdata(void *, mac_resource_handle_t, mblk_t *, mac_header_info_t *); + extern void dld_tx_flush(dld_str_t *); -extern void dld_tx_enqueue(dld_str_t *, mblk_t *, boolean_t); extern void dld_str_notify_ind(dld_str_t *); -extern void str_mdata_fastpath_put(dld_str_t *, mblk_t *); extern void dld_tx_single(dld_str_t *, mblk_t *); +extern void str_mdata_fastpath_put(dld_str_t *, mblk_t *); +extern void str_mdata_raw_put(dld_str_t *, mblk_t *); + +extern void dld_ioctl(queue_t *, mblk_t *); +extern void dld_finish_pending_task(dld_str_t *); /* * dld_proto.c */ -extern void dld_proto(dld_str_t *, mblk_t *); -extern void dld_finish_pending_ops(dld_str_t *); +extern void dld_wput_proto_nondata(dld_str_t *, mblk_t *); +extern void dld_wput_proto_data(dld_str_t *, mblk_t *); extern void dld_capabilities_disable(dld_str_t *); /* * Options: there should be a separate bit defined here for each - * DLD_PROP... defined in dld.h. + * DLD_PROP... defined in dld.h. */ #define DLD_OPT_NO_FASTPATH 0x00000001 #define DLD_OPT_NO_POLL 0x00000002 @@ -257,34 +297,23 @@ extern void dld_capabilities_disable(dld_str_t *); extern uint32_t dld_opt; /* - * Useful macros. + * autopush information */ +typedef struct dld_ap { + datalink_id_t da_linkid; + struct dlautopush da_ap; -#define IMPLY(p, c) (!(p) || (c)) +#define da_anchor da_ap.dap_anchor +#define da_npush da_ap.dap_npush +#define da_aplist da_ap.dap_aplist -#define DLD_ENTER(dsp) { \ - mutex_enter(&dsp->ds_thr_lock); \ - ++dsp->ds_thr; \ - ASSERT(dsp->ds_thr != 0); \ - mutex_exit(&dsp->ds_thr_lock); \ -} +} dld_ap_t; -#define DLD_EXIT(dsp) { \ - mutex_enter(&dsp->ds_thr_lock); \ - ASSERT(dsp->ds_thr > 0); \ - if (--dsp->ds_thr == 0 && dsp->ds_pending_req != NULL) \ - dld_finish_pending_ops(dsp); \ - else \ - mutex_exit(&dsp->ds_thr_lock); \ -} +/* + * Useful macros. 
+ */ -#define DLD_WAKEUP(dsp) { \ - mutex_enter(&dsp->ds_thr_lock); \ - ASSERT(dsp->ds_pending_cnt > 0); \ - if (--dsp->ds_pending_cnt == 0) \ - cv_signal(&dsp->ds_pending_cv); \ - mutex_exit(&dsp->ds_thr_lock); \ -} +#define IMPLY(p, c) (!(p) || (c)) #ifdef DEBUG #define DLD_DBG cmn_err diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index 1d1451c082..355280f920 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,6 +50,7 @@ extern "C" { #define DLIOC ('D' << 8) #define DLIOCRAW (DLIOC|1) /* M_DATA "raw" mode */ #define DLIOCNATIVE (DLIOC|2) /* Native traffic mode */ +#define DLIOCMARGININFO (DLIOC|3) /* margin size info */ #define DLIOCHDRINFO (DLIOC|10) /* IP fast-path */ #define DL_IOC_HDR_INFO DLIOCHDRINFO @@ -1663,6 +1664,26 @@ extern void dlphysaddrack(queue_t *, mblk_t *, void *, t_uscalar_t); extern void dlcapabsetqid(dl_mid_t *, const queue_t *); extern boolean_t dlcapabcheckqid(const dl_mid_t *, const queue_t *); extern void dlnotifyack(queue_t *, mblk_t *, uint32_t); + +/* + * The ldi_handle_t typedef is in <sys/sunldi.h>, which in turn requires + * <sys/sunddi.h>, which pulls in <sys/cmn_err.h>, which declares kernel + * versions of the printf() functions that conflict with the libc ones. + * This causes conflicts when building MDB modules like ARP that #define + * _KERNEL. So we use `struct __ldi_handle *' instead. 
+ */ +struct __ldi_handle; +extern int dl_attach(struct __ldi_handle *, int, dl_error_ack_t *); +extern int dl_bind(struct __ldi_handle *, uint_t, dl_error_ack_t *); +extern int dl_phys_addr(struct __ldi_handle *, uchar_t *, size_t *, + dl_error_ack_t *); +extern int dl_info(struct __ldi_handle *, dl_info_ack_t *, uchar_t *, size_t *, + dl_error_ack_t *); +extern int dl_notify(struct __ldi_handle *, uint32_t *, dl_error_ack_t *); +extern const char *dl_errstr(t_uscalar_t); +extern const char *dl_primstr(t_uscalar_t); +extern const char *dl_mactypestr(t_uscalar_t); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h index 77bf2c1e58..f69a14e740 100644 --- a/usr/src/uts/common/sys/dls.h +++ b/usr/src/uts/common/sys/dls.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +30,7 @@ #include <sys/types.h> #include <sys/stream.h> +#include <net/if.h> #include <sys/mac.h> /* @@ -54,28 +55,172 @@ extern "C" { * Macros for converting ppas to instance #s, Vlan ID, or minor. */ #define DLS_PPA2INST(ppa) ((int)((ppa) % 1000)) -#define DLS_PPA2VID(ppa) ((uint16_t)((ppa) / 1000)) -#define DLS_PPA2MINOR(ppa) ((minor_t)((DLS_PPA2INST(ppa)) + 1)) +#define DLS_PPA2VID(ppa) ((ppa) / 1000) /* - * Maps a (VID, INST) pair to ppa + * Converts a minor to an instance#; makes sense only when minor <= 1000. 
*/ -#define DLS_VIDINST2PPA(vid, inst) ((minor_t)((vid) * 1000 + (inst))) +#define DLS_MINOR2INST(minor) ((int)((minor) - 1)) + +typedef enum { + DATALINK_CLASS_PHYS = 0x01, + DATALINK_CLASS_VLAN = 0x02, + DATALINK_CLASS_AGGR = 0x04, + DATALINK_CLASS_VNIC = 0x08 +} datalink_class_t; + +#define DATALINK_CLASS_ALL (DATALINK_CLASS_PHYS | \ + DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC) /* - * Converts a minor to an instance#; makes sense only when minor <= 1000. + * A combination of flags and media. + * flags is the higher 32 bits, and if it is 0x01, it indicates all media + * types can be accepted; otherwise, only the given media type (specified + * in the lower 32 bits) is accepted. */ -#define DLS_MINOR2INST(minor) ((int)((minor) - 1)) +typedef uint64_t datalink_media_t; + +#define DATALINK_ANY_MEDIATYPE \ + ((datalink_media_t)(((datalink_media_t)0x01) << 32)) + +#define DATALINK_MEDIA_ACCEPTED(dmedia, media) \ + (((uint32_t)(((dmedia) >> 32) & 0xfffffffful) & 0x01) ? \ + B_TRUE : ((uint32_t)((dmedia) & 0xfffffffful) == (media))) + +#define MAXLINKATTRLEN 32 + +/* + * Link attributes used by the kernel. + */ +/* + * The major number and instance number of the underlying physical device + * are kept as FPHYMAJ and FPHYINST (major, instance + 1). + * + * Set for physical links only. + */ +#define FPHYMAJ "phymaj" /* uint64_t */ +#define FPHYINST "phyinst" /* uint64_t */ + +/* + * The devname of the physical link. For example, bge0, ce1. Set for physical + * links only. + */ +#define FDEVNAME "devname" /* string */ + +#define DLMGMT_DOOR "/etc/.dlmgmt_door" +/* + * Door upcall command. 
+ */ +#define DLMGMT_CMD_DLS_CREATE 1 +#define DLMGMT_CMD_DLS_GETATTR 2 +#define DLMGMT_CMD_DLS_DESTROY 3 +#define DLMGMT_CMD_GETNAME 4 +#define DLMGMT_CMD_GETLINKID 5 +#define DLMGMT_CMD_GETNEXT 6 +#define DLMGMT_CMD_DLS_UPDATE 7 +#define DLMGMT_CMD_BASE 128 + +/* + * Indicate the link mapping is active or persistent + */ +#define DLMGMT_ACTIVE 0x01 +#define DLMGMT_PERSIST 0x02 + +/* upcall argument */ +typedef struct dlmgmt_upcall_arg_create { + int ld_cmd; + datalink_class_t ld_class; + uint32_t ld_media; + boolean_t ld_persist; + uint64_t ld_phymaj; + uint64_t ld_phyinst; + char ld_devname[MAXNAMELEN]; +} dlmgmt_upcall_arg_create_t; + +typedef struct dlmgmt_upcall_arg_destroy { + int ld_cmd; + datalink_id_t ld_linkid; + boolean_t ld_persist; + int ld_reserved; +} dlmgmt_upcall_arg_destroy_t; + +typedef struct dlmgmt_upcall_arg_update { + int ld_cmd; + boolean_t ld_novanity; + uint32_t ld_media; + uint32_t ld_reserved; + char ld_devname[MAXNAMELEN]; +} dlmgmt_upcall_arg_update_t; + +typedef struct dlmgmt_upcall_arg_getattr { + int ld_cmd; + datalink_id_t ld_linkid; + char ld_attr[MAXLINKATTRLEN]; +} dlmgmt_upcall_arg_getattr_t; + +typedef struct dlmgmt_door_getname { + int ld_cmd; + datalink_id_t ld_linkid; +} dlmgmt_door_getname_t; + +typedef struct dlmgmt_door_getlinkid { + int ld_cmd; + char ld_link[MAXLINKNAMELEN]; +} dlmgmt_door_getlinkid_t; + +typedef struct dlmgmt_door_getnext_s { + int ld_cmd; + datalink_id_t ld_linkid; + datalink_class_t ld_class; + uint32_t ld_flags; + datalink_media_t ld_dmedia; +} dlmgmt_door_getnext_t; + +/* upcall return value */ +struct dlmgmt_linkid_retval_s { + uint_t lr_err; + datalink_id_t lr_linkid; + uint32_t lr_flags; + datalink_class_t lr_class; + uint32_t lr_media; + uint32_t lr_reserved; +}; + +typedef struct dlmgmt_linkid_retval_s dlmgmt_create_retval_t, + dlmgmt_update_retval_t, + dlmgmt_getlinkid_retval_t, + dlmgmt_getnext_retval_t; + +typedef struct dlmgmt_getname_retval_s { + uint_t lr_err; + char 
lr_link[MAXLINKNAMELEN]; + datalink_class_t lr_class; + uint32_t lr_media; + uint32_t lr_flags; +} dlmgmt_getname_retval_t; + +struct dlmgmt_null_retval_s { + uint_t lr_err; +}; + +typedef struct dlmgmt_null_retval_s dlmgmt_destroy_retval_t; + +typedef struct dlmgmt_getattr_retval_s { + uint_t lr_err; + uint_t lr_type; + char lr_attr[1]; +} dlmgmt_getattr_retval_t; #ifdef _KERNEL -extern int dls_create(const char *, const char *); -extern int dls_destroy(const char *); +#define DLS_MAX_PPA 999 +#define DLS_MAX_MINOR (DLS_MAX_PPA + 1) -typedef struct dls_t *dls_channel_t; +typedef struct dls_t *dls_channel_t; -extern int dls_open(const char *, dls_channel_t *); -extern void dls_close(dls_channel_t); +extern int dls_open_style2_vlan(major_t, uint_t, dls_channel_t *); +extern int dls_open_by_dev(dev_t, dls_channel_t *); +extern void dls_close(dls_channel_t); extern mac_handle_t dls_mac(dls_channel_t); extern uint16_t dls_vid(dls_channel_t); @@ -83,26 +228,27 @@ extern uint16_t dls_vid(dls_channel_t); #define DLS_SAP_LLC 0 #define DLS_SAP_PROMISC (1 << 16) -extern int dls_bind(dls_channel_t, uint32_t); -extern void dls_unbind(dls_channel_t); +extern int dls_bind(dls_channel_t, uint32_t); +extern void dls_unbind(dls_channel_t); #define DLS_PROMISC_SAP 0x00000001 #define DLS_PROMISC_MULTI 0x00000002 #define DLS_PROMISC_PHYS 0x00000004 -extern int dls_promisc(dls_channel_t, uint32_t); +extern int dls_promisc(dls_channel_t, uint32_t); -extern int dls_multicst_add(dls_channel_t, const uint8_t *); -extern int dls_multicst_remove(dls_channel_t, const uint8_t *); +extern int dls_multicst_add(dls_channel_t, const uint8_t *); +extern int dls_multicst_remove(dls_channel_t, const uint8_t *); -extern mblk_t *dls_header(dls_channel_t, const uint8_t *, uint16_t, uint_t, - mblk_t **); -extern int dls_header_info(dls_channel_t, mblk_t *, mac_header_info_t *); +extern mblk_t *dls_header(dls_channel_t, const uint8_t *, + uint16_t, uint_t, mblk_t **); +extern int 
dls_header_info(dls_channel_t, mblk_t *, + mac_header_info_t *); -typedef void (*dls_rx_t)(void *, mac_resource_handle_t, mblk_t *, - mac_header_info_t *); +typedef void (*dls_rx_t)(void *, mac_resource_handle_t, mblk_t *, + mac_header_info_t *); -extern void dls_rx_set(dls_channel_t, dls_rx_t, void *); +extern void dls_rx_set(dls_channel_t, dls_rx_t, void *); extern mblk_t *dls_tx(dls_channel_t, mblk_t *); @@ -110,7 +256,47 @@ extern boolean_t dls_active_set(dls_channel_t); extern void dls_active_clear(dls_channel_t); extern dev_info_t *dls_finddevinfo(dev_t); -extern int dls_ppa_from_minor(minor_t, t_uscalar_t *); + +typedef struct dls_devnet_s *dls_dl_handle_t; +typedef struct dls_dev_t *dls_dev_handle_t; + +extern int dls_devnet_open(const char *, + dls_dl_handle_t *, dev_t *); +extern void dls_devnet_close(dls_dl_handle_t); +extern boolean_t dls_devnet_rebuild(); + +extern int dls_devnet_rename(datalink_id_t, datalink_id_t, + const char *); +extern int dls_devnet_create(mac_handle_t, datalink_id_t); +extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *); +extern int dls_devnet_recreate(mac_handle_t, datalink_id_t); +extern int dls_devnet_create_vlan(datalink_id_t, + datalink_id_t, uint16_t, boolean_t); +extern int dls_devnet_destroy_vlan(datalink_id_t); +extern int dls_devnet_hold_tmp(datalink_id_t, dls_dl_handle_t *); +extern void dls_devnet_rele_tmp(dls_dl_handle_t); + +extern const char *dls_devnet_mac(dls_dl_handle_t); +extern uint16_t dls_devnet_vid(dls_dl_handle_t); +extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t); +extern boolean_t dls_devnet_is_explicit(dls_dl_handle_t); +extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *); +extern int dls_devnet_phydev(datalink_id_t, dev_t *); +extern int dls_devnet_setzid(const char *, zoneid_t); +extern int dls_devnet_getzid(datalink_id_t, zoneid_t *); + +extern int dls_mgmt_door_set(boolean_t); +extern int dls_mgmt_create(const char *, dev_t, datalink_class_t, + uint32_t, boolean_t, 
datalink_id_t *); +extern int dls_mgmt_destroy(datalink_id_t, boolean_t); +extern int dls_mgmt_update(const char *, uint32_t, boolean_t, + uint32_t *, datalink_id_t *); +extern int dls_mgmt_get_linkinfo(datalink_id_t, char *, + datalink_class_t *, uint32_t *, uint32_t *); +extern int dls_mgmt_get_linkid(const char *, datalink_id_t *); +extern datalink_id_t dls_mgmt_get_next(datalink_id_t, datalink_class_t, + datalink_media_t, uint32_t); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index c2a2dcf24c..83bccd20bb 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -71,16 +71,56 @@ struct dls_link_s { typedef struct dls_impl_s dls_impl_t; typedef struct dls_head_s dls_head_t; +/* + * The maximum length of an SPA (subnetwork point of attachment). It is of + * the form <macname/vid>. + */ +#define MAXSPALEN (MAXNAMELEN + 5) + typedef struct dls_vlan_s { - char dv_name[IFNAMSIZ]; - uint_t dv_ref; + /* + * The following fields will not change after dls_vlan_t creation. + */ dls_link_t *dv_dlp; uint16_t dv_id; + + /* + * Unique SPA (of the form <macname/vid>) identifying a data-link; + * is needed to avoid name collisions between explicitly and + * implicitly created VLANs. + */ + char dv_spa[MAXSPALEN]; + + /* + * The ppa value of the associated device. Used to derive this link's + * devfs node name. + */ + uint_t dv_ppa; + + /* + * The dev_t used to access this dls_vlan_t. + */ + dev_t dv_dev; + + dev_info_t *dv_dip; kstat_t *dv_ksp; - minor_t dv_minor; - t_uscalar_t dv_ppa; + uint32_t dv_force : 1; + + /* + * The following fields are protected by dv_lock.
+ */ + kmutex_t dv_lock; + + /* + * Reference count of dls_impl_t plus explicit creation of the link + */ + uint_t dv_ref; + + /* + * The reference count of this vlan is opened in its own zone. + */ + uint_t dv_zone_ref; zoneid_t dv_zid; - dls_impl_t *dv_impl_list; } dls_vlan_t; struct dls_impl_s { @@ -98,14 +138,15 @@ struct dls_impl_s { void *di_rx_arg; mac_resource_add_t di_ring_add; const mac_txinfo_t *di_txinfo; - boolean_t di_bound; - boolean_t di_removing; - boolean_t di_active; + uint_t di_bound : 1, + di_removing : 1, + di_active : 1, + di_local : 1; + uint8_t di_unicst_addr[MAXMACADDRLEN]; soft_ring_t **di_soft_ring_list; uint_t di_soft_ring_size; - zoneid_t di_zid; - dls_impl_t *di_next_impl; + dls_dl_handle_t di_ddh; }; struct dls_head_s { @@ -121,37 +162,43 @@ extern void dls_link_rele(dls_link_t *); extern void dls_link_add(dls_link_t *, uint32_t, dls_impl_t *); extern void dls_link_remove(dls_link_t *, dls_impl_t *); extern int dls_link_header_info(dls_link_t *, mblk_t *, - mac_header_info_t *); + mac_header_info_t *); extern int dls_mac_hold(dls_link_t *); extern void dls_mac_rele(dls_link_t *); +extern boolean_t dls_mac_active_set(dls_link_t *); +extern void dls_mac_active_clear(dls_link_t *); extern void dls_mac_stat_create(dls_vlan_t *); extern void dls_mac_stat_destroy(dls_vlan_t *); extern void dls_vlan_init(void); extern int dls_vlan_fini(void); -extern int dls_vlan_create(const char *, const char *, uint16_t); -extern int dls_vlan_destroy(const char *); -extern int dls_vlan_hold(const char *, dls_vlan_t **, boolean_t); +extern int dls_vlan_hold(const char *, uint16_t, dls_vlan_t **, + boolean_t, boolean_t); +extern int dls_vlan_hold_by_dev(dev_t, dls_vlan_t **); extern void dls_vlan_rele(dls_vlan_t *); -extern int dls_vlan_walk(int (*)(dls_vlan_t *, void *), void *); -extern dev_info_t *dls_vlan_finddevinfo(dev_t); -extern int dls_vlan_ppa_from_minor(minor_t, t_uscalar_t *); -extern int dls_vlan_rele_by_name(const char *); -extern minor_t 
dls_minor_hold(boolean_t); -extern void dls_minor_rele(minor_t); -extern int dls_vlan_setzoneid(char *, zoneid_t, boolean_t); -extern int dls_vlan_getzoneid(char *, zoneid_t *); -extern void dls_vlan_add_impl(dls_vlan_t *, dls_impl_t *); -extern void dls_vlan_remove_impl(dls_vlan_t *, dls_impl_t *); +extern int dls_vlan_destroy(const char *, uint16_t); +extern int dls_vlan_create(const char *, uint16_t, boolean_t); +extern int dls_vlan_setzid(const char *, uint16_t, zoneid_t); +extern int dls_stat_update(kstat_t *, dls_vlan_t *, int); +extern int dls_stat_create(const char *, int, const char *, + int (*)(struct kstat *, int), void *, kstat_t **); + +extern int dls_devnet_open_by_dev(dev_t, dls_vlan_t **, + dls_dl_handle_t *); extern void dls_init(void); extern int dls_fini(void); extern void dls_link_txloop(void *, mblk_t *); extern boolean_t dls_accept(dls_impl_t *, mac_header_info_t *, - dls_rx_t *, void **); + dls_rx_t *, void **); extern boolean_t dls_accept_loopback(dls_impl_t *, mac_header_info_t *, - dls_rx_t *, void **); + dls_rx_t *, void **); + +extern void dls_mgmt_init(void); +extern void dls_mgmt_fini(void); + +extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/dls_soft_ring.h b/usr/src/uts/common/sys/dls_soft_ring.h index 9a1a833eb7..403623853a 100644 --- a/usr/src/uts/common/sys/dls_soft_ring.h +++ b/usr/src/uts/common/sys/dls_soft_ring.h @@ -1,5 +1,5 @@ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,8 +88,6 @@ extern void soft_ring_unbind(void *); extern void dls_soft_ring_fanout(void *, void *, mblk_t *, mac_header_info_t *); extern boolean_t dls_soft_ring_enable(dls_channel_t, dl_capab_dls_t *); extern void dls_soft_ring_disable(dls_channel_t); -extern boolean_t dls_soft_ring_workers(dls_channel_t); -extern void dls_soft_ring_rx_set(dls_channel_t, dls_rx_t, void *, int); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h index 281bc2faf7..7e5f75d7f7 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -193,6 +193,8 @@ typedef struct sdev_node { struct sdev_global_data sdev_globaldata; struct sdev_local_data sdev_localdata; } sdev_instance_data; + + void *sdev_private; } sdev_node_t; #define sdev_ldata sdev_instance_data.sdev_localdata @@ -245,13 +247,16 @@ typedef enum { } sdev_node_state_t; /* sdev_flags */ -#define SDEV_BUILD 0x0001 /* directory cache out-of-date */ -#define SDEV_STALE 0x0002 /* stale sdev nodes */ -#define SDEV_GLOBAL 0x0004 /* global /dev nodes */ -#define SDEV_PERSIST 0x0008 /* backing store persisted node */ -#define SDEV_NO_NCACHE 0x0010 /* do not include in neg. cache */ -#define SDEV_DYNAMIC 0x0020 /* special-purpose vnode ops (ex: pts) */ -#define SDEV_VTOR 0x0040 /* validate sdev_nodes during search */ +#define SDEV_BUILD 0x0001 /* directory cache out-of-date */ +#define SDEV_STALE 0x0002 /* stale sdev nodes */ +#define SDEV_GLOBAL 0x0004 /* global /dev nodes */ +#define SDEV_PERSIST 0x0008 /* backing store persisted node */ +#define SDEV_NO_NCACHE 0x0010 /* do not include in neg. 
cache */ +#define SDEV_DYNAMIC 0x0020 /* special-purpose vnode ops */ + /* (ex: pts) */ +#define SDEV_VTOR 0x0040 /* validate sdev_nodes during search */ +#define SDEV_ATTR_INVALID 0x0080 /* invalid node attributes, */ + /* need update */ /* sdev_lookup_flags */ #define SDEV_LOOKUP 0x0001 /* node creation in progress */ @@ -339,6 +344,12 @@ extern int devname_setattr_func(struct vnode *, struct vattr *, int, struct cred *, int (*)(struct sdev_node *, struct vattr *, int), int); /* + * devname_inactive_func() + */ +extern void devname_inactive_func(struct vnode *, struct cred *, + void (*)(struct vnode *)); + +/* * /dev file system instance defines */ /* @@ -607,6 +618,7 @@ extern int sdev_reserve_subdirs(struct sdev_node *); extern int prof_lookup(); extern void prof_filldir(struct sdev_node *); extern int devpts_validate(struct sdev_node *dv); +extern int devnet_validate(struct sdev_node *dv); extern void *sdev_get_vtor(struct sdev_node *dv); /* @@ -616,7 +628,6 @@ extern int sdev_modctl_readdir(const char *, char ***, int *, int *); extern void sdev_modctl_readdir_free(char **, int, int); extern int sdev_modctl_devexists(const char *); - /* * ncache handlers */ @@ -637,9 +648,11 @@ extern int devtype; extern kmem_cache_t *sdev_node_cache; extern struct vnodeops *sdev_vnodeops; extern struct vnodeops *devpts_vnodeops; +extern struct vnodeops *devnet_vnodeops; extern struct sdev_data *sdev_origins; /* mount info for global /dev instance */ extern const fs_operation_def_t sdev_vnodeops_tbl[]; extern const fs_operation_def_t devpts_vnodeops_tbl[]; +extern const fs_operation_def_t devnet_vnodeops_tbl[]; extern const fs_operation_def_t devsys_vnodeops_tbl[]; extern const fs_operation_def_t devpseudo_vnodeops_tbl[]; @@ -669,6 +682,7 @@ extern int sdev_debug; #define SDEV_DEBUG_PROFILE 0x200 /* trace sdev_profile */ #define SDEV_DEBUG_MODCTL 0x400 /* trace modctl activity */ #define SDEV_DEBUG_FLK 0x800 /* trace failed lookups */ +#define SDEV_DEBUG_NET 0x1000 /* 
/dev/net tracing */ #define sdcmn_err(args) if (sdev_debug & SDEV_DEBUG) printf args #define sdcmn_err2(args) if (sdev_debug & SDEV_DEBUG_VOPS) printf args @@ -681,6 +695,7 @@ extern int sdev_debug; #define sdcmn_err9(args) if (sdev_debug & SDEV_DEBUG_SDEV_NODE) printf args #define sdcmn_err10(args) if (sdev_debug & SDEV_DEBUG_PROFILE) printf args #define sdcmn_err11(args) if (sdev_debug & SDEV_DEBUG_MODCTL) printf args +#define sdcmn_err12(args) if (sdev_debug & SDEV_DEBUG_NET) printf args #define impossible(args) printf args #else #define sdcmn_err(args) /* does nothing */ @@ -694,6 +709,7 @@ extern int sdev_debug; #define sdcmn_err9(args) /* does nothing */ #define sdcmn_err10(args) /* does nothing */ #define sdcmn_err11(args) /* does nothing */ +#define sdcmn_err12(args) /* does nothing */ #define impossible(args) /* does nothing */ #endif diff --git a/usr/src/uts/common/sys/gld.h b/usr/src/uts/common/sys/gld.h index 3f3c01848e..fd7b8d37d2 100644 --- a/usr/src/uts/common/sys/gld.h +++ b/usr/src/uts/common/sys/gld.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -176,7 +176,7 @@ typedef struct gld_mac_info { uchar_t *gldm_broadcast_addr; /* SET BY DRIVER */ gld_lock_t gldm_lock; /* GLD PRIVATE */ ddi_iblock_cookie_t gldm_cookie; /* SET BY DRIVER */ - uint32_t reserved3; /* GLD PRIVATE */ + uint32_t gldm_margin; /* SET BY DRIVER */ uint32_t reserved4; /* GLD PRIVATE */ uint32_t gldm_maxpkt; /* SET BY DRIVER */ uint32_t gldm_minpkt; /* SET BY DRIVER */ diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 63d7ff42a0..4fd83059c4 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -84,6 +84,11 @@ typedef enum { LINK_DUPLEX_FULL } link_duplex_t; +typedef uint32_t datalink_id_t; +#define DATALINK_INVALID_LINKID 0 +#define DATALINK_ALL_LINKID 0 +#define DATALINK_MAX_LINKID 0xffffffff + /* * Maximum MAC address length */ @@ -113,6 +118,16 @@ typedef struct mac_stat_info_s { #define IS_MACTYPE_STAT(stat) (stat >= MACTYPE_STAT_MIN) /* + * Statistics maintained by the mac module, and possibly populated as link + * statistics. + */ +enum mac_mod_stat { + MAC_STAT_LINK_STATE, + MAC_STAT_LINK_UP, + MAC_STAT_PROMISC +}; + +/* * Do not reorder, and add only to the end of this list. */ enum mac_driver_stat { @@ -191,6 +206,20 @@ typedef struct mac_capab_lso_s { } mac_capab_lso_t; /* + * Information for legacy devices. + */ +typedef struct mac_capab_legacy_s { + /* + * Notifications that the legacy device does not support. + */ + uint32_t ml_unsup_note; + /* + * dev_t of the legacy device; can be held to force attach. + */ + dev_t ml_dev; +} mac_capab_legacy_t; + +/* * MAC layer capabilities. These capabilities are handled by the drivers' * mc_capab_get() callbacks. Some capabilities require the driver to fill * in a given data structure, and others are simply boolean capabilities. @@ -199,11 +228,16 @@ typedef struct mac_capab_lso_s { * care about by keeping a bitfield of these things around somewhere. 
*/ typedef enum { - MAC_CAPAB_HCKSUM = 0x01, /* data is a uint32_t for the txflags */ + MAC_CAPAB_HCKSUM = 0x01, /* data is a uint32_t for the txflags */ MAC_CAPAB_POLL = 0x02, /* boolean only, no data */ - MAC_CAPAB_MULTIADDRESS = 0x04, /* data is multiaddress_capab_t */ - MAC_CAPAB_LSO = 0x08 /* data is mac_capab_lso_t */ + MAC_CAPAB_MULTIADDRESS = 0x04, /* data is multiaddress_capab_t */ + MAC_CAPAB_LSO = 0x08, /* data is mac_capab_lso_t */ + MAC_CAPAB_NO_NATIVEVLAN = 0x10, /* boolean only, no data */ + MAC_CAPAB_NO_ZCOPY = 0x20, /* boolean only, no data */ /* add new capabilities here */ + + /* The following capabilities are specific to softmac. */ + MAC_CAPAB_LEGACY = 0x8001, /* data is mac_capab_legacy_t */ } mac_capab_t; typedef int mac_addr_slot_t; @@ -256,6 +290,8 @@ typedef void (*mac_ioctl_t)(void *, queue_t *, mblk_t *); typedef void (*mac_resources_t)(void *); typedef mblk_t *(*mac_tx_t)(void *, mblk_t *); typedef boolean_t (*mac_getcapab_t)(void *, mac_capab_t, void *); +typedef int (*mac_open_t)(void *); +typedef void (*mac_close_t)(void *); /* * Drivers must set all of these callbacks except for mc_resources, @@ -277,6 +313,8 @@ typedef struct mac_callbacks_s { mac_resources_t mc_resources; /* Get the device resources */ mac_ioctl_t mc_ioctl; /* Process an unknown ioctl */ mac_getcapab_t mc_getcapab; /* Get capability information */ + mac_open_t mc_open; /* Open the device */ + mac_close_t mc_close; /* Close the device */ } mac_callbacks_t; /* @@ -288,6 +326,10 @@ typedef struct mac_callbacks_s { #define MC_RESOURCES 0x001 #define MC_IOCTL 0x002 #define MC_GETCAPAB 0x004 +#define MC_OPEN 0x008 +#define MC_CLOSE 0x010 + +#define MAC_MAX_MINOR 1000 typedef struct mac_register_s { uint_t m_version; /* set by mac_alloc() */ @@ -302,6 +344,7 @@ typedef struct mac_register_s { uint_t m_max_sdu; void *m_pdata; size_t m_pdata_size; + uint32_t m_margin; } mac_register_t; /* @@ -325,6 +368,7 @@ typedef enum { MAC_NOTE_DEVPROMISC, MAC_NOTE_FASTPATH_FLUSH, 
MAC_NOTE_VNIC, + MAC_NOTE_MARGIN, MAC_NNOTE /* must be the last entry */ } mac_notify_type_t; @@ -382,8 +426,9 @@ typedef struct mac_header_info_s { uint32_t mhi_origsap; uint32_t mhi_bindsap; mac_addrtype_t mhi_dsttype; - boolean_t mhi_istagged; uint16_t mhi_tci; + uint_t mhi_istagged:1, + mhi_prom_looped:1; } mac_header_info_t; /* @@ -493,6 +538,10 @@ typedef struct mactype_register_s { * Client interface functions. */ extern int mac_open(const char *, mac_handle_t *); +extern int mac_open_by_linkid(datalink_id_t, + mac_handle_t *); +extern int mac_open_by_linkname(const char *, + mac_handle_t *); extern void mac_close(mac_handle_t); extern const mac_info_t *mac_info(mac_handle_t); extern boolean_t mac_info_get(const char *, mac_info_t *); @@ -500,14 +549,14 @@ extern uint64_t mac_stat_get(mac_handle_t, uint_t); extern int mac_start(mac_handle_t); extern void mac_stop(mac_handle_t); extern int mac_promisc_set(mac_handle_t, boolean_t, - mac_promisc_type_t); + mac_promisc_type_t); extern boolean_t mac_promisc_get(mac_handle_t, - mac_promisc_type_t); + mac_promisc_type_t); extern int mac_multicst_add(mac_handle_t, const uint8_t *); extern int mac_multicst_remove(mac_handle_t, - const uint8_t *); + const uint8_t *); extern boolean_t mac_unicst_verify(mac_handle_t, - const uint8_t *, uint_t); + const uint8_t *, uint_t); extern int mac_unicst_set(mac_handle_t, const uint8_t *); extern void mac_unicst_get(mac_handle_t, uint8_t *); extern void mac_dest_get(mac_handle_t, uint8_t *); @@ -517,44 +566,48 @@ extern const mac_txinfo_t *mac_tx_get(mac_handle_t); extern const mac_txinfo_t *mac_vnic_tx_get(mac_handle_t); extern link_state_t mac_link_get(mac_handle_t); extern mac_notify_handle_t mac_notify_add(mac_handle_t, mac_notify_t, - void *); + void *); extern void mac_notify_remove(mac_handle_t, - mac_notify_handle_t); + mac_notify_handle_t); extern void mac_notify(mac_handle_t); extern mac_rx_handle_t mac_rx_add(mac_handle_t, mac_rx_t, void *); extern mac_rx_handle_t 
mac_active_rx_add(mac_handle_t, mac_rx_t, - void *); + void *); extern void mac_rx_remove(mac_handle_t, mac_rx_handle_t, - boolean_t); + boolean_t); extern void mac_rx_remove_wait(mac_handle_t); extern mblk_t *mac_txloop(void *, mblk_t *); extern mac_txloop_handle_t mac_txloop_add(mac_handle_t, mac_txloop_t, - void *); + void *); extern void mac_txloop_remove(mac_handle_t, - mac_txloop_handle_t); + mac_txloop_handle_t); extern boolean_t mac_active_set(mac_handle_t); extern boolean_t mac_active_shareable_set(mac_handle_t); extern void mac_active_clear(mac_handle_t); extern void mac_active_rx(void *, mac_resource_handle_t, - mblk_t *); + mblk_t *); extern boolean_t mac_vnic_set(mac_handle_t, mac_txinfo_t *, - mac_getcapab_t, void *); + mac_getcapab_t, void *); extern void mac_vnic_clear(mac_handle_t); extern void mac_resource_set(mac_handle_t, - mac_resource_add_t, void *); + mac_resource_add_t, void *); extern dev_info_t *mac_devinfo_get(mac_handle_t); +extern const char *mac_name(mac_handle_t); +extern minor_t mac_minor(mac_handle_t); extern boolean_t mac_capab_get(mac_handle_t, mac_capab_t, - void *); + void *); extern boolean_t mac_vnic_capab_get(mac_handle_t, mac_capab_t, - void *); + void *); extern boolean_t mac_sap_verify(mac_handle_t, uint32_t, - uint32_t *); + uint32_t *); extern mblk_t *mac_header(mac_handle_t, const uint8_t *, - uint32_t, mblk_t *, size_t); + uint32_t, mblk_t *, size_t); extern int mac_header_info(mac_handle_t, mblk_t *, - mac_header_info_t *); + mac_header_info_t *); extern mblk_t *mac_header_cook(mac_handle_t, mblk_t *); extern mblk_t *mac_header_uncook(mac_handle_t, mblk_t *); +extern minor_t mac_minor_hold(boolean_t); +extern void mac_minor_rele(minor_t); /* * Driver interface functions. 
@@ -565,31 +618,38 @@ extern int mac_register(mac_register_t *, mac_handle_t *); extern int mac_disable(mac_handle_t); extern int mac_unregister(mac_handle_t); extern void mac_rx(mac_handle_t, mac_resource_handle_t, - mblk_t *); + mblk_t *); extern void mac_link_update(mac_handle_t, link_state_t); extern void mac_unicst_update(mac_handle_t, - const uint8_t *); + const uint8_t *); extern void mac_tx_update(mac_handle_t); extern void mac_resource_update(mac_handle_t); extern mac_resource_handle_t mac_resource_add(mac_handle_t, - mac_resource_t *); + mac_resource_t *); extern int mac_pdata_update(mac_handle_t, void *, - size_t); + size_t); extern void mac_multicst_refresh(mac_handle_t, - mac_multicst_t, void *, boolean_t); + mac_multicst_t, void *, boolean_t); extern void mac_unicst_refresh(mac_handle_t, mac_unicst_t, - void *); + void *); extern void mac_promisc_refresh(mac_handle_t, - mac_setpromisc_t, void *); + mac_setpromisc_t, void *); +extern boolean_t mac_margin_update(mac_handle_t, uint32_t); +extern void mac_margin_get(mac_handle_t, uint32_t *); +extern int mac_margin_remove(mac_handle_t, uint32_t); +extern int mac_margin_add(mac_handle_t, uint32_t *, + boolean_t); extern void mac_init_ops(struct dev_ops *, const char *); extern void mac_fini_ops(struct dev_ops *); +extern uint32_t mac_no_notification(mac_handle_t); +extern boolean_t mac_is_legacy(mac_handle_t); +extern int mac_hold_exclusive(mac_handle_t); +extern void mac_rele_exclusive(mac_handle_t); + extern mactype_register_t *mactype_alloc(uint_t); extern void mactype_free(mactype_register_t *); extern int mactype_register(mactype_register_t *); extern int mactype_unregister(const char *); -extern int mac_vlan_create(mac_handle_t, const char *, - minor_t); -extern void mac_vlan_remove(mac_handle_t, const char *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index d8fdcaf8dc..c0acfc5c37 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ 
b/usr/src/uts/common/sys/mac_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,15 +35,6 @@ extern "C" { #endif -/* - * Statistics maintained internally by the mac module. - */ -enum mac_mod_stat { - MAC_STAT_LINK_STATE, - MAC_STAT_LINK_UP, - MAC_STAT_PROMISC -}; - typedef struct mac_multicst_addr_s mac_multicst_addr_t; struct mac_multicst_addr_s { @@ -52,6 +43,14 @@ struct mac_multicst_addr_s { uint8_t mma_addr[MAXMACADDRLEN]; }; +typedef struct mac_margin_req_s mac_margin_req_t; + +struct mac_margin_req_s { + mac_margin_req_t *mmr_nextp; + uint_t mmr_ref; + uint32_t mmr_margin; +}; + typedef struct mac_notify_fn_s mac_notify_fn_t; struct mac_notify_fn_s { @@ -118,9 +117,11 @@ typedef struct mac_vnic_tx_s { * Each registered MAC is associated with a mac_t structure. */ typedef struct mac_impl_s { + /* + * The following fields are set in mac_register() and will not be + * changed until mac_unregister(). No lock is needed to access them. 
+ */ char mi_name[LIFNAMSIZ]; - const char *mi_drvname; - uint_t mi_instance; void *mi_driver; /* Driver private data */ mac_info_t mi_info; mactype_t *mi_type; @@ -128,10 +129,22 @@ typedef struct mac_impl_s { size_t mi_pdata_size; mac_callbacks_t *mi_callbacks; dev_info_t *mi_dip; + minor_t mi_minor; + dev_t mi_phy_dev; + kstat_t *mi_ksp; + uint_t mi_kstat_count; + mac_txinfo_t mi_txinfo; + mac_txinfo_t mi_txloopinfo; + + krwlock_t mi_gen_lock; + uint32_t mi_oref; uint32_t mi_ref; boolean_t mi_disabled; + boolean_t mi_exclusive; + krwlock_t mi_state_lock; uint_t mi_active; + krwlock_t mi_data_lock; link_state_t mi_linkstate; link_state_t mi_lastlinkstate; @@ -140,25 +153,26 @@ typedef struct mac_impl_s { uint8_t mi_addr[MAXMACADDRLEN]; uint8_t mi_dstaddr[MAXMACADDRLEN]; mac_multicst_addr_t *mi_mmap; + krwlock_t mi_notify_lock; uint32_t mi_notify_bits; kmutex_t mi_notify_bits_lock; kthread_t *mi_notify_thread; mac_notify_fn_t *mi_mnfp; kcondvar_t mi_notify_cv; + krwlock_t mi_rx_lock; mac_rx_fn_t *mi_mrfp; krwlock_t mi_tx_lock; mac_txloop_fn_t *mi_mtfp; + krwlock_t mi_resource_lock; mac_resource_add_t mi_resource_add; void *mi_resource_add_arg; - kstat_t *mi_ksp; - uint_t mi_kstat_count; + kmutex_t mi_activelink_lock; boolean_t mi_activelink; - mac_txinfo_t mi_txinfo; - mac_txinfo_t mi_txloopinfo; + uint32_t mi_rx_ref; /* #threads in mac_rx() */ uint32_t mi_rx_removed; /* #callbacks marked */ /* for removal */ @@ -171,11 +185,23 @@ typedef struct mac_impl_s { mac_txinfo_t mi_vnic_txloopinfo; mac_getcapab_t mi_vnic_getcapab_fn; void *mi_vnic_getcapab_arg; + + boolean_t mi_legacy; + uint32_t mi_unsup_note; + uint32_t mi_margin; + + /* + * List of margin value requests added by mac clients. This list is + * sorted: the first one has the greatest value. 
+ */ + mac_margin_req_t *mi_mmrp; } mac_impl_t; #define mi_getstat mi_callbacks->mc_getstat #define mi_start mi_callbacks->mc_start #define mi_stop mi_callbacks->mc_stop +#define mi_open mi_callbacks->mc_open +#define mi_close mi_callbacks->mc_close #define mi_setpromisc mi_callbacks->mc_setpromisc #define mi_multicst mi_callbacks->mc_multicst #define mi_unicst mi_callbacks->mc_unicst diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h index d73d4cf8be..bfd895e137 100644 --- a/usr/src/uts/common/sys/param.h +++ b/usr/src/uts/common/sys/param.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -188,6 +188,13 @@ extern "C" { #define MAXSYMLINKS 20 #define MAXNAMELEN 256 +/* + * MAXLINKNAMELEN defines the longest possible permitted datalink name, + * including the terminating NUL. Note that this must not be larger + * than related networking constants such as LIFNAMSIZ. + */ +#define MAXLINKNAMELEN 32 + #ifndef NADDR #define NADDR 13 #endif diff --git a/usr/src/uts/common/sys/softmac.h b/usr/src/uts/common/sys/softmac.h new file mode 100644 index 0000000000..c82a27c0d8 --- /dev/null +++ b/usr/src/uts/common/sys/softmac.h @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_SOFTMAC_H +#define _SYS_SOFTMAC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/sunddi.h> +#include <sys/mac.h> +#include <sys/dls.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int softmac_create(dev_info_t *, dev_t); +int softmac_destroy(dev_info_t *, dev_t); +int softmac_hold_device(dev_t, dls_dev_handle_t *); +void softmac_rele_device(dls_dev_handle_t); +void softmac_recreate(); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SOFTMAC_H */ diff --git a/usr/src/uts/common/sys/softmac_impl.h b/usr/src/uts/common/sys/softmac_impl.h new file mode 100644 index 0000000000..93071c17a9 --- /dev/null +++ b/usr/src/uts/common/sys/softmac_impl.h @@ -0,0 +1,234 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#ifndef _SYS_SOFTMAC_IMPL_H +#define _SYS_SOFTMAC_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/ethernet.h> +#include <sys/taskq.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/strsun.h> +#include <sys/stream.h> +#include <sys/dlpi.h> +#include <sys/mac.h> +#include <sys/mac_ether.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct softmac_lower_s { + struct softmac *sl_softmac; + queue_t *sl_wq; + + /* + * sl_ctl_inprogress is used to serialize the control path. It will + * be set when either an ioctl or an M_{PC,}PROTO message is received + * from the upper layer, and will be cleared when processing is done. + */ + kmutex_t sl_ctl_mutex; + kcondvar_t sl_ctl_cv; + boolean_t sl_ctl_inprogress; + + /* + * When a control message is processed, either sl_pending_prim or + * sl_pending_ioctl will be set. They will be cleared when the + * acknowledgement of the specific control message is received + * from the underlying legacy driver. + */ + kmutex_t sl_mutex; + kcondvar_t sl_cv; + t_uscalar_t sl_pending_prim; + boolean_t sl_pending_ioctl; + mblk_t *sl_ack_mp; + + mac_resource_handle_t sl_handle; + ldi_handle_t sl_lh; +} softmac_lower_t; + +enum softmac_state { + SOFTMAC_INITIALIZED, + SOFTMAC_READY +}; + +typedef struct softmac_dev_s { + dev_t sd_dev; +} softmac_dev_t; + +/* + * smac_flags values. + */ +#define SOFTMAC_GLDV3 0x01 +#define SOFTMAC_NOSUPP 0x02 +#define SOFTMAC_ATTACH_DONE 0x04 +#define SOFTMAC_NEED_RECREATE 0x08 + +/* + * The softmac structure allows all minor nodes (at most two, style-1 and + * style-2) for the same device to be processed. A softmac_dev_t will be + * created for each minor node. + * + * We try to "register" the mac after all the softmac_dev_t's are processed so + * that even if DLPI operations fail (because of driver bugs) for one minor + * node, the other minor node can still be used to register the mac.
+ * (Specifically, an incorrect xxx_getinfo() implementation will cause style-2 + * minor node mac registration to fail.) + */ +typedef struct softmac { + /* + * The following fields will be set when the softmac is created and + * will not change. No lock is required. + */ + char smac_devname[MAXNAMELEN]; + major_t smac_umajor; + int smac_uppa; + uint32_t smac_cnt; /* # of minor nodes for this device */ + + /* + * The following fields are protected by softmac_hash_lock. + */ + /* + * The smac_hold_cnt field increases when softmac_hold_device() is + * called to force the dls_vlan_t of the device to be created. The + * device pre-detach fails if this counter is not 0. + */ + uint32_t smac_hold_cnt; + + /* + * The following fields are protected by smac_mutex. + */ + kmutex_t smac_mutex; + kcondvar_t smac_cv; + uint32_t smac_flags; + int smac_attacherr; + mac_handle_t smac_mh; + softmac_dev_t *smac_softmac[2]; + taskqid_t smac_taskq; + /* + * Number of minor nodes whose post-attach routine has succeeded. + * This should be the same as the number of softmac_dev_t's. + * Note that it does not imply SOFTMAC_ATTACH_DONE as the taskq might + * be still ongoing. + */ + uint32_t smac_attachok_cnt; + /* + * Number of softmac_dev_t left when pre-detach fails. This is used + * to indicate whether postattach is called because of a failed + * pre-detach. + */ + uint32_t smac_attached_left; + + /* + * This field is set and cleared by users of softmac (who call + * softmac_hold/rele_device()). It is protected by smac_mutex. + */ + dev_info_t *smac_udip; + + /* + * The remaining fields are used to register the MAC for a legacy + * device. They are set in softmac_mac_register() and do not change. + * One can access them without locks once mac_register() is done.
+ */ + + /* + * media type is needed for create <link name, linkid> mapping, so + * it is set for GLDv3 device as well + */ + uint_t smac_media; + /* DLPI style of the underlying device */ + int smac_style; + dev_t smac_dev; + size_t smac_saplen; + size_t smac_addrlen; + uchar_t smac_unicst_addr[MAXMACADDRLEN]; + uint_t smac_min_sdu; + uint_t smac_max_sdu; + uint32_t smac_margin; + + /* Notifications the underlying driver can support. */ + uint32_t smac_notifications; + + /* + * Capabilities of the underlying driver. + */ + uint32_t smac_capab_flags; + uint32_t smac_hcksum_txflags; + boolean_t smac_no_capability_req; + dl_capab_mdt_t smac_mdt_capab; + boolean_t smac_mdt; + + /* + * The following fields are protected by smac_lock + */ + krwlock_t smac_lock; + enum softmac_state smac_state; + /* Lower stream structure */ + softmac_lower_t *smac_lower; +} softmac_t; + +typedef struct smac_ioc_start_s { + softmac_lower_t *si_slp; +} smac_ioc_start_t; + +#define SMAC_IOC ('S' << 24 | 'M' << 16 | 'C' << 8) +#define SMAC_IOC_START (SMAC_IOC | 0x01) + +#define SOFTMAC_BLANK_TICKS 128 +#define SOFTMAC_BLANK_PKT_COUNT 8 + +extern dev_info_t *softmac_dip; +#define SOFTMAC_DEV_NAME "softmac" + +extern int softmac_send_bind_req(softmac_lower_t *, uint_t); +extern int softmac_send_notify_req(softmac_lower_t *, uint32_t); +extern int softmac_send_promisc_req(softmac_lower_t *, t_uscalar_t, + boolean_t); +extern void softmac_init(void); +extern void softmac_fini(void); +extern boolean_t softmac_busy(void); +extern int softmac_fill_capab(ldi_handle_t, softmac_t *); +extern int softmac_capab_enable(softmac_lower_t *); +extern void softmac_rput_process_notdata(queue_t *, mblk_t *); +extern void softmac_rput_process_data(softmac_lower_t *, mblk_t *); + +extern int softmac_m_promisc(void *, boolean_t); +extern int softmac_m_multicst(void *, boolean_t, const uint8_t *); +extern int softmac_m_unicst(void *, const uint8_t *); +extern void softmac_m_ioctl(void *, queue_t *, mblk_t *); 
+extern int softmac_m_stat(void *, uint_t, uint64_t *); +extern mblk_t *softmac_m_tx(void *, mblk_t *); +extern void softmac_m_resources(void *); +extern int softmac_proto_tx(softmac_lower_t *, mblk_t *, mblk_t **); +extern void softmac_ioctl_tx(softmac_lower_t *, mblk_t *, mblk_t **); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SOFTMAC_IMPL_H */ diff --git a/usr/src/uts/common/sys/vnic.h b/usr/src/uts/common/sys/vnic.h index 16cfbf85fc..58622037b3 100644 --- a/usr/src/uts/common/sys/vnic.h +++ b/usr/src/uts/common/sys/vnic.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,9 +54,9 @@ typedef enum { #define VNIC_IOC_CREATE VNIC_IOC(1) typedef struct vnic_ioc_create { - uint_t vc_vnic_id; + datalink_id_t vc_vnic_id; + datalink_id_t vc_link_id; uint_t vc_mac_len; - uchar_t vc_dev_name[MAXNAMELEN]; vnic_mac_addr_type_t vc_mac_addr_type; uchar_t vc_mac_addr[MAXMACADDRLEN]; } vnic_ioc_create_t; @@ -64,9 +64,9 @@ typedef struct vnic_ioc_create { #ifdef _SYSCALL32 typedef struct vnic_ioc_create32 { - uint32_t vc_vnic_id; + datalink_id_t vc_vnic_id; + datalink_id_t vc_link_id; uint32_t vc_mac_len; - uchar_t vc_dev_name[MAXNAMELEN]; vnic_mac_addr_type_t vc_mac_addr_type; uchar_t vc_mac_addr[MAXMACADDRLEN]; } vnic_ioc_create32_t; @@ -76,13 +76,13 @@ typedef struct vnic_ioc_create32 { #define VNIC_IOC_DELETE VNIC_IOC(2) typedef struct vnic_ioc_delete { - uint_t vd_vnic_id; + datalink_id_t vd_vnic_id; } vnic_ioc_delete_t; #ifdef _SYSCALL32 typedef struct vnic_ioc_delete32 { - uint32_t vd_vnic_id; + datalink_id_t vd_vnic_id; } vnic_ioc_delete32_t; #endif /* _SYSCALL32 */ @@ -90,25 +90,25 @@ typedef struct vnic_ioc_delete32 { #define VNIC_IOC_INFO VNIC_IOC(3) typedef struct vnic_ioc_info_vnic { - uint32_t vn_vnic_id; + datalink_id_t vn_vnic_id; + datalink_id_t vn_link_id; uint32_t vn_mac_len; uchar_t 
vn_mac_addr[MAXMACADDRLEN]; - char vn_dev_name[MAXNAMELEN]; vnic_mac_addr_type_t vn_mac_addr_type; } vnic_ioc_info_vnic_t; typedef struct vnic_ioc_info { uint_t vi_nvnics; - uint_t vi_vnic_id; /* 0 returns all */ - char vi_dev_name[MAXNAMELEN]; + datalink_id_t vi_vnic_id; /* DATALINK_ALL_LINKID returns all */ + datalink_id_t vi_linkid; } vnic_ioc_info_t; #ifdef _SYSCALL32 typedef struct vnic_ioc_info32 { uint32_t vi_nvnics; - uint32_t vi_vnic_id; /* 0 returns all */ - char vi_dev_name[MAXNAMELEN]; + datalink_id_t vi_vnic_id; /* DATALINK_ALL_LINKID returns all */ + datalink_id_t vi_linkid; } vnic_ioc_info32_t; #endif /* _SYSCALL32 */ @@ -118,7 +118,7 @@ typedef struct vnic_ioc_info32 { #define VNIC_IOC_MODIFY_ADDR 0x01 typedef struct vnic_ioc_modify { - uint_t vm_vnic_id; + datalink_id_t vm_vnic_id; uint_t vm_modify_mask; uchar_t vm_mac_addr[MAXMACADDRLEN]; vnic_mac_addr_type_t vm_mac_addr_type; @@ -128,7 +128,7 @@ typedef struct vnic_ioc_modify { #ifdef _SYSCALL32 typedef struct vnic_ioc_modify32 { - uint32_t vm_vnic_id; + datalink_id_t vm_vnic_id; uint32_t vm_modify_mask; uchar_t vm_mac_addr[MAXMACADDRLEN]; vnic_mac_addr_type_t vm_mac_addr_type; diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index 3cab13b96c..6cb64523a8 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -65,7 +65,7 @@ typedef struct vnic_flow_tab_s { typedef struct vnic_mac_s { mac_handle_t va_mh; uint_t va_refs; - char va_dev_name[MAXNAMELEN]; + datalink_id_t va_linkid; const mac_txinfo_t *va_txinfo; struct vnic_bcast_grp_s *va_bcast_grp; krwlock_t va_bcast_grp_lock; @@ -81,7 +81,7 @@ typedef struct vnic_mac_s { } vnic_mac_t; typedef struct vnic_s { - uint_t vn_id; + datalink_id_t vn_id; uint32_t vn_started : 1, vn_promisc : 1, @@ -96,6 +96,7 @@ typedef struct vnic_s { vnic_mac_addr_type_t vn_addr_type; mac_handle_t vn_mh; + uint32_t vn_margin; vnic_mac_t *vn_vnic_mac; vnic_flow_t *vn_flow_ent; uint32_t vn_hcksum_txflags; @@ -140,20 +141,20 @@ typedef struct vnic_s { mutex_exit(&(flow)->vf_lock); \ } -extern int vnic_dev_create(uint_t, char *, int, uchar_t *); -extern int vnic_dev_modify(uint_t, uint_t, vnic_mac_addr_type_t, +extern int vnic_dev_create(datalink_id_t, datalink_id_t, int, uchar_t *); +extern int vnic_dev_modify(datalink_id_t, uint_t, vnic_mac_addr_type_t, uint_t, uchar_t *); -extern int vnic_dev_delete(uint_t); +extern int vnic_dev_delete(datalink_id_t); -typedef int (*vnic_info_new_vnic_fn_t)(void *, uint32_t, vnic_mac_addr_type_t, - uint_t, uint8_t *, char *); +typedef int (*vnic_info_new_vnic_fn_t)(void *, datalink_id_t, + vnic_mac_addr_type_t, uint_t, uint8_t *, datalink_id_t); extern void vnic_dev_init(void); extern void vnic_dev_fini(void); extern uint_t vnic_dev_count(void); extern dev_info_t *vnic_get_dip(void); -extern int vnic_info(uint_t *, uint32_t, char *, void *, +extern int vnic_info(uint_t *, datalink_id_t, datalink_id_t, void *, vnic_info_new_vnic_fn_t); extern void vnic_rx(void *, void *, mblk_t *); diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 0a93e8651e..34cdf78a33 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -455,6 +455,7 @@ extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t 
getzoneid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); +extern int zone_datalink_walk(zoneid_t, int (*)(const char *, void *), void *); /* * Zone-specific data (ZSD) APIs diff --git a/usr/src/uts/common/xen/io/xnbo.c b/usr/src/uts/common/xen/io/xnbo.c index 723d650c55..91714e9117 100644 --- a/usr/src/uts/common/xen/io/xnbo.c +++ b/usr/src/uts/common/xen/io/xnbo.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -248,9 +248,9 @@ xnbo_open_mac(xnb_t *xnbp, char *mac) xsname = xvdi_get_xsname(xnbp->xnb_devinfo); - if ((err = mac_open(mac, &xnbop->o_mh)) != 0) { + if ((err = mac_open_by_linkname(mac, &xnbop->o_mh)) != 0) { cmn_err(CE_WARN, "xnbo_open_mac: " - "cannot open mac device %s (%d)", mac, err); + "cannot open mac for link %s (%d)", mac, err); return (B_FALSE); } ASSERT(xnbop->o_mh != NULL); @@ -260,13 +260,13 @@ xnbo_open_mac(xnb_t *xnbp, char *mac) if (mi->mi_media != DL_ETHER) { cmn_err(CE_WARN, "xnbo_open_mac: " - "mac device is not DL_ETHER (%d)", mi->mi_media); + "device is not DL_ETHER (%d)", mi->mi_media); xnbo_close_mac(xnbop); return (B_FALSE); } if (mi->mi_media != mi->mi_nativemedia) { cmn_err(CE_WARN, "xnbo_open_mac: " - "mac device media and native media mismatch (%d != %d)", + "device media and native media mismatch (%d != %d)", mi->mi_media, mi->mi_nativemedia); xnbo_close_mac(xnbop); return (B_FALSE); diff --git a/usr/src/uts/common/xen/io/xnbu.c b/usr/src/uts/common/xen/io/xnbu.c index fa9604194b..f7e84987a1 100644 --- a/usr/src/uts/common/xen/io/xnbu.c +++ b/usr/src/uts/common/xen/io/xnbu.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -432,6 +432,12 @@ xnbu_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) mr->m_callbacks = &xnb_callbacks; mr->m_min_sdu = 0; mr->m_max_sdu = XNBMAXPKT; + /* + * xnbu is a virtual device, and it is not associated with any + * physical device. Its margin size is determined by the maximum + * packet size it can handle, which is PAGESIZE. + */ + mr->m_margin = PAGESIZE - XNBMAXPKT - sizeof (struct ether_header); (void) memset(xnbp->xnb_mac_addr, 0xff, ETHERADDRL); xnbp->xnb_mac_addr[0] &= 0xfe; |
