diff options
author | jv227347 <Jordan.Vaughan@Sun.com> | 2010-08-12 17:34:26 -0700 |
---|---|---|
committer | jv227347 <Jordan.Vaughan@Sun.com> | 2010-08-12 17:34:26 -0700 |
commit | a19609f85693e4e7d7e744d836a4e87193c934e4 (patch) | |
tree | 7ea27321d90fa4b9b61e48ec06e5b52f5f7b509a /usr/src/uts/common | |
parent | a69116193464f859a8b27a2db19ad330ce163a55 (diff) | |
download | illumos-gate-a19609f85693e4e7d7e744d836a4e87193c934e4.tar.gz |
6906740 Zones need an improved reference counting mechanism
Diffstat (limited to 'usr/src/uts/common')
24 files changed, 515 insertions, 142 deletions
diff --git a/usr/src/uts/common/fs/mntfs/mntvfsops.c b/usr/src/uts/common/fs/mntfs/mntvfsops.c index 8f7f7c9f75..9c81e9bc89 100644 --- a/usr/src/uts/common/fs/mntfs/mntvfsops.c +++ b/usr/src/uts/common/fs/mntfs/mntvfsops.c @@ -213,7 +213,8 @@ mntmount(struct vfs *vfsp, struct vnode *mvp, } mutex_exit(&mvp->v_lock); - zone_hold(mnt->mnt_zone = zone); + zone_init_ref(&mnt->mnt_zone_ref); + zone_hold_ref(zone, &mnt->mnt_zone_ref, ZONE_REF_MNTFS); mnp = &mnt->mnt_node; vfsp->vfs_fstype = mntfstype; @@ -256,7 +257,7 @@ mntunmount(struct vfs *vfsp, int flag, struct cred *cr) } mutex_exit(&vp->v_lock); - zone_rele(mnt->mnt_zone); + zone_rele_ref(&mnt->mnt_zone_ref, ZONE_REF_MNTFS); vn_invalid(vp); vn_free(vp); kmem_free(mnt, sizeof (*mnt)); diff --git a/usr/src/uts/common/fs/mntfs/mntvnops.c b/usr/src/uts/common/fs/mntfs/mntvnops.c index da72519dd2..7fff58a602 100644 --- a/usr/src/uts/common/fs/mntfs/mntvnops.c +++ b/usr/src/uts/common/fs/mntfs/mntvnops.c @@ -398,7 +398,7 @@ mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp) static void mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp) { - zone_t *zonep = MTOD(mnp)->mnt_zone; + zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; mntelem_t **elempp = &zonep->zone_mntfs_db; mntelem_t *elemp; @@ -500,7 +500,7 @@ static void mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp) { mntdata_t *mnd = MTOD(mnp); - zone_t *zonep = mnd->mnt_zone; + zone_t *zonep = mnd->mnt_zone_ref.zref_zone; int is_global_zone = (zonep == global_zone); int show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN; vfs_t *vfsp, *firstvfsp, *lastvfsp; @@ -885,7 +885,7 @@ static int mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) { mntnode_t *mnp = VTOM(vp); - zone_t *zonep = MTOD(mnp)->mnt_zone; + zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; mntsnap_t *snapp = &mnp->mnt_read; off_t off = uio->uio_offset; size_t len = uio->uio_resid; @@ -1472,7 +1472,7 @@ mntioctl(struct vnode 
*vp, int cmd, intptr_t arg, int flag, cred_t *cr, mntnode_t *mnp = VTOM(vp); mntsnap_t *snapp = &mnp->mnt_ioctl; int error = 0; - zone_t *zonep = MTOD(mnp)->mnt_zone; + zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; model_t datamodel = flag & DATAMODEL_MASK; @@ -1554,7 +1554,7 @@ mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr, size_t len; uint_t start = 0; mntdata_t *mntdata = MTOD(mnp); - zone_t *zone = mntdata->mnt_zone; + zone_t *zone = mntdata->mnt_zone_ref.zref_zone; STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { diff --git a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c index 2111e9fabf..207a708771 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -1122,7 +1121,9 @@ nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); mi->mi_vfsp = vfsp; - zone_hold(mi->mi_zone = zone); + mi->mi_zone = zone; + zone_init_ref(&mi->mi_zone_ref); + zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS); nfs_mi_zonelist_add(mi); /* diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c index 927fbd0456..d85c9d7e31 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* @@ -2932,7 +2931,8 @@ nfs4_mi_shutdown(zoneid_t zoneid, void *data) */ list_remove(&mig->mig_list, mi); mutex_exit(&mig->mig_lock); - zone_rele(mi->mi_zone); + zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4); + /* * Release hold on vfs and mi done to prevent race with zone * shutdown. This releases the hold in nfs4_mi_zonelist_add. diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c index 35152c3a39..1752a28542 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c @@ -1179,7 +1179,8 @@ errout: /* need to remove it from the zone */ removed = nfs4_mi_zonelist_remove(mi); if (removed) - zone_rele(mi->mi_zone); + zone_rele_ref(&mi->mi_zone_ref, + ZONE_REF_NFSV4); MI4_RELE(mi); if (!(uap->flags & MS_SYSSPACE) && args) { nfs4_free_args(args); @@ -2383,7 +2384,9 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL); mi->mi_vfsp = vfsp; - zone_hold(mi->mi_zone = zone); + mi->mi_zone = zone; + zone_init_ref(&mi->mi_zone_ref); + zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFSV4); nfs4_mi_zonelist_add(mi); /* @@ -2646,7 +2649,7 @@ bad: nfs4_async_manager_stop(vfsp); removed = nfs4_mi_zonelist_remove(mi); if (removed) - zone_rele(mi->mi_zone); + zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4); /* * This releases the initial "hold" of the mi since it will never @@ -2771,7 +2774,7 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) nfs4_remove_mi_from_server(mi, NULL); removed = nfs4_mi_zonelist_remove(mi); if (removed) - zone_rele(mi->mi_zone); + zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4); return (0); } @@ -4402,7 +4405,7 @@ nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr) removed = nfs4_mi_zonelist_remove(mi); if (removed) - zone_rele(mi->mi_zone); + zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4); } /* Referral related sub-routines */ diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c 
b/usr/src/uts/common/fs/nfs/nfs_client.c index 53047f4cd8..7a6c545d9a 100644 --- a/usr/src/uts/common/fs/nfs/nfs_client.c +++ b/usr/src/uts/common/fs/nfs/nfs_client.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. @@ -3229,7 +3228,7 @@ nfs_free_mi(mntinfo_t *mi) cv_destroy(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE]); cv_destroy(&mi->mi_async_reqs_cv); cv_destroy(&mi->mi_async_cv); - zone_rele(mi->mi_zone); + zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFS); kmem_free(mi, sizeof (*mi)); } diff --git a/usr/src/uts/common/fs/nfs/nfs_vfsops.c b/usr/src/uts/common/fs/nfs/nfs_vfsops.c index 8dff0eb2df..57b21778b4 100644 --- a/usr/src/uts/common/fs/nfs/nfs_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs_vfsops.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. @@ -1241,7 +1240,9 @@ nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); mi->mi_vfsp = vfsp; - zone_hold(mi->mi_zone = zone); + mi->mi_zone = zone; + zone_init_ref(&mi->mi_zone_ref); + zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS); nfs_mi_zonelist_add(mi); /* diff --git a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs.h b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs.h index 6565be5ef3..fd9f4ac7f3 100644 --- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs.h +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs.h @@ -33,8 +33,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SMBFS_SMBFS_H @@ -56,6 +55,7 @@ #include <sys/vfs.h> #include <sys/vfs_opreg.h> #include <sys/fs/smbfs_mount.h> +#include <sys/zone.h> /* * Path component length @@ -140,7 +140,7 @@ typedef struct smbmntinfo { /* * Zones support. */ - struct zone *smi_zone; /* Zone mounted in */ + zone_ref_t smi_zone_ref; /* Zone FS is mounted in */ list_node_t smi_zone_node; /* Link to per-zone smi list */ /* Lock for the list is: smi_globals_t -> smg_lock */ diff --git a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_client.c b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_client.c index 8d4c617b52..008b8fdc7f 100644 --- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_client.c +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_client.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. @@ -418,7 +417,7 @@ smbfsgetattr(vnode_t *vp, struct vattr *vap, cred_t *cr) smi = VTOSMI(vp); - ASSERT(curproc->p_zone == smi->smi_zone); + ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone); /* * If asked for UID or GID, update n_uid, n_gid. 
@@ -610,7 +609,7 @@ smbfs_zonelist_add(smbmntinfo_t *smi) { smi_globals_t *smg; - smg = zone_getspecific(smi_list_key, smi->smi_zone); + smg = zone_getspecific(smi_list_key, smi->smi_zone_ref.zref_zone); mutex_enter(&smg->smg_lock); list_insert_head(&smg->smg_list, smi); mutex_exit(&smg->smg_lock); @@ -624,7 +623,7 @@ smbfs_zonelist_remove(smbmntinfo_t *smi) { smi_globals_t *smg; - smg = zone_getspecific(smi_list_key, smi->smi_zone); + smg = zone_getspecific(smi_list_key, smi->smi_zone_ref.zref_zone); mutex_enter(&smg->smg_lock); list_remove(&smg->smg_list, smi); /* diff --git a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c index d649e9d664..86c8111b72 100644 --- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c @@ -314,8 +314,8 @@ smbfs_free_smi(smbmntinfo_t *smi) if (smi == NULL) return; - if (smi->smi_zone != NULL) - zone_rele(smi->smi_zone); + if (smi->smi_zone_ref.zref_zone != NULL) + zone_rele_ref(&smi->smi_zone_ref, ZONE_REF_SMBFS); if (smi->smi_share != NULL) smb_share_rele(smi->smi_share); @@ -481,7 +481,14 @@ smbfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) smi->smi_share = ssp; ssp = NULL; - smi->smi_zone = mntzone; + + /* + * Convert the anonymous zone hold acquired via zone_hold() above + * into a zone reference. 
+ */ + zone_init_ref(&smi->smi_zone_ref); + zone_hold_ref(mntzone, &smi->smi_zone_ref, ZONE_REF_SMBFS); + zone_rele(mntzone); mntzone = NULL; /* @@ -736,7 +743,7 @@ smbfs_root(vfs_t *vfsp, vnode_t **vpp) smi = VFTOSMI(vfsp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) @@ -770,7 +777,7 @@ smbfs_statvfs(vfs_t *vfsp, statvfs64_t *sbp) hrtime_t now; smb_cred_t scred; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) diff --git a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c index 048c159fdf..5748a5c181 100644 --- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c @@ -257,7 +257,7 @@ smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) smi = VTOSMI(vp); ssp = smi->smi_share; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -434,7 +434,7 @@ smbfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, * open; if we happen to get here from the wrong zone we can't do * anything over the wire. 
*/ - if (smi->smi_zone != curproc->p_zone) { + if (smi->smi_zone_ref.zref_zone != curproc->p_zone) { /* * We could attempt to clean up locks, except we're sure * that the current process didn't acquire any locks on @@ -591,7 +591,7 @@ smbfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, smi = VTOSMI(vp); ssp = smi->smi_share; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -676,7 +676,7 @@ smbfs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, smi = VTOSMI(vp); ssp = smi->smi_share; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -787,7 +787,7 @@ smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, smi = VTOSMI(vp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -853,7 +853,7 @@ smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, smi = VTOSMI(vp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -908,7 +908,7 @@ smbfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, vfsp = vp->v_vfsp; smi = VFTOSMI(vfsp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) @@ -980,7 +980,7 @@ smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr) int have_fid = 0; uint32_t rights = 0; - ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone); + ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone); /* * There are no settable attributes on the XATTR dir, @@ -1252,7 
+1252,7 @@ smbfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) vfsp = vp->v_vfsp; smi = VFTOSMI(vfsp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) @@ -1281,7 +1281,7 @@ smbfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) np = VTOSMB(vp); smi = VTOSMI(vp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -1399,7 +1399,7 @@ smbfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, vfs = dvp->v_vfsp; smi = VFTOSMI(vfs); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || vfs->vfs_flag & VFS_UNMOUNTED) @@ -1456,7 +1456,7 @@ smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr, smi = VTOSMI(dvp); dnp = VTOSMB(dvp); - ASSERT(curproc->p_zone == smi->smi_zone); + ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone); #ifdef NOT_YET vcp = SSTOVC(smi->smi_share); @@ -1775,7 +1775,7 @@ smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive, dnp = VTOSMB(dvp); vp = NULL; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2007,7 +2007,7 @@ smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct, smi = VTOSMI(dvp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2101,8 +2101,8 @@ smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr, { /* vnode_t *realvp; */ - if (curproc->p_zone != VTOSMI(odvp)->smi_zone || - curproc->p_zone != 
VTOSMI(ndvp)->smi_zone) + if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone || + curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone) return (EPERM); if (VTOSMI(odvp)->smi_flags & SMI_DEAD || @@ -2133,7 +2133,7 @@ smbfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr, struct smb_cred scred; /* enum smbfsstat status; */ - ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone); + ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone); if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 || strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) @@ -2361,7 +2361,7 @@ smbfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, int nmlen = strlen(name); int error, hiderr; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2440,7 +2440,7 @@ smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, struct smb_cred scred; int error; - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2536,7 +2536,7 @@ smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp, smi = VTOSMI(vp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2599,7 +2599,7 @@ smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int nmlen, error; ushort_t reclen; - ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone); + ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone); /* Make sure we serialize for n_dirseq use. 
*/ ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER)); @@ -2840,7 +2840,7 @@ smbfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) smi = VTOSMI(vp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EPERM); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2871,7 +2871,7 @@ smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset, struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct) { - if (curproc->p_zone != VTOSMI(vp)->smi_zone) + if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone) return (EIO); if (VTOSMI(vp)->smi_flags & SMI_LLOCK) @@ -2897,7 +2897,7 @@ smbfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, smi = VTOSMI(vp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -2951,7 +2951,7 @@ smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, vfs = vp->v_vfsp; smi = VFTOSMI(vfs); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) @@ -3019,7 +3019,7 @@ smbfs_getsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr, vfsp = vp->v_vfsp; smi = VFTOSMI(vfsp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) @@ -3060,7 +3060,7 @@ smbfs_setsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr, vfsp = vp->v_vfsp; smi = VFTOSMI(vfsp); - if (curproc->p_zone != smi->smi_zone) + if (curproc->p_zone != smi->smi_zone_ref.zref_zone) return (EIO); if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED) @@ -3102,7 +3102,7 @@ static int smbfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr, 
caller_context_t *ct) { - if (curproc->p_zone != VTOSMI(vp)->smi_zone) + if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone) return (EIO); if (VTOSMI(vp)->smi_flags & SMI_LLOCK) diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index 9a106fd3d0..cbb2adfb37 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -3513,11 +3513,13 @@ vfs_list_add(struct vfs *vfsp) /* * The zone that owns the mount is the one that performed the mount. * Note that this isn't necessarily the same as the zone mounted into. - * The corresponding zone_rele() will be done when the vfs_t is - * being free'd. + * The corresponding zone_rele_ref() will be done when the vfs_t + * is being free'd. */ vfsp->vfs_zone = curproc->p_zone; - zone_hold(vfsp->vfs_zone); + zone_init_ref(&vfsp->vfs_implp->vi_zone_ref); + zone_hold_ref(vfsp->vfs_zone, &vfsp->vfs_implp->vi_zone_ref, + ZONE_REF_VFS); /* * Find the zone mounted into, and put this mount on its vfs list. @@ -4342,7 +4344,8 @@ vfs_rele(vfs_t *vfsp) VFS_FREEVFS(vfsp); lofi_remove(vfsp); if (vfsp->vfs_zone) - zone_rele(vfsp->vfs_zone); + zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref, + ZONE_REF_VFS); vfs_freemnttab(vfsp); vfs_free(vfsp); } diff --git a/usr/src/uts/common/io/lofi.c b/usr/src/uts/common/io/lofi.c index 2bc7cf3620..049a0235fd 100644 --- a/usr/src/uts/common/io/lofi.c +++ b/usr/src/uts/common/io/lofi.c @@ -373,8 +373,8 @@ lofi_destroy(struct lofi_state *lsp, cred_t *credp) lsp->ls_uncomp_seg_sz = 0; } - rctl_decr_lofi(lsp->ls_zone, 1); - zone_rele(lsp->ls_zone); + rctl_decr_lofi(lsp->ls_zone.zref_zone, 1); + zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI); mutex_destroy(&lsp->ls_comp_cache_lock); mutex_destroy(&lsp->ls_comp_bufs_lock); @@ -418,7 +418,7 @@ lofi_zone_shutdown(zoneid_t zoneid, void *arg) /* lofi_destroy() frees lsp */ next = list_next(&lofi_list, lsp); - if (lsp->ls_zone->zone_id != zoneid) + if (lsp->ls_zone.zref_zone->zone_id != zoneid) continue; /* @@ -1610,7 +1610,7 @@ 
static int lofi_access(struct lofi_state *lsp) { ASSERT(MUTEX_HELD(&lofi_lock)); - if (INGLOBALZONE(curproc) || lsp->ls_zone == curproc->p_zone) + if (INGLOBALZONE(curproc) || lsp->ls_zone.zref_zone == curzone) return (0); return (EPERM); } @@ -2184,8 +2184,8 @@ lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, newdev = makedevice(getmajor(dev), minor); lsp->ls_dev = newdev; - lsp->ls_zone = zone_find_by_id(getzoneid()); - ASSERT(lsp->ls_zone != NULL); + zone_init_ref(&lsp->ls_zone); + zone_hold_ref(curzone, &lsp->ls_zone, ZONE_REF_LOFI); lsp->ls_uncomp_seg_sz = 0; lsp->ls_comp_algorithm[0] = '\0'; lsp->ls_crypto_offset = 0; diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h index 8654f16da2..d582d6f6f0 100644 --- a/usr/src/uts/common/nfs/nfs4_clnt.h +++ b/usr/src/uts/common/nfs/nfs4_clnt.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -48,6 +47,7 @@ #include <rpc/auth.h> #include <sys/door.h> #include <sys/condvar_impl.h> +#include <sys/zone.h> #ifdef __cplusplus extern "C" { @@ -1083,7 +1083,8 @@ typedef struct mntinfo4 { /* * Zones support. */ - struct zone *mi_zone; /* Zone mounted in */ + struct zone *mi_zone; /* Zone in which FS is mounted */ + zone_ref_t mi_zone_ref; /* Reference to aforementioned zone */ list_node_t mi_zone_node; /* linkage into per-zone mi list */ /* diff --git a/usr/src/uts/common/nfs/nfs_clnt.h b/usr/src/uts/common/nfs/nfs_clnt.h index 462a9b345c..e32b6b5d54 100644 --- a/usr/src/uts/common/nfs/nfs_clnt.h +++ b/usr/src/uts/common/nfs/nfs_clnt.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. 
All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -37,6 +36,7 @@ #include <nfs/rnode.h> #include <sys/list.h> #include <sys/condvar_impl.h> +#include <sys/zone.h> #ifdef __cplusplus extern "C" { @@ -287,6 +287,7 @@ typedef struct servinfo { #define ORIG_KNCONF(mi) (mi->mi_curr_serv->sv_origknconf ? \ mi->mi_curr_serv->sv_origknconf : mi->mi_curr_serv->sv_knconf) +#if defined(_KERNEL) /* * NFS private data per mounted file system * The mi_lock mutex protects the following fields: @@ -418,7 +419,8 @@ typedef struct mntinfo { /* * Zones support. */ - struct zone *mi_zone; /* Zone mounted in */ + struct zone *mi_zone; /* Zone in which FS is mounted */ + zone_ref_t mi_zone_ref; /* Reference to aforementioned zone */ list_node_t mi_zone_node; /* Linkage into per-zone mi list */ /* * Serializes threads in failover_remap. @@ -427,6 +429,7 @@ typedef struct mntinfo { */ kmutex_t mi_remap_lock; } mntinfo_t; +#endif /* _KERNEL */ /* * vfs pointer to mount info diff --git a/usr/src/uts/common/os/ipc.c b/usr/src/uts/common/os/ipc.c index 8f47821566..9381019cd1 100644 --- a/usr/src/uts/common/os/ipc.c +++ b/usr/src/uts/common/os/ipc.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -842,7 +841,7 @@ ipc_rele(ipc_service_t *s, kipc_perm_t *perm) ASSERT(IPC_FREE(perm)); /* ipc_rmid clears IPC_ALLOC */ s->ipcs_dtor(perm); project_rele(perm->ipc_proj); - zone_rele(perm->ipc_zone); + zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC); kmem_free(perm, s->ipcs_ssize); } } @@ -1064,12 +1063,13 @@ ipc_commit_begin(ipc_service_t *service, key_t key, int flag, ASSERT(IPC_FREE(newperm)); /* - * Set ipc_proj and ipc_zone so that future calls to ipc_cleanup() + * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup() * clean up the necessary state. This must be done before the * potential call to ipcs_dtor() below. */ newperm->ipc_proj = pp->p_task->tk_proj; - newperm->ipc_zone = pp->p_zone; + zone_init_ref(&newperm->ipc_zone_ref); + zone_hold_ref(pp->p_zone, &newperm->ipc_zone_ref, ZONE_REF_IPC); mutex_enter(&service->ipcs_lock); /* @@ -1097,6 +1097,7 @@ ipc_commit_begin(ipc_service_t *service, key_t key, int flag, errout: mutex_exit(&service->ipcs_lock); service->ipcs_dtor(newperm); + zone_rele_ref(&newperm->ipc_zone_ref, ZONE_REF_IPC); kmem_free(newperm, service->ipcs_ssize); return (error); } @@ -1118,7 +1119,6 @@ ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm) ASSERT(MUTEX_HELD(&curproc->p_lock)); (void) project_hold(perm->ipc_proj); - (void) zone_hold(perm->ipc_zone); mutex_exit(&curproc->p_lock); /* @@ -1173,6 +1173,8 @@ ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm) mutex_exit(&service->ipcs_lock); service->ipcs_dtor(perm); } + if (perm->ipc_zone_ref.zref_zone != NULL) + zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC); kmem_free(perm, service->ipcs_ssize); } diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c index bf18627c34..bacc595f78 100644 --- a/usr/src/uts/common/os/shm.c +++ b/usr/src/uts/common/os/shm.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
+ * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -605,7 +604,7 @@ shm_dtor(kipc_perm_t *perm) rsize = ptob(btopr(sp->shm_segsz)); ipcs_lock(shm_svc); sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -= rsize; - sp->shm_perm.ipc_zone->zone_shmmax -= rsize; + sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax -= rsize; ipcs_unlock(shm_svc); } } @@ -924,13 +923,13 @@ top: sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize, RCA_SAFE) & RCT_DENY) || (rctl_test(rc_zone_shmmax, - sp->shm_perm.ipc_zone->zone_rctls, pp, rsize, + sp->shm_perm.ipc_zone_ref.zref_zone->zone_rctls, pp, rsize, RCA_SAFE) & RCT_DENY)) { ipc_cleanup(shm_svc, (kipc_perm_t *)sp); return (EINVAL); } sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize; - sp->shm_perm.ipc_zone->zone_shmmax += rsize; + sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax += rsize; lock = ipc_commit_end(shm_svc, &sp->shm_perm); } @@ -1258,7 +1257,7 @@ shm_rm_amp(kshmid_t *sp) struct anon_map *amp = sp->shm_amp; zone_t *zone; - zone = sp->shm_perm.ipc_zone; + zone = sp->shm_perm.ipc_zone_ref.zref_zone; ASSERT(zone != NULL); /* * Free up the anon_map. diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 6b44f53ebf..79ccd94ae4 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -223,6 +223,7 @@ #include <sys/pool.h> #include <sys/pool_pset.h> #include <sys/pset.h> +#include <sys/strlog.h> #include <sys/sysmacros.h> #include <sys/callb.h> #include <sys/vmparam.h> @@ -249,6 +250,17 @@ #include <vm/seg.h> #include <sys/mac.h> +/* + * This constant specifies the number of seconds that threads waiting for + * subsystems to release a zone's general-purpose references will wait before + * they log the zone's reference counts. The constant's value shouldn't + * be so small that reference counts are unnecessarily reported for zones + * whose references are slowly released. 
On the other hand, it shouldn't be so + * large that users reboot their systems out of frustration over hung zones + * before the system logs the zones' reference counts. + */ +#define ZONE_DESTROY_TIMEOUT_SECS 60 + /* List of data link IDs which are accessible from the zone */ typedef struct zone_dl { datalink_id_t zdl_id; @@ -338,6 +350,20 @@ const char *zone_status_table[] = { }; /* + * This array contains the names of the subsystems listed in zone_ref_subsys_t + * (see sys/zone.h). + */ +static char *zone_ref_subsys_names[] = { + "NFS", /* ZONE_REF_NFS */ + "NFSv4", /* ZONE_REF_NFSV4 */ + "SMBFS", /* ZONE_REF_SMBFS */ + "MNTFS", /* ZONE_REF_MNTFS */ + "LOFI", /* ZONE_REF_LOFI */ + "VFS", /* ZONE_REF_VFS */ + "IPC" /* ZONE_REF_IPC */ +}; + +/* * This isn't static so lint doesn't complain. */ rctl_hndl_t rc_zone_cpu_shares; @@ -1881,6 +1907,8 @@ zone_zsd_init(void) zone0.zone_lockedmem_kstat = NULL; zone0.zone_swapresv_kstat = NULL; zone0.zone_nprocs_kstat = NULL; + list_create(&zone0.zone_ref_list, sizeof (zone_ref_t), + offsetof(zone_ref_t, zref_linkage)); list_create(&zone0.zone_zsd, sizeof (struct zsd_entry), offsetof(struct zsd_entry, zsd_linkage)); list_insert_head(&zone_active, &zone0); @@ -2144,6 +2172,7 @@ zone_free(zone_t *zone) ASSERT(zone->zone_kcred == NULL); ASSERT(zone_status_get(zone) == ZONE_IS_DEAD || zone_status_get(zone) == ZONE_IS_UNINITIALIZED); + ASSERT(list_is_empty(&zone->zone_ref_list)); /* * Remove any zone caps. @@ -2160,6 +2189,7 @@ zone_free(zone_t *zone) mutex_exit(&zone_deathrow_lock); } + list_destroy(&zone->zone_ref_list); zone_free_zsd(zone); zone_free_datasets(zone); list_destroy(&zone->zone_dl_list); @@ -2494,6 +2524,11 @@ zone_status_timedwait_sig(zone_t *zone, clock_t tim, zone_status_t status) * to force halt/reboot to block waiting for the zone_cred_ref to drop * to 0. This can be useful to flush out other sources of cached creds * that may be less innocuous than the driver case. 
+ * + * Zones also provide a tracked reference counting mechanism in which zone + * references are represented by "crumbs" (zone_ref structures). Crumbs help + * debuggers determine the sources of leaked zone references. See + * zone_hold_ref() and zone_rele_ref() below for more information. */ int zone_wait_for_cred = 0; @@ -2506,6 +2541,14 @@ zone_hold_locked(zone_t *z) ASSERT(z->zone_ref != 0); } +/* + * Increment the specified zone's reference count. The zone's zone_t structure + * will not be freed as long as the zone's reference count is nonzero. + * Decrement the zone's reference count via zone_rele(). + * + * NOTE: This function should only be used to hold zones for short periods of + * time. Use zone_hold_ref() if the zone must be held for a long time. + */ void zone_hold(zone_t *z) { @@ -2522,14 +2565,27 @@ zone_hold(zone_t *z) #define ZONE_IS_UNREF(zone) ((zone)->zone_ref == 1 && \ (!zone_wait_for_cred || (zone)->zone_cred_ref == 0)) -void -zone_rele(zone_t *z) +/* + * Common zone reference release function invoked by zone_rele() and + * zone_rele_ref(). If subsys is ZONE_REF_NUM_SUBSYS, then the specified + * zone's subsystem-specific reference counters are not affected by the + * release. If ref is not NULL, then the zone_ref_t to which it refers is + * removed from the specified zone's reference list. ref must be non-NULL iff + * subsys is not ZONE_REF_NUM_SUBSYS. + */ +static void +zone_rele_common(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys) { boolean_t wakeup; mutex_enter(&z->zone_lock); ASSERT(z->zone_ref != 0); z->zone_ref--; + if (subsys != ZONE_REF_NUM_SUBSYS) { + ASSERT(z->zone_subsys_ref[subsys] != 0); + z->zone_subsys_ref[subsys]--; + list_remove(&z->zone_ref_list, ref); + } if (z->zone_ref == 0 && z->zone_cred_ref == 0) { /* no more refs, free the structure */ mutex_exit(&z->zone_lock); @@ -2551,6 +2607,83 @@ zone_rele(zone_t *z) } } +/* + * Decrement the specified zone's reference count. 
The specified zone will + * cease to exist after this function returns if the reference count drops to + * zero. This function should be paired with zone_hold(). + */ +void +zone_rele(zone_t *z) +{ + zone_rele_common(z, NULL, ZONE_REF_NUM_SUBSYS); +} + +/* + * Initialize a zone reference structure. This function must be invoked for + * a reference structure before the structure is passed to zone_hold_ref(). + */ +void +zone_init_ref(zone_ref_t *ref) +{ + ref->zref_zone = NULL; + list_link_init(&ref->zref_linkage); +} + +/* + * Acquire a reference to zone z. The caller must specify the + * zone_ref_subsys_t constant associated with its subsystem. The specified + * zone_ref_t structure will represent a reference to the specified zone. Use + * zone_rele_ref() to release the reference. + * + * The referenced zone_t structure will not be freed as long as the zone_t's + * zone_status field is not ZONE_IS_DEAD and the zone has outstanding + * references. + * + * NOTE: The zone_ref_t structure must be initialized before it is used. + * See zone_init_ref() above. + */ +void +zone_hold_ref(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys) +{ + ASSERT(subsys >= 0 && subsys < ZONE_REF_NUM_SUBSYS); + + /* + * Prevent consumers from reusing a reference structure before + * releasing it. + */ + VERIFY(ref->zref_zone == NULL); + + ref->zref_zone = z; + mutex_enter(&z->zone_lock); + zone_hold_locked(z); + z->zone_subsys_ref[subsys]++; + ASSERT(z->zone_subsys_ref[subsys] != 0); + list_insert_head(&z->zone_ref_list, ref); + mutex_exit(&z->zone_lock); +} + +/* + * Release the zone reference represented by the specified zone_ref_t. + * The reference is invalid after it's released; however, the zone_ref_t + * structure can be reused without having to invoke zone_init_ref(). + * subsys should be the same value that was passed to zone_hold_ref() + * when the reference was acquired. 
+ */ +void +zone_rele_ref(zone_ref_t *ref, zone_ref_subsys_t subsys) +{ + zone_rele_common(ref->zref_zone, ref, subsys); + + /* + * Set the zone_ref_t's zref_zone field to NULL to generate panics + * when consumers dereference the reference. This helps us catch + * consumers who use released references. Furthermore, this lets + * consumers reuse the zone_ref_t structure without having to + * invoke zone_init_ref(). + */ + ref->zref_zone = NULL; +} + void zone_cred_hold(zone_t *z) { @@ -3984,6 +4117,8 @@ zone_create(const char *zone_name, const char *zone_root, mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zone->zone_mem_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL); + list_create(&zone->zone_ref_list, sizeof (zone_ref_t), + offsetof(zone_ref_t, zref_linkage)); list_create(&zone->zone_zsd, sizeof (struct zsd_entry), offsetof(struct zsd_entry, zsd_linkage)); list_create(&zone->zone_datasets, sizeof (zone_dataset_t), @@ -4269,7 +4404,7 @@ errout: * zone_kcred. To free the zone, we call crfree, which will call * zone_cred_rele, which will call zone_free. */ - ASSERT(zone->zone_cred_ref == 1); /* for zone_kcred */ + ASSERT(zone->zone_cred_ref == 1); ASSERT(zone->zone_kcred->cr_ref == 1); ASSERT(zone->zone_ref == 0); zkcr = zone->zone_kcred; @@ -4555,6 +4690,101 @@ zone_shutdown(zoneid_t zoneid) } /* + * Log the specified zone's reference counts. The caller should not be + * holding the zone's zone_lock. + */ +static void +zone_log_refcounts(zone_t *zone) +{ + char *buffer; + char *buffer_position; + uint32_t buffer_size; + uint32_t index; + uint_t ref; + uint_t cred_ref; + + /* + * Construct a string representing the subsystem-specific reference + * counts. The counts are printed in ascending order by index into the + * zone_t::zone_subsys_ref array. The list will be surrounded by + * square brackets [] and will only contain nonzero reference counts. 
+ * + * The buffer will hold two square bracket characters plus ten digits, + * one colon, one space, one comma, and some characters for a + * subsystem name per subsystem-specific reference count. (Unsigned 32- + * bit integers have at most ten decimal digits.) The last + * reference count's comma is replaced by the closing square + * bracket and a NULL character to terminate the string. + * + * NOTE: We have to grab the zone's zone_lock to create a consistent + * snapshot of the zone's reference counters. + * + * First, figure out how much space the string buffer will need. + * The buffer's size is stored in buffer_size. + */ + buffer_size = 2; /* for the square brackets */ + mutex_enter(&zone->zone_lock); + zone->zone_flags |= ZF_REFCOUNTS_LOGGED; + ref = zone->zone_ref; + cred_ref = zone->zone_cred_ref; + for (index = 0; index < ZONE_REF_NUM_SUBSYS; ++index) + if (zone->zone_subsys_ref[index] != 0) + buffer_size += strlen(zone_ref_subsys_names[index]) + + 13; + if (buffer_size == 2) { + /* + * No subsystems had nonzero reference counts. Don't bother + * with allocating a buffer; just log the general-purpose and + * credential reference counts. + */ + mutex_exit(&zone->zone_lock); + (void) strlog(0, 0, 1, SL_CONSOLE | SL_NOTE, + "Zone '%s' (ID: %d) is shutting down, but %u zone " + "references and %u credential references are still extant", + zone->zone_name, zone->zone_id, ref, cred_ref); + return; + } + + /* + * buffer_size contains the exact number of characters that the + * buffer will need. Allocate the buffer and fill it with nonzero + * subsystem-specific reference counts. Surround the results with + * square brackets afterwards. + */ + buffer = kmem_alloc(buffer_size, KM_SLEEP); + buffer_position = &buffer[1]; + for (index = 0; index < ZONE_REF_NUM_SUBSYS; ++index) { + /* + * NOTE: The DDI's version of sprintf() returns a pointer to + * the modified buffer rather than the number of bytes written + * (as in snprintf(3C)). This is unfortunate and annoying. 
+ * Therefore, we'll use snprintf() with INT_MAX to get the + * number of bytes written. Using INT_MAX is safe because + * the buffer is perfectly sized for the data: we'll never + * overrun the buffer. + */ + if (zone->zone_subsys_ref[index] != 0) + buffer_position += snprintf(buffer_position, INT_MAX, + "%s: %u,", zone_ref_subsys_names[index], + zone->zone_subsys_ref[index]); + } + mutex_exit(&zone->zone_lock); + buffer[0] = '['; + ASSERT((uintptr_t)(buffer_position - buffer) < buffer_size); + ASSERT(buffer_position[0] == '\0' && buffer_position[-1] == ','); + buffer_position[-1] = ']'; + + /* + * Log the reference counts and free the message buffer. + */ + (void) strlog(0, 0, 1, SL_CONSOLE | SL_NOTE, + "Zone '%s' (ID: %d) is shutting down, but %u zone references and " + "%u credential references are still extant %s", zone->zone_name, + zone->zone_id, ref, cred_ref, buffer); + kmem_free(buffer, buffer_size); +} + +/* * Systemcall entry point to finalize the zone halt process. The caller * must have already successfully called zone_shutdown(). 
* @@ -4568,6 +4798,8 @@ zone_destroy(zoneid_t zoneid) uint64_t uniqid; zone_t *zone; zone_status_t status; + clock_t wait_time; + boolean_t log_refcounts; if (secpolicy_zone_config(CRED()) != 0) return (set_errno(EPERM)); @@ -4611,9 +4843,12 @@ zone_destroy(zoneid_t zoneid) zone_rele(zone); zone = NULL; /* potentially free'd */ + log_refcounts = B_FALSE; + wait_time = SEC_TO_TICK(ZONE_DESTROY_TIMEOUT_SECS); mutex_enter(&zonehash_lock); for (; /* ever */; ) { boolean_t unref; + boolean_t refs_have_been_logged; if ((zone = zone_find_all_by_id(zoneid)) == NULL || zone->zone_uniqid != uniqid) { @@ -4626,6 +4861,8 @@ zone_destroy(zoneid_t zoneid) } mutex_enter(&zone->zone_lock); unref = ZONE_IS_UNREF(zone); + refs_have_been_logged = (zone->zone_flags & + ZF_REFCOUNTS_LOGGED); mutex_exit(&zone->zone_lock); if (unref) { /* @@ -4638,12 +4875,69 @@ zone_destroy(zoneid_t zoneid) break; } + /* + * Wait for zone_rele_common() or zone_cred_rele() to signal + * zone_destroy_cv. zone_destroy_cv is signaled only when + * some zone's general-purpose reference count reaches one. + * If ZONE_DESTROY_TIMEOUT_SECS seconds elapse while waiting + * on zone_destroy_cv, then log the zone's reference counts and + * continue to wait for zone_rele() and zone_cred_rele(). + */ + if (!refs_have_been_logged) { + if (!log_refcounts) { + /* + * This thread hasn't timed out waiting on + * zone_destroy_cv yet. Wait wait_time clock + * ticks (initially ZONE_DESTROY_TIMEOUT_SECS + * seconds) for the zone's references to clear. + */ + ASSERT(wait_time > 0); + wait_time = cv_reltimedwait_sig( + &zone_destroy_cv, &zonehash_lock, wait_time, + TR_SEC); + if (wait_time > 0) { + /* + * A thread in zone_rele() or + * zone_cred_rele() signaled + * zone_destroy_cv before this thread's + * wait timed out. The zone might have + * only one reference left; find out! + */ + continue; + } else if (wait_time == 0) { + /* The thread's process was signaled. 
*/ + mutex_exit(&zonehash_lock); + return (set_errno(EINTR)); + } + + /* + * The thread timed out while waiting on + * zone_destroy_cv. Even though the thread + * timed out, it has to check whether another + * thread woke up from zone_destroy_cv and + * destroyed the zone. + * + * If the zone still exists and has more than + * one unreleased general-purpose reference, + * then log the zone's reference counts. + */ + log_refcounts = B_TRUE; + continue; + } + + /* + * The thread already timed out on zone_destroy_cv while + * waiting for subsystems to release the zone's last + * general-purpose references. Log the zone's reference + * counts and wait indefinitely on zone_destroy_cv. + */ + zone_log_refcounts(zone); + } if (cv_wait_sig(&zone_destroy_cv, &zonehash_lock) == 0) { - /* Signaled */ + /* The thread's process was signaled. */ mutex_exit(&zonehash_lock); return (set_errno(EINTR)); } - } /* diff --git a/usr/src/uts/common/sys/fs/mntdata.h b/usr/src/uts/common/sys/fs/mntdata.h index c0cd7ebf30..8d0efeaf48 100644 --- a/usr/src/uts/common/sys/fs/mntdata.h +++ b/usr/src/uts/common/sys/fs/mntdata.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef _SYS_MNTFS_MNTDATA_H @@ -29,6 +28,7 @@ #include <sys/vnode.h> #include <sys/poll.h> #include <sys/mnttab.h> +#include <sys/zone.h> #ifdef __cplusplus extern "C" { @@ -70,18 +70,6 @@ typedef struct mntnode { mntsnap_t mnt_ioctl; /* data for ioctl() */ } mntnode_t; -struct zone; - -typedef struct mntdata { - struct zone *mnt_zone; /* zone for mount point */ - uint_t mnt_nopen; /* count of vnodes open */ - size_t mnt_size; /* size of last normal snapshot */ - size_t mnt_hidden_size; /* size of last hidden snapshot */ - timespec_t mnt_mtime; /* mtime at last normal snapshot */ - timespec_t mnt_hidden_mtime; /* mtime at last hidden snapshot */ - struct mntnode mnt_node; /* embedded mntnode */ -} mntdata_t; - /* * Conversion macros. */ @@ -95,6 +83,16 @@ typedef struct mntdata { #if defined(_KERNEL) +typedef struct mntdata { + zone_ref_t mnt_zone_ref; /* zone for mount point */ + uint_t mnt_nopen; /* count of vnodes open */ + size_t mnt_size; /* size of last normal snapshot */ + size_t mnt_hidden_size; /* size of last hidden snapshot */ + timespec_t mnt_mtime; /* mtime at last normal snapshot */ + timespec_t mnt_hidden_mtime; /* mtime at last hidden snapshot */ + struct mntnode mnt_node; /* embedded mntnode */ +} mntdata_t; + /* * Value for a mntsnap_t's mnts_flags. */ diff --git a/usr/src/uts/common/sys/ipc_impl.h b/usr/src/uts/common/sys/ipc_impl.h index 712317d2e1..0569c3e967 100644 --- a/usr/src/uts/common/sys/ipc_impl.h +++ b/usr/src/uts/common/sys/ipc_impl.h @@ -19,15 +19,12 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef _IPC_IMPL_H #define _IPC_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/ipc.h> #include <sys/mutex.h> @@ -151,7 +148,7 @@ struct msqid_ds64 { (*(rctl_qty_t *)(((char *)&p->ipc_proj->kpj_data.kpd_ipc) + \ s->ipcs_rctlofs)) #define IPC_ZONE_USAGE(p, s) \ - (*(rctl_qty_t *)(((char *)&p->ipc_zone->zone_ipc) + \ + (*(rctl_qty_t *)(((char *)&p->ipc_zone_ref.zref_zone->zone_ipc) + \ s->ipcs_rctlofs)) #define IPC_LOCKED(s, o) \ MUTEX_HELD(&s->ipcs_table[IPC_INDEX(o->ipc_id)].ipct_lock) @@ -172,7 +169,7 @@ typedef struct kipc_perm { kproject_t *ipc_proj; /* creator's project */ uint_t ipc_id; /* id */ zoneid_t ipc_zoneid; /* creator's zone id */ - zone_t *ipc_zone; /* creator's zone */ + zone_ref_t ipc_zone_ref; /* reference to creator's zone */ } kipc_perm_t; typedef struct ipc_slot { diff --git a/usr/src/uts/common/sys/lofi.h b/usr/src/uts/common/sys/lofi.h index 8e385b6fe2..d82cc0341e 100644 --- a/usr/src/uts/common/sys/lofi.h +++ b/usr/src/uts/common/sys/lofi.h @@ -229,7 +229,7 @@ struct lofi_state { struct dk_geom ls_dkg; struct vtoc ls_vtoc; struct dk_cinfo ls_ci; - zone_t *ls_zone; + zone_ref_t ls_zone; list_node_t ls_list; /* all lofis */ dev_t ls_dev; /* this node's dev_t */ diff --git a/usr/src/uts/common/sys/vfs.h b/usr/src/uts/common/sys/vfs.h index 5fca4584eb..ba013a0eda 100644 --- a/usr/src/uts/common/sys/vfs.h +++ b/usr/src/uts/common/sys/vfs.h @@ -33,6 +33,7 @@ #ifndef _SYS_VFS_H #define _SYS_VFS_H +#include <sys/zone.h> #include <sys/types.h> #include <sys/t_lock.h> #include <sys/cred.h> @@ -173,26 +174,6 @@ extern avl_tree_t vskstat_tree; extern kmutex_t vskstat_tree_lock; /* - * Private vfs data, NOT to be used by a file system implementation. 
- */ - -#define VFS_FEATURE_MAXSZ 4 - -typedef struct vfs_impl { - /* Counted array - Bitmap of vfs features */ - uint32_t vi_featureset[VFS_FEATURE_MAXSZ]; - /* - * Support for statistics on the vnode operations - */ - vsk_anchor_t *vi_vskap; /* anchor for vopstats' kstat */ - vopstats_t *vi_fstypevsp; /* ptr to per-fstype vopstats */ - vopstats_t vi_vopstats; /* per-mount vnode op stats */ - - timespec_t vi_hrctime; /* High-res creation time */ -} vfs_impl_t; - - -/* * Structure per mounted file system. Each mounted file system has * an array of operations and an instance record. * @@ -241,7 +222,7 @@ typedef struct vfs { refstr_t *vfs_resource; /* mounted resource name */ refstr_t *vfs_mntpt; /* mount point name */ time_t vfs_mtime; /* time we were mounted */ - vfs_impl_t *vfs_implp; /* impl specific data */ + struct vfs_impl *vfs_implp; /* impl specific data */ /* * Zones support. Note that the zone that "owns" the mount isn't * necessarily the same as the zone in which the zone is visible. @@ -448,6 +429,28 @@ enum { #define VFSSP_VERBATIM 0x1 /* do not prefix the supplied path */ #if defined(_KERNEL) + +/* + * Private vfs data, NOT to be used by a file system implementation. + */ + +#define VFS_FEATURE_MAXSZ 4 + +typedef struct vfs_impl { + /* Counted array - Bitmap of vfs features */ + uint32_t vi_featureset[VFS_FEATURE_MAXSZ]; + /* + * Support for statistics on the vnode operations + */ + vsk_anchor_t *vi_vskap; /* anchor for vopstats' kstat */ + vopstats_t *vi_fstypevsp; /* ptr to per-fstype vopstats */ + vopstats_t vi_vopstats; /* per-mount vnode op stats */ + + timespec_t vi_hrctime; /* High-res creation time */ + + zone_ref_t vi_zone_ref; /* reference to zone */ +} vfs_impl_t; + /* * Public operations. 
*/ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index b89a6902db..3ba7bf47f4 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -204,7 +204,12 @@ typedef struct { #define ZE_AREMOUNTS 2 /* there are mounts within the zone */ #define ZE_LABELINUSE 3 /* label is already in use by some other zone */ -/* zone_status */ +/* + * zone_status values + * + * You must modify zone_status_names in mdb(1M)'s genunix module + * (genunix/zone.c) when you modify this enum. + */ typedef enum { ZONE_IS_UNINITIALIZED = 0, ZONE_IS_INITIALIZED, @@ -274,11 +279,22 @@ typedef struct zone_cmd_rval { */ #define ZONE_DOOR_PATH ZONES_TMPDIR "/%s.zoneadmd_door" + /* zone_flags */ +/* + * Threads that read or write the following flag must hold zone_lock. + */ +#define ZF_REFCOUNTS_LOGGED 0x1 /* a thread logged the zone's refs */ + +/* + * The following flags are set when the zone is created and never changed. + * Threads that test for these flags don't have to hold zone_lock. + */ #define ZF_HASHED_LABEL 0x2 /* zone has a unique label */ #define ZF_IS_SCRATCH 0x4 /* scratch zone */ #define ZF_NET_EXCL 0x8 /* Zone has an exclusive IP stack */ + /* zone_create flags */ #define ZCF_NET_EXCL 0x1 /* Create a zone with exclusive IP */ @@ -298,6 +314,7 @@ typedef struct zone_net_data { #ifdef _KERNEL + /* * We need to protect the definition of 'list_t' from userland applications and * libraries which may be defining ther own versions. @@ -310,6 +327,38 @@ struct pool; struct brand; /* + * Each of these constants identifies a kernel subsystem that acquires and + * releases zone references. Each subsystem that invokes + * zone_hold_ref() and zone_rele_ref() should specify the + * zone_ref_subsys_t constant associated with the subsystem. Tracked holds + * help users and developers quickly identify subsystems that stall zone + * shutdowns indefinitely.
+ * + * NOTE: You must modify zone_ref_subsys_names in usr/src/uts/common/os/zone.c + * when you modify this enumeration. + */ +typedef enum zone_ref_subsys { + ZONE_REF_NFS, /* NFS */ + ZONE_REF_NFSV4, /* NFSv4 */ + ZONE_REF_SMBFS, /* SMBFS */ + ZONE_REF_MNTFS, /* MNTFS */ + ZONE_REF_LOFI, /* LOFI devices */ + ZONE_REF_VFS, /* VFS infrastructure */ + ZONE_REF_IPC, /* IPC infrastructure */ + ZONE_REF_NUM_SUBSYS /* This must be the last entry. */ +} zone_ref_subsys_t; + +/* + * zone_ref represents a general-purpose reference to a zone. Each zone's + * references are linked into the zone's zone_t::zone_ref_list. This allows + * debuggers to walk zones' references. + */ +typedef struct zone_ref { + struct zone *zref_zone; /* the zone to which the reference refers */ + list_node_t zref_linkage; /* linkage for zone_t::zone_ref_list */ +} zone_ref_t; + +/* * Structure to record list of ZFS datasets exported to a zone. */ typedef struct zone_dataset { @@ -353,6 +402,8 @@ typedef struct zone { * zone_lock protects the following fields of a zone_t: * zone_ref * zone_cred_ref + * zone_subsys_ref + * zone_ref_list * zone_ntasks * zone_flags * zone_zsd @@ -368,6 +419,14 @@ typedef struct zone { uint_t zone_ref; /* count of zone_hold()s on zone */ uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */ /* + * Fixed-sized array of subsystem-specific reference counts + * The sum of all of the counts must be less than or equal to zone_ref. + * The array is indexed by the counts' subsystems' zone_ref_subsys_t + * constants. + */ + uint_t zone_subsys_ref[ZONE_REF_NUM_SUBSYS]; + list_t zone_ref_list; /* list of zone_ref_t structs */ + /* * zone_rootvp and zone_rootpath can never be modified once set.
*/ struct vnode *zone_rootvp; /* zone's root vnode */ @@ -494,6 +553,9 @@ extern void zone_zsd_init(void); extern void zone_init(void); extern void zone_hold(zone_t *); extern void zone_rele(zone_t *); +extern void zone_init_ref(zone_ref_t *); +extern void zone_hold_ref(zone_t *, zone_ref_t *, zone_ref_subsys_t); +extern void zone_rele_ref(zone_ref_t *, zone_ref_subsys_t); extern void zone_cred_hold(zone_t *); extern void zone_cred_rele(zone_t *); extern void zone_task_hold(zone_t *); diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index 821b2ea8b5..a96951830d 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -466,7 +466,7 @@ segspt_create(struct seg *seg, caddr_t argsp) * The zone will never be NULL, as a fully created * shm always has an owning zone. */ - zone = sp->shm_perm.ipc_zone; + zone = sp->shm_perm.ipc_zone_ref.zref_zone; ASSERT(zone != NULL); if (anon_resv_zone(ptob(more_pgs), zone) == 0) { err = ENOMEM; |