diff options
Diffstat (limited to 'usr/src/uts/common')
34 files changed, 1076 insertions, 720 deletions
diff --git a/usr/src/uts/common/fs/autofs/auto_vfsops.c b/usr/src/uts/common/fs/autofs/auto_vfsops.c index 3bd5fa7591..52c0368dda 100644 --- a/usr/src/uts/common/fs/autofs/auto_vfsops.c +++ b/usr/src/uts/common/fs/autofs/auto_vfsops.c @@ -87,7 +87,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "autofs", autofs_init, - VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_STATS, + VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT, &auto_mntopts }; diff --git a/usr/src/uts/common/fs/ctfs/ctfs_root.c b/usr/src/uts/common/fs/ctfs/ctfs_root.c index 8861b6d73b..1e70b36206 100644 --- a/usr/src/uts/common/fs/ctfs/ctfs_root.c +++ b/usr/src/uts/common/fs/ctfs/ctfs_root.c @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/modctl.h> #include <sys/types.h> #include <sys/param.h> @@ -111,7 +108,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "ctfs", ctfs_init, - VSW_HASPROTO, + VSW_HASPROTO|VSW_ZMOUNT, &ctfs_mntopts, }; @@ -241,10 +238,10 @@ ctfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) */ vfsp->vfs_bsize = DEV_BSIZE; vfsp->vfs_fstype = ctfs_fstype; - do + do { dev = makedevice(ctfs_major, atomic_add_32_nv(&ctfs_minor, 1) & L_MAXMIN32); - while (vfs_devismounted(dev)); + } while (vfs_devismounted(dev)); vfs_make_fsid(&vfsp->vfs_fsid, dev, ctfs_fstype); vfsp->vfs_data = data; vfsp->vfs_dev = dev; diff --git a/usr/src/uts/common/fs/dcfs/dc_vnops.c b/usr/src/uts/common/fs/dcfs/dc_vnops.c index 1e30887d7f..4cf6f9ce62 100644 --- a/usr/src/uts/common/fs/dcfs/dc_vnops.c +++ b/usr/src/uts/common/fs/dcfs/dc_vnops.c @@ -20,8 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -159,7 +158,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "dcfs", dcinit, - 0, + VSW_ZMOUNT, NULL }; diff --git a/usr/src/uts/common/fs/dev/sdev_profile.c b/usr/src/uts/common/fs/dev/sdev_profile.c index 6a0095657e..01d4aab1ae 100644 --- a/usr/src/uts/common/fs/dev/sdev_profile.c +++ b/usr/src/uts/common/fs/dev/sdev_profile.c @@ -20,12 +20,9 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file implements /dev filesystem operations for non-global * instances. Three major entry points: @@ -245,9 +242,6 @@ static void prof_lookup_globaldev(struct sdev_node *dir, struct sdev_node *gdir, char *name, char *rename) { - /* global OS rootdir */ - extern vnode_t *rootdir; - int error; struct vnode *avp, *gdv, *gddv; struct sdev_node *newdv; @@ -269,7 +263,6 @@ prof_lookup_globaldev(struct sdev_node *dir, struct sdev_node *gdir, /* perform a relative lookup of the global /dev instance */ gddv = SDEVTOV(gdir); VN_HOLD(gddv); - VN_HOLD(rootdir); error = lookuppnvp(&pn, NULL, FOLLOW, NULLVPP, &gdv, rootdir, gddv, kcred); pn_free(&pn); @@ -528,25 +521,96 @@ end: kmem_free(dbuf, dlen); } +/* + * Last chance for a zone to see a node. If our parent dir is + * SDEV_ZONED, then we look up the "zone" property for the node. If the + * property is found and matches the current zone name, we allow it. + * Note that this isn't quite correct for the global zone peeking inside + * a zone's /dev - for that to work, we'd have to have a per-dev-mount + * zone ref squirreled away. 
+ */ +static int +prof_zone_matched(char *name, struct sdev_node *dir) +{ + vnode_t *gvn = SDEVTOV(dir->sdev_origin); + struct pathname pn; + vnode_t *vn = NULL; + char zonename[ZONENAME_MAX]; + int znlen = ZONENAME_MAX; + int ret; + + ASSERT((dir->sdev_flags & SDEV_ZONED) != 0); + + sdcmn_err10(("sdev_node %p is zoned, looking for %s\n", + (void *)dir, name)); + + if (pn_get(name, UIO_SYSSPACE, &pn)) + return (0); + + VN_HOLD(gvn); + + ret = lookuppnvp(&pn, NULL, FOLLOW, NULLVPP, &vn, rootdir, gvn, kcred); + + pn_free(&pn); + + if (ret != 0) { + sdcmn_err10(("prof_zone_matched: %s not found\n", name)); + return (0); + } + + /* + * VBLK doesn't matter, and the property name is in fact treated + * as a const char *. + */ + ret = e_ddi_getlongprop_buf(vn->v_rdev, VBLK, (char *)"zone", + DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, (caddr_t)zonename, &znlen); + + VN_RELE(vn); + + if (ret == DDI_PROP_NOT_FOUND) { + sdcmn_err10(("vnode %p: no zone prop\n", (void *)vn)); + return (0); + } else if (ret != DDI_PROP_SUCCESS) { + sdcmn_err10(("vnode %p: zone prop error: %d\n", + (void *)vn, ret)); + return (0); + } + + sdcmn_err10(("vnode %p zone prop: %s\n", (void *)vn, zonename)); + return (strcmp(zonename, curproc->p_zone->zone_name) == 0); +} + static int -prof_make_name(char *nm, void *arg) +prof_make_name_glob(char *nm, void *arg) { struct sdev_node *ddv = (struct sdev_node *)arg; if (prof_name_matched(nm, ddv)) prof_lookup_globaldev(ddv, ddv->sdev_origin, nm, nm); + + return (WALK_DIR_CONTINUE); +} + +static int +prof_make_name_zone(char *nm, void *arg) +{ + struct sdev_node *ddv = (struct sdev_node *)arg; + + if (prof_zone_matched(nm, ddv)) + prof_lookup_globaldev(ddv, ddv->sdev_origin, nm, nm); + return (WALK_DIR_CONTINUE); } static void -prof_make_names_glob(struct sdev_node *ddv) +prof_make_names_walk(struct sdev_node *ddv, int (*cb)(char *, void *)) { struct sdev_node *gdir; gdir = ddv->sdev_origin; if (gdir == NULL) return; - walk_dir(SDEVTOV(gdir), (void *)ddv, 
prof_make_name); + walk_dir(SDEVTOV(gdir), (void *)ddv, cb); } static void @@ -559,11 +623,14 @@ prof_make_names(struct sdev_node *dir) ASSERT(RW_WRITE_HELD(&dir->sdev_contents)); + if ((dir->sdev_flags & SDEV_ZONED) != 0) + prof_make_names_walk(dir, prof_make_name_zone); + if (nvl == NULL) return; if (dir->sdev_prof.has_glob) { - prof_make_names_glob(dir); + prof_make_names_walk(dir, prof_make_name_glob); return; } diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c index 5550bd6a13..3fced7bb9c 100644 --- a/usr/src/uts/common/fs/dev/sdev_subr.c +++ b/usr/src/uts/common/fs/dev/sdev_subr.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -548,6 +547,9 @@ static struct sdev_vop_table vtab[] = { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, + { "lofi", NULL, NULL, NULL, NULL, SDEV_ZONED }, + { "rlofi", NULL, NULL, NULL, NULL, SDEV_ZONED }, + { NULL, NULL, NULL, NULL, NULL, 0} }; diff --git a/usr/src/uts/common/fs/fd/fdops.c b/usr/src/uts/common/fs/fd/fdops.c index 3288872146..8c398be2ee 100644 --- a/usr/src/uts/common/fs/fd/fdops.c +++ b/usr/src/uts/common/fs/fd/fdops.c @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All rights reserved. 
*/ @@ -549,7 +546,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "fd", fdinit, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &fdfs_mntopts }; diff --git a/usr/src/uts/common/fs/fifofs/fifosubr.c b/usr/src/uts/common/fs/fifofs/fifosubr.c index a788124c7f..6e56000ffe 100644 --- a/usr/src/uts/common/fs/fifofs/fifosubr.c +++ b/usr/src/uts/common/fs/fifofs/fifosubr.c @@ -21,12 +21,9 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * The routines defined in this file are supporting routines for FIFOFS * file system type. @@ -80,7 +77,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "fifofs", fifoinit, - 0, + VSW_ZMOUNT, NULL }; diff --git a/usr/src/uts/common/fs/hsfs/hsfs_vfsops.c b/usr/src/uts/common/fs/hsfs/hsfs_vfsops.c index aaad2bc864..058d6925e3 100644 --- a/usr/src/uts/common/fs/hsfs/hsfs_vfsops.c +++ b/usr/src/uts/common/fs/hsfs/hsfs_vfsops.c @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * VFS operations for High Sierra filesystem */ @@ -151,7 +148,7 @@ static vfsdef_t vfw = { "hsfs", hsfsinit, /* We don't suppport remounting */ - VSW_HASPROTO|VSW_STATS|VSW_CANLOFI, + VSW_HASPROTO|VSW_STATS|VSW_CANLOFI|VSW_ZMOUNT, &hsfs_proto_opttbl }; diff --git a/usr/src/uts/common/fs/lofs/lofs_vfsops.c b/usr/src/uts/common/fs/lofs/lofs_vfsops.c index d1a927de73..5f1ae8a1a4 100644 --- a/usr/src/uts/common/fs/lofs/lofs_vfsops.c +++ b/usr/src/uts/common/fs/lofs/lofs_vfsops.c @@ -58,7 +58,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "lofs", lofsinit, - VSW_HASPROTO|VSW_STATS, + VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, &lofs_mntopts }; diff --git a/usr/src/uts/common/fs/mntfs/mntvfsops.c b/usr/src/uts/common/fs/mntfs/mntvfsops.c index 7cd5f82808..102e681e8e 100644 --- a/usr/src/uts/common/fs/mntfs/mntvfsops.c +++ b/usr/src/uts/common/fs/mntfs/mntvfsops.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -66,7 +65,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "mntfs", mntinit, - VSW_HASPROTO|VSW_STATS, + VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, &mnt_mntopts }; diff --git a/usr/src/uts/common/fs/namefs/namevfs.c b/usr/src/uts/common/fs/namefs/namevfs.c index b0470e45d9..0725504842 100644 --- a/usr/src/uts/common/fs/namefs/namevfs.c +++ b/usr/src/uts/common/fs/namefs/namevfs.c @@ -730,7 +730,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "namefs", nameinit, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &nm_mntopts }; diff --git a/usr/src/uts/common/fs/nfs/nfs4_common.c b/usr/src/uts/common/fs/nfs/nfs4_common.c index fbd2670acc..c6d7db3d90 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_common.c +++ b/usr/src/uts/common/fs/nfs/nfs4_common.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -49,7 +48,7 @@ static vfsdef_t vfw4 = { VFSDEF_VERSION, "nfs4", nfs4init, - VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, + VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, NULL }; diff --git a/usr/src/uts/common/fs/nfs/nfs_common.c b/usr/src/uts/common/fs/nfs/nfs_common.c index c980d55aed..5588a511c8 100644 --- a/usr/src/uts/common/fs/nfs/nfs_common.c +++ b/usr/src/uts/common/fs/nfs/nfs_common.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -28,8 +27,6 @@ * All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/errno.h> #include <sys/param.h> #include <sys/types.h> @@ -125,7 +122,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "nfsdyn", nfsdyninit, - 0, + VSW_ZMOUNT, NULL }; @@ -142,7 +139,7 @@ static vfsdef_t vfw2 = { VFSDEF_VERSION, "nfs", nfsinit, - VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, + VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, NULL }; @@ -159,7 +156,7 @@ static vfsdef_t vfw3 = { VFSDEF_VERSION, "nfs3", nfs3init, - VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, + VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, NULL }; @@ -410,10 +407,10 @@ nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) vfsflags = 0; if (error = mount_root(*name ? name : "root", root_path, NFS_V4, - &args, &vfsflags)) { + &args, &vfsflags)) { if (error != EPROTONOSUPPORT) { nfs_cmn_err(error, CE_WARN, - "Unable to mount NFS root filesystem: %m"); + "Unable to mount NFS root filesystem: %m"); sv_free(svp); pn_free(&pn); vfs_setops(vfsp, nfsdyn_vfsops); @@ -432,7 +429,7 @@ nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) vfsflags = 0; if (error = mount_root(*name ? 
name : "root", root_path, - NFS_V3, &args, &vfsflags)) { + NFS_V3, &args, &vfsflags)) { if (error != EPROTONOSUPPORT) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); @@ -455,8 +452,7 @@ nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) vfs_setops(vfsp, nfs_vfsops); if (error = mount_root(*name ? name : "root", - root_path, NFS_VERSION, &args, - &vfsflags)) { + root_path, NFS_VERSION, &args, &vfsflags)) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); sv_free(svp); diff --git a/usr/src/uts/common/fs/objfs/objfs_vfs.c b/usr/src/uts/common/fs/objfs/objfs_vfs.c index 0ee0b0a577..00dafeb625 100644 --- a/usr/src/uts/common/fs/objfs/objfs_vfs.c +++ b/usr/src/uts/common/fs/objfs/objfs_vfs.c @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/atomic.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -76,7 +73,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "objfs", objfs_init, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &objfs_mntopts, }; diff --git a/usr/src/uts/common/fs/proc/prvfsops.c b/usr/src/uts/common/fs/proc/prvfsops.c index 1ff5993983..d0bebc7163 100644 --- a/usr/src/uts/common/fs/proc/prvfsops.c +++ b/usr/src/uts/common/fs/proc/prvfsops.c @@ -19,16 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.25 */ - #include <sys/types.h> #include <sys/param.h> #include <sys/cmn_err.h> @@ -72,7 +69,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "proc", prinit, - VSW_HASPROTO|VSW_STATS|VSW_XID, + VSW_HASPROTO|VSW_STATS|VSW_XID|VSW_ZMOUNT, &proc_mntopts }; diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c index ed02061fb9..6f7796e6e0 100644 --- a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c +++ b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c @@ -20,12 +20,9 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/atomic.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -94,7 +91,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "sharefs", sharefs_init, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &sharefs_mntopts, }; diff --git a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c index d1e28b971f..d649e9d664 100644 --- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c @@ -33,8 +33,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #include <sys/systm.h> @@ -116,7 +115,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, (char *)fs_type_name, smbfsinit, /* init routine */ - VSW_HASPROTO|VSW_NOTZONESAFE, /* flags */ + VSW_HASPROTO|VSW_NOTZONESAFE|VSW_ZMOUNT, /* flags */ &smbfs_mntopts /* mount options table prototype */ }; diff --git a/usr/src/uts/common/fs/sockfs/sockvfsops.c b/usr/src/uts/common/fs/sockfs/sockvfsops.c index 2462306bdb..64d96eda41 100644 --- a/usr/src/uts/common/fs/sockfs/sockvfsops.c +++ b/usr/src/uts/common/fs/sockfs/sockvfsops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/t_lock.h> #include <sys/param.h> @@ -59,7 +55,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "sockfs", sockinit, - 0, + VSW_ZMOUNT, NULL }; diff --git a/usr/src/uts/common/fs/specfs/specvfsops.c b/usr/src/uts/common/fs/specfs/specvfsops.c index 98342f4424..83f270c90a 100644 --- a/usr/src/uts/common/fs/specfs/specvfsops.c +++ b/usr/src/uts/common/fs/specfs/specvfsops.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -37,8 +36,6 @@ */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/t_lock.h> #include <sys/param.h> @@ -64,7 +61,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "specfs", specinit, - 0, + VSW_ZMOUNT, NULL }; diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c index ad545f4a0b..9bb96e4165 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -77,7 +76,7 @@ static vfsdef_t vfw = { VFSDEF_VERSION, "tmpfs", tmpfsinit, - VSW_HASPROTO|VSW_STATS, + VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, &tmpfs_proto_opttbl }; diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index d5d254a20d..2bfe3908f0 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -1014,8 +1013,7 @@ lofi_add(const char *fsname, struct vfs *vfsp, int minor; int err = 0; - if (fsname == NULL || - (vfssw = vfs_getvfssw(fsname)) == NULL) + if ((vfssw = vfs_getvfssw(fsname)) == NULL) return (0); if (!(vfssw->vsw_flag & VSW_CANLOFI)) { @@ -1049,29 +1047,16 @@ lofi_add(const char *fsname, struct vfs *vfsp, li = kmem_zalloc(sizeof (*li), KM_SLEEP); (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN); - /* - * The lofi control node is currently exclusive-open. We'd like - * to improve this, but in the meantime, we'll loop waiting for - * access. 
- */ - for (;;) { - err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, - kcred, &ldi_hdl, ldi_id); - - if (err != EBUSY) - break; - - if ((err = delay_sig(hz / 8)) == EINTR) - break; - } + err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE, kcred, + &ldi_hdl, ldi_id); if (err) goto out2; err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, - FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor); + FREAD | FWRITE | FKIOCTL, kcred, &minor); - (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred); + (void) ldi_close(ldi_hdl, FREAD | FWRITE, kcred); if (!err) vfsp->vfs_lofi_minor = minor; @@ -1104,18 +1089,16 @@ lofi_remove(struct vfs *vfsp) li->li_minor = vfsp->vfs_lofi_minor; li->li_cleanup = B_TRUE; - do { - err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, - kcred, &ldi_hdl, ldi_id); - } while (err == EBUSY); + err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE, kcred, + &ldi_hdl, ldi_id); if (err) goto out; err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li, - FREAD | FWRITE | FEXCL | FKIOCTL, kcred, NULL); + FREAD | FWRITE | FKIOCTL, kcred, NULL); - (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred); + (void) ldi_close(ldi_hdl, FREAD | FWRITE, kcred); if (!err) vfsp->vfs_lofi_minor = 0; @@ -1251,9 +1234,16 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, } else { if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL) return (EINVAL); + fsname = vswp->vsw_name; } if (!VFS_INSTALLED(vswp)) return (EINVAL); + + if ((error = secpolicy_fs_allowed_mount(fsname)) != 0) { + vfs_unrefvfssw(vswp); + return (error); + } + vfsops = &vswp->vsw_vfsops; vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts); @@ -4782,7 +4772,7 @@ vfs_propagate_features(vfs_t *from, vfs_t *to) } } -#define LOFICTL_PATH "/devices/pseudo/lofi@0:%d" +#define LOFINODE_PATH "/dev/lofi/%d" /* * Return the vnode for the lofi node if there's a lofi mount in place. 
@@ -4801,11 +4791,23 @@ vfs_get_lofi(vfs_t *vfsp, vnode_t **vpp) return (-1); } - strsize = snprintf(NULL, 0, LOFICTL_PATH, vfsp->vfs_lofi_minor); + strsize = snprintf(NULL, 0, LOFINODE_PATH, vfsp->vfs_lofi_minor); path = kmem_alloc(strsize + 1, KM_SLEEP); - (void) snprintf(path, strsize + 1, LOFICTL_PATH, vfsp->vfs_lofi_minor); + (void) snprintf(path, strsize + 1, LOFINODE_PATH, vfsp->vfs_lofi_minor); + + /* + * We may be inside a zone, so we need to use the /dev path, but + * it's created asynchronously, so we wait here. + */ + for (;;) { + err = lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp); + + if (err != ENOENT) + break; - err = lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp); + if ((err = delay_sig(hz / 8)) == EINTR) + break; + } if (err) *vpp = NULL; diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index 6b83f8c803..0f9717ed07 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -2277,7 +2277,7 @@ static vfsdef_t vfw = { MNTTYPE_ZFS, zfs_vfsinit, VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS| - VSW_XID, + VSW_XID|VSW_ZMOUNT, &zfs_mntopts }; diff --git a/usr/src/uts/common/io/lofi.c b/usr/src/uts/common/io/lofi.c index d040c74041..abafd483aa 100644 --- a/usr/src/uts/common/io/lofi.c +++ b/usr/src/uts/common/io/lofi.c @@ -76,12 +76,6 @@ * enable direct I/O on the underlying file. Don't, because that deadlocks. * I think to fix the cache-twice problem we might need filesystem support. * - * lofi on itself. The simple lock strategy (lofi_lock) precludes this - * because you'll be in lofi_ioctl, holding the lock when you open the - * file, which, if it's lofi, will grab lofi_lock. We prevent this for - * now, though not using ddi_soft_state(9F) would make it possible to - * do. Though it would still be silly. - * * Interesting things to do: * * Allow multiple files for each device. A poor-man's metadisk, basically. 
@@ -129,8 +123,11 @@ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/zmod.h> +#include <sys/id_space.h> +#include <sys/mkdev.h> #include <sys/crypto/common.h> #include <sys/crypto/api.h> +#include <sys/rctl.h> #include <LzmaDec.h> /* @@ -144,6 +141,7 @@ #define NBLOCKS_PROP_NAME "Nblocks" #define SIZE_PROP_NAME "Size" +#define ZONE_PROP_NAME "zone" #define SETUP_C_DATA(cd, buf, len) \ (cd).cd_format = CRYPTO_DATA_RAW; \ @@ -162,6 +160,9 @@ static dev_info_t *lofi_dip = NULL; static void *lofi_statep = NULL; static kmutex_t lofi_lock; /* state lock */ +static id_space_t *lofi_minor_id; +static list_t lofi_list; +static zone_key_t lofi_zone_key; /* * Because lofi_taskq_nthreads limits the actual swamping of the device, the @@ -178,7 +179,6 @@ static kmutex_t lofi_lock; /* state lock */ static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE; static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */ -uint32_t lofi_max_files = LOFI_MAX_FILES; const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC; /* @@ -244,36 +244,16 @@ lofi_free_comp_cache(struct lofi_state *lsp) } static int -lofi_busy(void) -{ - minor_t minor; - - /* - * We need to make sure no mappings exist - mod_remove won't - * help because the device isn't open. 
- */ - mutex_enter(&lofi_lock); - for (minor = 1; minor <= lofi_max_files; minor++) { - if (ddi_get_soft_state(lofi_statep, minor) != NULL) { - mutex_exit(&lofi_lock); - return (EBUSY); - } - } - mutex_exit(&lofi_lock); - return (0); -} - -static int is_opened(struct lofi_state *lsp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count); } static int mark_opened(struct lofi_state *lsp, int otyp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); switch (otyp) { case OTYP_CHR: lsp->ls_chr_open = 1; @@ -293,7 +273,7 @@ mark_opened(struct lofi_state *lsp, int otyp) static void mark_closed(struct lofi_state *lsp, int otyp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); switch (otyp) { case OTYP_CHR: lsp->ls_chr_open = 0; @@ -312,19 +292,21 @@ mark_closed(struct lofi_state *lsp, int otyp) static void lofi_free_crypto(struct lofi_state *lsp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); if (lsp->ls_crypto_enabled) { /* * Clean up the crypto state so that it doesn't hang around * in memory after we are done with it. 
*/ - bzero(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - kmem_free(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - lsp->ls_key.ck_data = NULL; - lsp->ls_key.ck_length = 0; + if (lsp->ls_key.ck_data != NULL) { + bzero(lsp->ls_key.ck_data, + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); + kmem_free(lsp->ls_key.ck_data, + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); + lsp->ls_key.ck_data = NULL; + lsp->ls_key.ck_length = 0; + } if (lsp->ls_mech.cm_param != NULL) { kmem_free(lsp->ls_mech.cm_param, @@ -345,69 +327,115 @@ lofi_free_crypto(struct lofi_state *lsp) } static void -lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp, - cred_t *credp) +lofi_destroy(struct lofi_state *lsp, cred_t *credp) { - dev_t newdev; - char namebuf[50]; - int i; + minor_t minor = getminor(lsp->ls_dev); + int i; + + ASSERT(MUTEX_HELD(&lofi_lock)); - ASSERT(mutex_owned(&lofi_lock)); + list_remove(&lofi_list, lsp); lofi_free_crypto(lsp); - if (lsp->ls_vp) { - (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, - 1, 0, credp, NULL); - VN_RELE(lsp->ls_vp); - lsp->ls_vp = NULL; + /* + * Free pre-allocated compressed buffers + */ + if (lsp->ls_comp_bufs != NULL) { + for (i = 0; i < lofi_taskq_nthreads; i++) { + if (lsp->ls_comp_bufs[i].bufsize > 0) + kmem_free(lsp->ls_comp_bufs[i].buf, + lsp->ls_comp_bufs[i].bufsize); + } + kmem_free(lsp->ls_comp_bufs, + sizeof (struct compbuf) * lofi_taskq_nthreads); } - newdev = makedevice(getmajor(dev), minor); - (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); - (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); + (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, + 1, 0, credp, NULL); + VN_RELE(lsp->ls_vp); + if (lsp->ls_stacked_vp != lsp->ls_vp) + VN_RELE(lsp->ls_stacked_vp); - (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); - ddi_remove_minor_node(lofi_dip, namebuf); - (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); - ddi_remove_minor_node(lofi_dip, namebuf); - - 
kmem_free(lsp->ls_filename, lsp->ls_filename_sz); taskq_destroy(lsp->ls_taskq); - if (lsp->ls_kstat) { + + if (lsp->ls_kstat != NULL) kstat_delete(lsp->ls_kstat); - mutex_destroy(&lsp->ls_kstat_lock); - } /* * Free cached decompressed segment data */ lofi_free_comp_cache(lsp); list_destroy(&lsp->ls_comp_cache); - mutex_destroy(&lsp->ls_comp_cache_lock); if (lsp->ls_uncomp_seg_sz > 0) { kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz); lsp->ls_uncomp_seg_sz = 0; } - /* - * Free pre-allocated compressed buffers - */ - if (lsp->ls_comp_bufs != NULL) { - for (i = 0; i < lofi_taskq_nthreads; i++) { - if (lsp->ls_comp_bufs[i].bufsize > 0) - kmem_free(lsp->ls_comp_bufs[i].buf, - lsp->ls_comp_bufs[i].bufsize); - } - kmem_free(lsp->ls_comp_bufs, - sizeof (struct compbuf) * lofi_taskq_nthreads); - mutex_destroy(&lsp->ls_comp_bufs_lock); - } + rctl_decr_lofi(lsp->ls_zone, 1); + zone_rele(lsp->ls_zone); + mutex_destroy(&lsp->ls_comp_cache_lock); + mutex_destroy(&lsp->ls_comp_bufs_lock); + mutex_destroy(&lsp->ls_kstat_lock); mutex_destroy(&lsp->ls_vp_lock); + ASSERT(ddi_get_soft_state(lofi_statep, minor) == lsp); ddi_soft_state_free(lofi_statep, minor); + id_free(lofi_minor_id, minor); +} + +static void +lofi_free_dev(dev_t dev) +{ + minor_t minor = getminor(dev); + char namebuf[50]; + + ASSERT(MUTEX_HELD(&lofi_lock)); + + (void) ddi_prop_remove(dev, lofi_dip, ZONE_PROP_NAME); + (void) ddi_prop_remove(dev, lofi_dip, SIZE_PROP_NAME); + (void) ddi_prop_remove(dev, lofi_dip, NBLOCKS_PROP_NAME); + + (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); + ddi_remove_minor_node(lofi_dip, namebuf); + (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); + ddi_remove_minor_node(lofi_dip, namebuf); +} + +/*ARGSUSED*/ +static void +lofi_zone_shutdown(zoneid_t zoneid, void *arg) +{ + struct lofi_state *lsp; + struct lofi_state *next; + + mutex_enter(&lofi_lock); + + for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) { + + /* lofi_destroy() frees lsp */ + 
next = list_next(&lofi_list, lsp); + + if (lsp->ls_zone->zone_id != zoneid) + continue; + + /* + * No in-zone processes are running, but something has this + * open. It's either a global zone process, or a lofi + * mount. In either case we set ls_cleanup so the last + * user destroys the device. + */ + if (is_opened(lsp)) { + lsp->ls_cleanup = 1; + } else { + lofi_free_dev(lsp->ls_dev); + lofi_destroy(lsp, kcred); + } + } + + mutex_exit(&lofi_lock); } /*ARGSUSED*/ @@ -417,25 +445,18 @@ lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp) minor_t minor; struct lofi_state *lsp; + /* + * lofiadm -a /dev/lofi/1 gets us here. + */ + if (mutex_owner(&lofi_lock) == curthread) + return (EINVAL); + mutex_enter(&lofi_lock); + minor = getminor(*devp); + + /* master control device */ if (minor == 0) { - /* master control device */ - /* must be opened exclusively */ - if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) { - mutex_exit(&lofi_lock); - return (EINVAL); - } - lsp = ddi_get_soft_state(lofi_statep, 0); - if (lsp == NULL) { - mutex_exit(&lofi_lock); - return (ENXIO); - } - if (is_opened(lsp)) { - mutex_exit(&lofi_lock); - return (EBUSY); - } - (void) mark_opened(lsp, OTYP_CHR); mutex_exit(&lofi_lock); return (0); } @@ -475,6 +496,12 @@ lofi_close(dev_t dev, int flag, int otyp, struct cred *credp) mutex_exit(&lofi_lock); return (EINVAL); } + + if (minor == 0) { + mutex_exit(&lofi_lock); + return (0); + } + mark_closed(lsp, otyp); /* @@ -482,9 +509,10 @@ lofi_close(dev_t dev, int flag, int otyp, struct cred *credp) * asked for cleanup (li_cleanup), finish up if we're the last * out of the door. 
*/ - if (minor != 0 && !is_opened(lsp) && - (lsp->ls_cleanup || lsp->ls_vp == NULL)) - lofi_free_handle(dev, minor, lsp, credp); + if (!is_opened(lsp) && (lsp->ls_cleanup || lsp->ls_vp == NULL)) { + lofi_free_dev(dev); + lofi_destroy(lsp, credp); + } mutex_exit(&lofi_lock); return (0); @@ -508,7 +536,7 @@ lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno) void *data; size_t datasz; - ASSERT(mutex_owned(&lsp->ls_crypto_lock)); + ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock)); if (lsp == NULL) return (CRYPTO_DEVICE_ERROR); @@ -843,7 +871,7 @@ lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index) { struct lofi_comp_cache *lc; - ASSERT(mutex_owned(&lsp->ls_comp_cache_lock)); + ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock)); for (lc = list_head(&lsp->ls_comp_cache); lc != NULL; lc = list_next(&lsp->ls_comp_cache, lc)) { @@ -877,7 +905,7 @@ lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index, { struct lofi_comp_cache *lc; - ASSERT(mutex_owned(&lsp->ls_comp_cache_lock)); + ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock)); while (lsp->ls_comp_cache_count > lofi_max_comp_cache) { lc = list_remove_tail(&lsp->ls_comp_cache); @@ -1443,14 +1471,22 @@ lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) if (cmd != DDI_ATTACH) return (DDI_FAILURE); + + lofi_minor_id = id_space_create("lofi_minor_id", 1, L_MAXMIN32 + 1); + + if (!lofi_minor_id) + return (DDI_FAILURE); + error = ddi_soft_state_zalloc(lofi_statep, 0); if (error == DDI_FAILURE) { + id_space_destroy(lofi_minor_id); return (DDI_FAILURE); } error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0, DDI_PSEUDO, NULL); if (error == DDI_FAILURE) { ddi_soft_state_free(lofi_statep, 0); + id_space_destroy(lofi_minor_id); return (DDI_FAILURE); } /* driver handles kernel-issued IOCTLs */ @@ -1458,8 +1494,12 @@ lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { ddi_remove_minor_node(dip, NULL); ddi_soft_state_free(lofi_statep, 0); + id_space_destroy(lofi_minor_id); 
return (DDI_FAILURE); } + + zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL); + lofi_dip = dip; ddi_report_dev(dip); return (DDI_SUCCESS); @@ -1470,12 +1510,27 @@ lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { if (cmd != DDI_DETACH) return (DDI_FAILURE); - if (lofi_busy()) + + mutex_enter(&lofi_lock); + + if (!list_is_empty(&lofi_list)) { + mutex_exit(&lofi_lock); return (DDI_FAILURE); + } + lofi_dip = NULL; ddi_remove_minor_node(dip, NULL); ddi_prop_remove_all(dip); + + mutex_exit(&lofi_lock); + + if (zone_key_delete(lofi_zone_key) != 0) + cmn_err(CE_WARN, "failed to delete zone key"); + ddi_soft_state_free(lofi_statep, 0); + + id_space_destroy(lofi_minor_id); + return (DDI_SUCCESS); } @@ -1496,30 +1551,34 @@ free_lofi_ioctl(struct lofi_ioctl *klip) * These two just simplify the rest of the ioctls that need to copyin/out * the lofi_ioctl structure. */ -struct lofi_ioctl * -copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag) +int +copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp, + int flag) { struct lofi_ioctl *klip; int error; - klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); + klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag); - if (error) { - free_lofi_ioctl(klip); - return (NULL); - } + if (error) + goto err; - /* make sure filename is always null-terminated */ + /* ensure NULL termination */ klip->li_filename[MAXPATHLEN-1] = '\0'; + klip->li_algorithm[MAXALGLEN-1] = '\0'; + klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; + klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; - /* validate minor number */ - if (klip->li_minor > lofi_max_files) { - free_lofi_ioctl(klip); - cmn_err(CE_WARN, "attempt to map more than lofi_max_files (%d)", - lofi_max_files); - return (NULL); + if (klip->li_minor > L_MAXMIN32) { + error = EINVAL; + goto err; } - return (klip); + + return (0); + +err: + free_lofi_ioctl(klip); + return 
(error); } int @@ -1547,45 +1606,76 @@ copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip, return (0); } +static int +lofi_access(struct lofi_state *lsp) +{ + ASSERT(MUTEX_HELD(&lofi_lock)); + if (INGLOBALZONE(curproc) || lsp->ls_zone == curproc->p_zone) + return (0); + return (EPERM); +} + /* - * Return the minor number 'filename' is mapped to, if it is. + * Find the lofi state for the given filename. We compare by vnode to + * allow the global zone visibility into NGZ lofi nodes. */ static int -file_to_minor(char *filename) +file_to_lofi_nocheck(char *filename, struct lofi_state **lspp) { - minor_t minor; struct lofi_state *lsp; + vnode_t *vp = NULL; + int err = 0; - ASSERT(mutex_owned(&lofi_lock)); - for (minor = 1; minor <= lofi_max_files; minor++) { - lsp = ddi_get_soft_state(lofi_statep, minor); - if (lsp == NULL) - continue; - if (strcmp(lsp->ls_filename, filename) == 0) - return (minor); + ASSERT(MUTEX_HELD(&lofi_lock)); + + if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW, + NULLVPP, &vp)) != 0) + goto out; + + if (vp->v_type == VREG) { + vnode_t *realvp; + if (VOP_REALVP(vp, &realvp, NULL) == 0) { + VN_HOLD(realvp); + VN_RELE(vp); + vp = realvp; + } } - return (0); + + for (lsp = list_head(&lofi_list); lsp != NULL; + lsp = list_next(&lofi_list, lsp)) { + if (lsp->ls_vp == vp) { + if (lspp != NULL) + *lspp = lsp; + goto out; + } + } + + err = ENOENT; + +out: + if (vp != NULL) + VN_RELE(vp); + return (err); } /* - * lofiadm does some validation, but since Joe Random (or crashme) could - * do our ioctls, we need to do some validation too. + * Find the minor for the given filename, checking the zone can access + * it. 
*/ static int -valid_filename(const char *filename) +file_to_lofi(char *filename, struct lofi_state **lspp) { - static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/"; - static char *charprefix = "/dev/" LOFI_CHAR_NAME "/"; + int err = 0; - /* must be absolute path */ - if (filename[0] != '/') - return (0); - /* must not be lofi */ - if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0) - return (0); - if (strncmp(filename, charprefix, strlen(charprefix)) == 0) - return (0); - return (1); + ASSERT(MUTEX_HELD(&lofi_lock)); + + if ((err = file_to_lofi_nocheck(filename, lspp)) != 0) + return (err); + + if ((err = lofi_access(*lspp)) != 0) + return (err); + + return (0); } /* @@ -1790,24 +1880,171 @@ lofi_map_compressed_file(struct lofi_state *lsp, char *buf) BE_64(lsp->ls_comp_seg_index[i]); } + return (error); +} + +static int +lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip) +{ + struct crypto_meta chead; + char buf[DEV_BSIZE]; + ssize_t resid; + char *marker; + int error; + int ret; + int i; + + if (!klip->li_crypto_enabled) + return (0); + /* - * Finally setup per-thread pre-allocated buffers + * All current algorithms have a max of 448 bits. 
*/ - lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * - sizeof (struct compbuf), KM_SLEEP); - mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL); + if (klip->li_iv_len > CRYPTO_BITS2BYTES(512)) + return (EINVAL); - return (error); + if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key)) + return (EINVAL); + + lsp->ls_crypto_enabled = klip->li_crypto_enabled; + + mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL); + + lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher); + if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) { + cmn_err(CE_WARN, "invalid cipher %s requested for %s", + klip->li_cipher, klip->li_filename); + return (EINVAL); + } + + /* this is just initialization here */ + lsp->ls_mech.cm_param = NULL; + lsp->ls_mech.cm_param_len = 0; + + lsp->ls_iv_type = klip->li_iv_type; + lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher); + if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) { + cmn_err(CE_WARN, "invalid iv cipher %s requested" + " for %s", klip->li_iv_cipher, klip->li_filename); + return (EINVAL); + } + + /* iv mech must itself take a null iv */ + lsp->ls_iv_mech.cm_param = NULL; + lsp->ls_iv_mech.cm_param_len = 0; + lsp->ls_iv_len = klip->li_iv_len; + + /* + * Create ctx using li_cipher & the raw li_key after checking + * that it isn't a weak key. 
+ */ + lsp->ls_key.ck_format = CRYPTO_KEY_RAW; + lsp->ls_key.ck_length = klip->li_key_len; + lsp->ls_key.ck_data = kmem_alloc( + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); + bcopy(klip->li_key, lsp->ls_key.ck_data, + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); + + ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); + if (ret != CRYPTO_SUCCESS) { + cmn_err(CE_WARN, "weak key check failed for cipher " + "%s on file %s (0x%x)", klip->li_cipher, + klip->li_filename, ret); + return (EINVAL); + } + + error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, + CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + if (error != 0) + return (error); + + /* + * This is the case where the header in the lofi image is already + * initialized to indicate it is encrypted. + */ + if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) { + /* + * The encryption header information is laid out this way: + * 6 bytes: hex "CFLOFI" + * 2 bytes: version = 0 ... for now + * 96 bytes: reserved1 (not implemented yet) + * 4 bytes: data_sector = 2 ... for now + * more... 
not implemented yet + */ + + marker = buf; + + /* copy the magic */ + bcopy(marker, lsp->ls_crypto.magic, + sizeof (lsp->ls_crypto.magic)); + marker += sizeof (lsp->ls_crypto.magic); + + /* read the encryption version number */ + bcopy(marker, &(lsp->ls_crypto.version), + sizeof (lsp->ls_crypto.version)); + lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version); + marker += sizeof (lsp->ls_crypto.version); + + /* read a chunk of reserved data */ + bcopy(marker, lsp->ls_crypto.reserved1, + sizeof (lsp->ls_crypto.reserved1)); + marker += sizeof (lsp->ls_crypto.reserved1); + + /* read block number where encrypted data begins */ + bcopy(marker, &(lsp->ls_crypto.data_sector), + sizeof (lsp->ls_crypto.data_sector)); + lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector); + marker += sizeof (lsp->ls_crypto.data_sector); + + /* and ignore the rest until it is implemented */ + + lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; + return (0); + } + + /* + * We've requested encryption, but no magic was found, so it must be + * a new image. 
+ */ + + for (i = 0; i < sizeof (struct crypto_meta); i++) { + if (buf[i] != '\0') + return (EINVAL); + } + + marker = buf; + bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic)); + marker += sizeof (lofi_crypto_magic); + chead.version = htons(LOFI_CRYPTO_VERSION); + bcopy(&(chead.version), marker, sizeof (chead.version)); + marker += sizeof (chead.version); + marker += sizeof (chead.reserved1); + chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR); + bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector)); + + /* write the header */ + error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE, + CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + if (error != 0) + return (error); + + /* fix things up so it looks like we read this info */ + bcopy(lofi_crypto_magic, lsp->ls_crypto.magic, + sizeof (lofi_crypto_magic)); + lsp->ls_crypto.version = LOFI_CRYPTO_VERSION; + lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR; + lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; + return (0); } /* - * Check to see if the passed in signature is a valid - * one. If it is valid, return the index into - * lofi_compress_table. + * Check to see if the passed in signature is a valid one. If it is + * valid, return the index into lofi_compress_table. 
* * Return -1 if it is invalid */ -static int lofi_compress_select(char *signature) +static int +lofi_compress_select(const char *signature) { int i; @@ -1819,6 +2056,40 @@ static int lofi_compress_select(char *signature) return (-1); } +static int +lofi_init_compress(struct lofi_state *lsp) +{ + char buf[DEV_BSIZE]; + int compress_index; + ssize_t resid; + int error; + + error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, &resid); + + if (error != 0) + return (error); + + if ((compress_index = lofi_compress_select(buf)) == -1) + return (0); + + /* compression and encryption are mutually exclusive */ + if (lsp->ls_crypto_enabled) + return (ENOTSUP); + + /* initialize compression info for compressed lofi */ + lsp->ls_comp_algorithm_index = compress_index; + (void) strlcpy(lsp->ls_comp_algorithm, + lofi_compress_table[compress_index].l_name, + sizeof (lsp->ls_comp_algorithm)); + + /* Finally setup per-thread pre-allocated buffers */ + lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * + sizeof (struct compbuf), KM_SLEEP); + + return (lofi_map_compressed_file(lsp, buf)); +} + /* * map a file to a minor number. Return the minor number. 
*/ @@ -1826,72 +2097,53 @@ static int lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, int *rvalp, struct cred *credp, int ioctl_flag) { - minor_t newminor; - struct lofi_state *lsp; + minor_t minor = (minor_t)-1; + struct lofi_state *lsp = NULL; struct lofi_ioctl *klip; int error; - struct vnode *vp; - int64_t Nblocks_prop_val; - int64_t Size_prop_val; - int compress_index; + struct vnode *vp = NULL; vattr_t vattr; int flag; - enum vtype v_type; - int zalloced = 0; dev_t newdev; char namebuf[50]; - char buf[DEV_BSIZE]; - char crybuf[DEV_BSIZE]; - ssize_t resid; - boolean_t need_vn_close = B_FALSE; - boolean_t keycopied = B_FALSE; - boolean_t need_size_update = B_FALSE; - klip = copy_in_lofi_ioctl(ulip, ioctl_flag); - if (klip == NULL) - return (EFAULT); + error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); + if (error != 0) + return (error); mutex_enter(&lofi_lock); - if (!valid_filename(klip->li_filename)) { - error = EINVAL; - goto out; + mutex_enter(&curproc->p_lock); + if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) { + mutex_exit(&curproc->p_lock); + mutex_exit(&lofi_lock); + free_lofi_ioctl(klip); + return (error); } + mutex_exit(&curproc->p_lock); - if (file_to_minor(klip->li_filename) != 0) { + if (file_to_lofi_nocheck(klip->li_filename, NULL) == 0) { error = EBUSY; - goto out; + goto err; } if (pickminor) { - /* Find a free one */ - for (newminor = 1; newminor <= lofi_max_files; newminor++) - if (ddi_get_soft_state(lofi_statep, newminor) == NULL) - break; - if (newminor >= lofi_max_files) { + minor = (minor_t)id_allocff_nosleep(lofi_minor_id); + if (minor == (minor_t)-1) { error = EAGAIN; - goto out; + goto err; } } else { - newminor = klip->li_minor; - if (ddi_get_soft_state(lofi_statep, newminor) != NULL) { + if (ddi_get_soft_state(lofi_statep, klip->li_minor) != NULL) { error = EEXIST; - goto out; + goto err; } - } - /* make sure it's valid */ - error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW, - NULLVPP, 
&vp); - if (error) { - goto out; - } - v_type = vp->v_type; - VN_RELE(vp); - if (!V_ISLOFIABLE(v_type)) { - error = EINVAL; - goto out; + minor = (minor_t) + id_alloc_specific_nosleep(lofi_minor_id, klip->li_minor); + ASSERT(minor != (minor_t)-1); } + flag = FREAD | FWRITE | FOFFMAX | FEXCL; error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); if (error) { @@ -1899,78 +2151,58 @@ lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, flag &= ~FWRITE; error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); - if (error) { - goto out; - } + if (error) + goto err; + } + + if (!V_ISLOFIABLE(vp->v_type)) { + error = EINVAL; + goto err; } - need_vn_close = B_TRUE; vattr.va_mask = AT_SIZE; error = VOP_GETATTR(vp, &vattr, 0, credp, NULL); - if (error) { - goto out; - } + if (error) + goto err; + /* the file needs to be a multiple of the block size */ if ((vattr.va_size % DEV_BSIZE) != 0) { error = EINVAL; - goto out; - } - newdev = makedevice(getmajor(dev), newminor); - Size_prop_val = vattr.va_size; - if ((ddi_prop_update_int64(newdev, lofi_dip, - SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto out; + goto err; } - Nblocks_prop_val = vattr.va_size / DEV_BSIZE; - if ((ddi_prop_update_int64(newdev, lofi_dip, - NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto propout; - } - error = ddi_soft_state_zalloc(lofi_statep, newminor); + + /* lsp alloc+init */ + + error = ddi_soft_state_zalloc(lofi_statep, minor); if (error == DDI_FAILURE) { error = ENOMEM; - goto propout; - } - zalloced = 1; - (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); - error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor, - DDI_PSEUDO, NULL); - if (error != DDI_SUCCESS) { - error = ENXIO; - goto propout; - } - (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor); - error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor, - DDI_PSEUDO, NULL); - if (error != 
DDI_SUCCESS) { - /* remove block node */ - (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); - ddi_remove_minor_node(lofi_dip, namebuf); - error = ENXIO; - goto propout; - } - lsp = ddi_get_soft_state(lofi_statep, newminor); - lsp->ls_filename_sz = strlen(klip->li_filename) + 1; - lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP); - (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", - LOFI_DRIVER_NAME, newminor); - lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads, - minclsyspri, 1, lofi_taskq_maxalloc, 0); - lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor, - NULL, "disk", KSTAT_TYPE_IO, 1, 0); - if (lsp->ls_kstat) { - mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); - lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; - kstat_install(lsp->ls_kstat); + goto err; } + + lsp = ddi_get_soft_state(lofi_statep, minor); + list_insert_tail(&lofi_list, lsp); + + newdev = makedevice(getmajor(dev), minor); + lsp->ls_dev = newdev; + lsp->ls_zone = zone_find_by_id(getzoneid()); + ASSERT(lsp->ls_zone != NULL); + lsp->ls_uncomp_seg_sz = 0; + lsp->ls_comp_algorithm[0] = '\0'; + lsp->ls_crypto_offset = 0; + cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL); + mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL); + (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", + LOFI_DRIVER_NAME, minor); + lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads, + minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0); + list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache), offsetof(struct lofi_comp_cache, lc_list)); - mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL); /* * save open mode so file can be closed properly and vnode counts @@ -1978,288 +2210,115 @@ lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int 
pickminor, */ lsp->ls_openflag = flag; + lsp->ls_vp = vp; + lsp->ls_stacked_vp = vp; /* * Try to handle stacked lofs vnodes. */ if (vp->v_type == VREG) { - if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) { - lsp->ls_vp = vp; - } else { + vnode_t *realvp; + + if (VOP_REALVP(vp, &realvp, NULL) == 0) { /* - * Even though vp was obtained via vn_open(), we - * can't call vn_close() on it, since lofs will - * pass the VOP_CLOSE() on down to the realvp - * (which we are about to use). Hence we merely - * drop the reference to the lofs vnode and hold - * the realvp so things behave as if we've - * opened the realvp without any interaction - * with lofs. + * We need to use the realvp for uniqueness + * checking, but keep the stacked vp for + * LOFI_GET_FILENAME display. */ - VN_HOLD(lsp->ls_vp); - VN_RELE(vp); + VN_HOLD(realvp); + lsp->ls_vp = realvp; } - } else { - lsp->ls_vp = vp; } - lsp->ls_vp_size = vattr.va_size; - (void) strcpy(lsp->ls_filename, klip->li_filename); - if (rvalp) - *rvalp = (int)newminor; - klip->li_minor = newminor; - - /* - * Initialize crypto details for encrypted lofi - */ - if (klip->li_crypto_enabled) { - int ret; - - mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL); - - lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher); - if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) { - cmn_err(CE_WARN, "invalid cipher %s requested for %s", - klip->li_cipher, lsp->ls_filename); - error = EINVAL; - goto propout; - } - /* this is just initialization here */ - lsp->ls_mech.cm_param = NULL; - lsp->ls_mech.cm_param_len = 0; - - lsp->ls_iv_type = klip->li_iv_type; - lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher); - if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) { - cmn_err(CE_WARN, "invalid iv cipher %s requested" - " for %s", klip->li_iv_cipher, lsp->ls_filename); - error = EINVAL; - goto propout; - } - - /* iv mech must itself take a null iv */ - lsp->ls_iv_mech.cm_param = NULL; - lsp->ls_iv_mech.cm_param_len = 0; - lsp->ls_iv_len = 
klip->li_iv_len; - - /* - * Create ctx using li_cipher & the raw li_key after checking - * that it isn't a weak key. - */ - lsp->ls_key.ck_format = CRYPTO_KEY_RAW; - lsp->ls_key.ck_length = klip->li_key_len; - lsp->ls_key.ck_data = kmem_alloc( - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); - bcopy(klip->li_key, lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - keycopied = B_TRUE; - - ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); - if (ret != CRYPTO_SUCCESS) { - error = EINVAL; - cmn_err(CE_WARN, "weak key check failed for cipher " - "%s on file %s (0x%x)", klip->li_cipher, - lsp->ls_filename, ret); - goto propout; - } - } - lsp->ls_crypto_enabled = klip->li_crypto_enabled; - - /* - * Read the file signature to check if it is compressed or encrypted. - * Crypto signature is in a different location; both areas should - * read to keep compression and encryption mutually exclusive. - */ - if (lsp->ls_crypto_enabled) { - error = vn_rdwr(UIO_READ, lsp->ls_vp, crybuf, DEV_BSIZE, - CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - if (error != 0) - goto propout; - } - error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, &resid); - if (error != 0) - goto propout; - - /* initialize these variables for all lofi files */ - lsp->ls_comp_bufs = NULL; - lsp->ls_uncomp_seg_sz = 0; + lsp->ls_vp_size = vattr.va_size; lsp->ls_vp_comp_size = lsp->ls_vp_size; - lsp->ls_comp_algorithm[0] = '\0'; - /* encrypted lofi reads/writes shifted by crypto metadata size */ - lsp->ls_crypto_offset = 0; - - /* this is a compressed lofi */ - if ((compress_index = lofi_compress_select(buf)) != -1) { - - /* compression and encryption are mutually exclusive */ - if (klip->li_crypto_enabled) { - error = ENOTSUP; - goto propout; - } + lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, minor, + NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid()); - /* initialize compression info for compressed lofi */ - 
lsp->ls_comp_algorithm_index = compress_index; - (void) strlcpy(lsp->ls_comp_algorithm, - lofi_compress_table[compress_index].l_name, - sizeof (lsp->ls_comp_algorithm)); - - error = lofi_map_compressed_file(lsp, buf); - if (error != 0) - goto propout; - need_size_update = B_TRUE; - - /* this is an encrypted lofi */ - } else if (strncmp(crybuf, lofi_crypto_magic, - sizeof (lofi_crypto_magic)) == 0) { - - char *marker = crybuf; - - /* - * This is the case where the header in the lofi image is - * already initialized to indicate it is encrypted. - * There is another case (see below) where encryption is - * requested but the lofi image has never been used yet, - * so the header needs to be written with encryption magic. - */ - - /* indicate this must be an encrypted lofi due to magic */ - klip->li_crypto_enabled = B_TRUE; - - /* - * The encryption header information is laid out this way: - * 6 bytes: hex "CFLOFI" - * 2 bytes: version = 0 ... for now - * 96 bytes: reserved1 (not implemented yet) - * 4 bytes: data_sector = 2 ... for now - * more... 
not implemented yet - */ - - /* copy the magic */ - bcopy(marker, lsp->ls_crypto.magic, - sizeof (lsp->ls_crypto.magic)); - marker += sizeof (lsp->ls_crypto.magic); - - /* read the encryption version number */ - bcopy(marker, &(lsp->ls_crypto.version), - sizeof (lsp->ls_crypto.version)); - lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version); - marker += sizeof (lsp->ls_crypto.version); + if (lsp->ls_kstat == NULL) { + error = ENOMEM; + goto err; + } - /* read a chunk of reserved data */ - bcopy(marker, lsp->ls_crypto.reserved1, - sizeof (lsp->ls_crypto.reserved1)); - marker += sizeof (lsp->ls_crypto.reserved1); + lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; + kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID); - /* read block number where encrypted data begins */ - bcopy(marker, &(lsp->ls_crypto.data_sector), - sizeof (lsp->ls_crypto.data_sector)); - lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector); - marker += sizeof (lsp->ls_crypto.data_sector); + if ((error = lofi_init_crypto(lsp, klip)) != 0) + goto err; - /* and ignore the rest until it is implemented */ + if ((error = lofi_init_compress(lsp)) != 0) + goto err; - lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; - need_size_update = B_TRUE; + fake_disk_geometry(lsp); - /* neither compressed nor encrypted, BUT could be new encrypted lofi */ - } else if (klip->li_crypto_enabled) { + /* create minor nodes */ - /* - * This is the case where encryption was requested but the - * appears to be entirely blank where the encryption header - * would have been in the lofi image. If it is blank, - * assume it is a brand new lofi image and initialize the - * header area with encryption magic and current version - * header data. If it is not blank, that's an error. 
- */ - int i; - char *marker; - struct crypto_meta chead; + (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); + error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, minor, + DDI_PSEUDO, NULL); + if (error != DDI_SUCCESS) { + error = ENXIO; + goto err; + } - for (i = 0; i < sizeof (struct crypto_meta); i++) - if (crybuf[i] != '\0') - break; - if (i != sizeof (struct crypto_meta)) { - error = EINVAL; - goto propout; - } + (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); + error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, minor, + DDI_PSEUDO, NULL); + if (error != DDI_SUCCESS) { + /* remove block node */ + (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); + ddi_remove_minor_node(lofi_dip, namebuf); + error = ENXIO; + goto err; + } - /* nothing there, initialize as encrypted lofi */ - marker = crybuf; - bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic)); - marker += sizeof (lofi_crypto_magic); - chead.version = htons(LOFI_CRYPTO_VERSION); - bcopy(&(chead.version), marker, sizeof (chead.version)); - marker += sizeof (chead.version); - marker += sizeof (chead.reserved1); - chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR); - bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector)); - - /* write the header */ - error = vn_rdwr(UIO_WRITE, lsp->ls_vp, crybuf, DEV_BSIZE, - CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - if (error != 0) - goto propout; + /* create DDI properties */ - /* fix things up so it looks like we read this info */ - bcopy(lofi_crypto_magic, lsp->ls_crypto.magic, - sizeof (lofi_crypto_magic)); - lsp->ls_crypto.version = LOFI_CRYPTO_VERSION; - lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR; + if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME, + lsp->ls_vp_size - lsp->ls_crypto_offset)) != DDI_PROP_SUCCESS) { + error = EINVAL; + goto nodeerr; + } - lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; - need_size_update = B_TRUE; + if 
((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME, + (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE)) + != DDI_PROP_SUCCESS) { + error = EINVAL; + goto nodeerr; } - /* - * Either lsp->ls_vp_size or lsp->ls_crypto_offset changed; - * for encrypted lofi, advertise that it is somewhat shorter - * due to embedded crypto metadata section - */ - if (need_size_update) { - /* update DDI properties */ - Size_prop_val = lsp->ls_vp_size - lsp->ls_crypto_offset; - if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME, - Size_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto propout; - } - Nblocks_prop_val = - (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE; - if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME, - Nblocks_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto propout; - } + if (ddi_prop_update_string(newdev, lofi_dip, ZONE_PROP_NAME, + (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) { + error = EINVAL; + goto nodeerr; } - fake_disk_geometry(lsp); + kstat_install(lsp->ls_kstat); + mutex_exit(&lofi_lock); + + if (rvalp) + *rvalp = (int)minor; + klip->li_minor = minor; (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); free_lofi_ioctl(klip); return (0); -propout: - if (keycopied) { - bzero(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - kmem_free(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - lsp->ls_key.ck_data = NULL; - lsp->ls_key.ck_length = 0; - } - - if (zalloced) - ddi_soft_state_free(lofi_statep, newminor); +nodeerr: + lofi_free_dev(newdev); +err: + if (lsp != NULL) { + lofi_destroy(lsp, credp); + } else { + if (vp != NULL) { + (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL); + VN_RELE(vp); + } - (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); - (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); + if (minor != (minor_t)-1) + id_free(lofi_minor_id, minor); -out: - if (need_vn_close) { - (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL); - 
VN_RELE(vp); + rctl_decr_lofi(curproc->p_zone, 1); } mutex_exit(&lofi_lock); @@ -2276,30 +2335,34 @@ lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, { struct lofi_state *lsp; struct lofi_ioctl *klip; - minor_t minor; + int err; - klip = copy_in_lofi_ioctl(ulip, ioctl_flag); - if (klip == NULL) - return (EFAULT); + err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); + if (err != 0) + return (err); mutex_enter(&lofi_lock); if (byfilename) { - minor = file_to_minor(klip->li_filename); - } else { - minor = klip->li_minor; - } - if (minor == 0) { + if ((err = file_to_lofi(klip->li_filename, &lsp)) != 0) { + mutex_exit(&lofi_lock); + return (err); + } + } else if (klip->li_minor == 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); return (ENXIO); + } else { + lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); } - lsp = ddi_get_soft_state(lofi_statep, minor); - if (lsp == NULL || lsp->ls_vp == NULL) { + + if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); return (ENXIO); } + klip->li_minor = getminor(lsp->ls_dev); + /* * If it's still held open, we'll do one of three things: * @@ -2331,13 +2394,8 @@ lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, while (lsp->ls_vp_iocount > 0) cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock); mutex_exit(&lsp->ls_vp_lock); - lofi_free_handle(dev, minor, lsp, credp); - klip->li_minor = minor; - mutex_exit(&lofi_lock); - (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); - free_lofi_ioctl(klip); - return (0); + goto out; } else if (klip->li_cleanup) { lsp->ls_cleanup = 1; mutex_exit(&lofi_lock); @@ -2350,9 +2408,10 @@ lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, return (EBUSY); } - lofi_free_handle(dev, minor, lsp, credp); +out: + lofi_free_dev(dev); + lofi_destroy(lsp, credp); - klip->li_minor = minor; mutex_exit(&lofi_lock); (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); free_lofi_ioctl(klip); @@ -2368,31 
+2427,39 @@ static int lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, struct cred *credp, int ioctl_flag) { - struct lofi_state *lsp; struct lofi_ioctl *klip; + struct lofi_state *lsp; int error; - minor_t minor; - klip = copy_in_lofi_ioctl(ulip, ioctl_flag); - if (klip == NULL) - return (EFAULT); + error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); + if (error != 0) + return (error); switch (which) { case LOFI_GET_FILENAME: - minor = klip->li_minor; - if (minor == 0) { + if (klip->li_minor == 0) { free_lofi_ioctl(klip); return (EINVAL); } mutex_enter(&lofi_lock); - lsp = ddi_get_soft_state(lofi_statep, minor); - if (lsp == NULL) { + lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); + if (lsp == NULL || lofi_access(lsp) != 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); return (ENXIO); } - (void) strcpy(klip->li_filename, lsp->ls_filename); + + /* + * This may fail if, for example, we're trying to look + * up a zoned NFS path from the global zone. + */ + if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename, + sizeof (klip->li_filename), CRED()) != 0) { + (void) strlcpy(klip->li_filename, "?", + sizeof (klip->li_filename)); + } + (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, sizeof (klip->li_algorithm)); klip->li_crypto_enabled = lsp->ls_crypto_enabled; @@ -2402,35 +2469,29 @@ lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, return (error); case LOFI_GET_MINOR: mutex_enter(&lofi_lock); - klip->li_minor = file_to_minor(klip->li_filename); - /* caller should not depend on klip->li_crypto_enabled here */ + error = file_to_lofi(klip->li_filename, &lsp); + if (error == 0) + klip->li_minor = getminor(lsp->ls_dev); mutex_exit(&lofi_lock); - if (klip->li_minor == 0) { - free_lofi_ioctl(klip); - return (ENOENT); - } - error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); + + if (error == 0) + error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); + free_lofi_ioctl(klip); return (error); case LOFI_CHECK_COMPRESSED: 
mutex_enter(&lofi_lock); - klip->li_minor = file_to_minor(klip->li_filename); - mutex_exit(&lofi_lock); - if (klip->li_minor == 0) { - free_lofi_ioctl(klip); - return (ENOENT); - } - mutex_enter(&lofi_lock); - lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); - if (lsp == NULL) { + error = file_to_lofi(klip->li_filename, &lsp); + if (error != 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); - return (ENXIO); + return (error); } - ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0); + klip->li_minor = getminor(lsp->ls_dev); (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, sizeof (klip->li_algorithm)); + mutex_exit(&lofi_lock); error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); free_lofi_ioctl(klip); @@ -2439,7 +2500,6 @@ lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, free_lofi_ioctl(klip); return (EINVAL); } - } static int @@ -2484,17 +2544,41 @@ lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, case LOFI_GET_MINOR: return (lofi_get_info(dev, lip, LOFI_GET_MINOR, credp, flag)); + + /* + * This API made limited sense when this value was fixed + * at LOFI_MAX_FILES. However, its use to iterate + * across all possible devices in lofiadm means we don't + * want to return L_MAXMIN32, but the highest + * *allocated* minor. 
+ */ case LOFI_GET_MAXMINOR: - error = ddi_copyout(&lofi_max_files, &lip->li_minor, - sizeof (lofi_max_files), flag); + minor = 0; + + mutex_enter(&lofi_lock); + + for (lsp = list_head(&lofi_list); lsp != NULL; + lsp = list_next(&lofi_list, lsp)) { + if (lofi_access(lsp) != 0) + continue; + + if (getminor(lsp->ls_dev) > minor) + minor = getminor(lsp->ls_dev); + } + + mutex_exit(&lofi_lock); + + error = ddi_copyout(&minor, &lip->li_minor, + sizeof (minor), flag); if (error) return (EFAULT); return (0); + case LOFI_CHECK_COMPRESSED: return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED, credp, flag)); default: - break; + return (EINVAL); } } @@ -2644,16 +2728,21 @@ _init(void) { int error; + list_create(&lofi_list, sizeof (struct lofi_state), + offsetof(struct lofi_state, ls_list)); + error = ddi_soft_state_init(&lofi_statep, sizeof (struct lofi_state), 0); if (error) return (error); mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); + error = mod_install(&modlinkage); if (error) { mutex_destroy(&lofi_lock); ddi_soft_state_fini(&lofi_statep); + list_destroy(&lofi_list); } return (error); @@ -2664,8 +2753,14 @@ _fini(void) { int error; - if (lofi_busy()) + mutex_enter(&lofi_lock); + + if (!list_is_empty(&lofi_list)) { + mutex_exit(&lofi_lock); return (EBUSY); + } + + mutex_exit(&lofi_lock); error = mod_remove(&modlinkage); if (error) @@ -2673,6 +2768,7 @@ _fini(void) mutex_destroy(&lofi_lock); ddi_soft_state_fini(&lofi_statep); + list_destroy(&lofi_list); return (error); } diff --git a/usr/src/uts/common/os/id_space.c b/usr/src/uts/common/os/id_space.c index 07b1a630ea..2dad0cb940 100644 --- a/usr/src/uts/common/os/id_space.c +++ b/usr/src/uts/common/os/id_space.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #include <sys/types.h> @@ -47,7 +46,7 @@ * As an ID space is designed for representing a range of id_t's, there * is a preexisting maximal range: [0, MAXUID]. ID space requests outside * that range will fail on a DEBUG kernel. The id_allocff*() functions - * return the first available id, and should be used when there is benifit + * return the first available id, and should be used when there is benefit * to having a compact allocated range. * * (Presently, the id_space_t abstraction supports only direct allocations; ID @@ -56,6 +55,9 @@ * arrives.) */ +#define ID_TO_ADDR(id) ((void *)(uintptr_t)(id + 1)) +#define ADDR_TO_ID(addr) ((id_t)((uintptr_t)addr - 1)) + /* * Create an arena to represent the range [low, high). * Caller must be in a context in which VM_SLEEP is legal. @@ -66,7 +68,7 @@ id_space_create(const char *name, id_t low, id_t high) ASSERT(low >= 0); ASSERT(low < high); - return (vmem_create(name, (void *)(uintptr_t)(low + 1), high - low, 1, + return (vmem_create(name, ID_TO_ADDR(low), high - low, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER)); } @@ -83,8 +85,7 @@ id_space_destroy(id_space_t *isp) void id_space_extend(id_space_t *isp, id_t low, id_t high) { - (void) vmem_add(isp, - (void *)(uintptr_t)(low + 1), high - low, VM_SLEEP); + (void) vmem_add(isp, ID_TO_ADDR(low), high - low, VM_SLEEP); } /* @@ -94,8 +95,7 @@ id_space_extend(id_space_t *isp, id_t low, id_t high) id_t id_alloc(id_space_t *isp) { - return ((id_t)(uintptr_t) - vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT) - 1); + return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT))); } /* @@ -106,8 +106,7 @@ id_alloc(id_space_t *isp) id_t id_alloc_nosleep(id_space_t *isp) { - return ((id_t)(uintptr_t) - vmem_alloc(isp, 1, VM_NOSLEEP | VM_NEXTFIT) - 1); + return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_NOSLEEP | VM_NEXTFIT))); } /* @@ -117,8 +116,7 @@ id_alloc_nosleep(id_space_t *isp) id_t id_allocff(id_space_t *isp) { - return ((id_t)(uintptr_t) - vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT) 
- 1); + return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT))); } /* @@ -129,8 +127,25 @@ id_allocff(id_space_t *isp) id_t id_allocff_nosleep(id_space_t *isp) { - return ((id_t)(uintptr_t) - vmem_alloc(isp, 1, VM_NOSLEEP | VM_FIRSTFIT) - 1); + return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_NOSLEEP | VM_FIRSTFIT))); +} + +/* + * Allocate a specific identifier if possible, returning the id if + * successful, or -1 on failure. + */ +id_t +id_alloc_specific_nosleep(id_space_t *isp, id_t id) +{ + void *minaddr = ID_TO_ADDR(id); + void *maxaddr = ID_TO_ADDR(id + 1); + + /* + * Note that even though we're vmem_free()ing this later, it + * should be OK, since there's no quantum cache. + */ + return (ADDR_TO_ID(vmem_xalloc(isp, 1, 1, 0, 0, + minaddr, maxaddr, VM_NOSLEEP))); } /* @@ -140,5 +155,5 @@ id_allocff_nosleep(id_space_t *isp) void id_free(id_space_t *isp, id_t id) { - vmem_free(isp, (void *)(uintptr_t)(id + 1), 1); + vmem_free(isp, ID_TO_ADDR(id), 1); } diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c index caed51c2eb..e68565f141 100644 --- a/usr/src/uts/common/os/policy.c +++ b/usr/src/uts/common/os/policy.c @@ -755,6 +755,48 @@ secpolicy_fs_mount_clearopts(cred_t *cr, struct vfs *vfsp) } +int +secpolicy_fs_allowed_mount(const char *fsname) +{ + struct vfssw *vswp; + const char *p; + size_t len; + + ASSERT(fsname != NULL); + ASSERT(fsname[0] != '\0'); + + if (INGLOBALZONE(curproc)) + return (0); + + vswp = vfs_getvfssw(fsname); + if (vswp == NULL) + return (ENOENT); + + if ((vswp->vsw_flag & VSW_ZMOUNT) != 0) { + vfs_unrefvfssw(vswp); + return (0); + } + + vfs_unrefvfssw(vswp); + + p = curzone->zone_fs_allowed; + len = strlen(fsname); + + while (p != NULL && *p != '\0') { + if (strncmp(p, fsname, len) == 0) { + char c = *(p + len); + if (c == '\0' || c == ',') + return (0); + } + + /* skip to beyond the next comma */ + if ((p = strchr(p, ',')) != NULL) + p++; + } + + return (EPERM); +} + extern vnode_t *rootvp; extern vfs_t *rootvfs; 
diff --git a/usr/src/uts/common/os/rctl.c b/usr/src/uts/common/os/rctl.c index bd32159049..fed4331f23 100644 --- a/usr/src/uts/common/os/rctl.c +++ b/usr/src/uts/common/os/rctl.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/atomic.h> @@ -3058,6 +3057,64 @@ rctl_decr_swap(zone_t *zone, size_t swap) } /* + * rctl_incr_lofi(proc_t *, zone_t *, size_t) + * + * Overview + * Increments the number of lofi devices for the zone. + * + * Return values + * 0 on success. EAGAIN if increment fails due to an rctl value + * on the zone. + * + * Callers context + * p_lock held on specified proc. + */ +int +rctl_incr_lofi(proc_t *proc, zone_t *zone, size_t incr) +{ + rctl_entity_p_t e; + + ASSERT(MUTEX_HELD(&proc->p_lock)); + ASSERT(incr > 0); + + e.rcep_p.zone = zone; + e.rcep_t = RCENTITY_ZONE; + + mutex_enter(&zone->zone_rctl_lock); + + /* Check for overflow */ + if ((zone->zone_max_lofi + incr) < zone->zone_max_lofi) { + mutex_exit(&zone->zone_rctl_lock); + return (EAGAIN); + } + if ((zone->zone_max_lofi + incr) > zone->zone_max_lofi_ctl) { + if (rctl_test_entity(rc_zone_max_lofi, zone->zone_rctls, + proc, &e, incr, 0) & RCT_DENY) { + mutex_exit(&zone->zone_rctl_lock); + return (EAGAIN); + } + } + zone->zone_max_lofi += incr; + mutex_exit(&zone->zone_rctl_lock); + return (0); +} + +/* + * rctl_decr_lofi(zone_t *, size_t) + * + * Overview + * Decrements the number of lofi devices for the zone. 
+ */ +void +rctl_decr_lofi(zone_t *zone, size_t decr) +{ + mutex_enter(&zone->zone_rctl_lock); + ASSERT(zone->zone_max_lofi >= decr); + zone->zone_max_lofi -= decr; + mutex_exit(&zone->zone_rctl_lock); +} + +/* * Create resource kstat */ static kstat_t * diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 1d3cfc8f51..d29f6b9986 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -159,6 +159,8 @@ * related to the zone.max-lwps rctl. * zone_mem_lock: This is a per-zone lock used to protect the fields * related to the zone.max-locked-memory and zone.max-swap rctls. + * zone_rctl_lock: This is a per-zone lock used to protect other rctls, + * currently just max_lofi * zsd_key_lock: This is a global lock protecting the key state for ZSD. * zone_deathrow_lock: This is a global lock protecting the "deathrow" * list (a list of zones in the ZONE_IS_DEAD state). @@ -340,6 +342,7 @@ const char *zone_status_table[] = { rctl_hndl_t rc_zone_cpu_shares; rctl_hndl_t rc_zone_locked_mem; rctl_hndl_t rc_zone_max_swap; +rctl_hndl_t rc_zone_max_lofi; rctl_hndl_t rc_zone_cpu_cap; rctl_hndl_t rc_zone_nlwps; rctl_hndl_t rc_zone_shmmax; @@ -1584,6 +1587,57 @@ static rctl_ops_t zone_max_swap_ops = { zone_max_swap_test }; +/*ARGSUSED*/ +static rctl_qty_t +zone_max_lofi_usage(rctl_t *rctl, struct proc *p) +{ + rctl_qty_t q; + zone_t *z = p->p_zone; + + ASSERT(MUTEX_HELD(&p->p_lock)); + mutex_enter(&z->zone_rctl_lock); + q = z->zone_max_lofi; + mutex_exit(&z->zone_rctl_lock); + return (q); +} + +/*ARGSUSED*/ +static int +zone_max_lofi_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, + rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags) +{ + rctl_qty_t q; + zone_t *z; + + z = e->rcep_p.zone; + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(MUTEX_HELD(&z->zone_rctl_lock)); + q = z->zone_max_lofi; + if (q + incr > rcntl->rcv_value) + return (1); + return (0); +} + +/*ARGSUSED*/ +static int +zone_max_lofi_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t 
*e, + rctl_qty_t nv) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + if (e->rcep_p.zone == NULL) + return (0); + e->rcep_p.zone->zone_max_lofi_ctl = nv; + return (0); +} + +static rctl_ops_t zone_max_lofi_ops = { + rcop_no_action, + zone_max_lofi_usage, + zone_max_lofi_set, + zone_max_lofi_test +}; + /* * Helper function to brand the zone with a unique ID. */ @@ -1732,6 +1786,8 @@ zone_zsd_init(void) zone0.zone_locked_mem_ctl = UINT64_MAX; ASSERT(zone0.zone_max_swap == 0); zone0.zone_max_swap_ctl = UINT64_MAX; + zone0.zone_max_lofi = 0; + zone0.zone_max_lofi_ctl = UINT64_MAX; zone0.zone_shmmax = 0; zone0.zone_ipc.ipcq_shmmni = 0; zone0.zone_ipc.ipcq_semmni = 0; @@ -1740,6 +1796,7 @@ zone_zsd_init(void) zone0.zone_nodename = utsname.nodename; zone0.zone_domain = srpc_domain; zone0.zone_hostid = HW_INVALID_HOSTID; + zone0.zone_fs_allowed = NULL; zone0.zone_ref = 1; zone0.zone_id = GLOBAL_ZONEID; zone0.zone_status = ZONE_IS_RUNNING; @@ -1902,6 +1959,11 @@ zone_init(void) RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, &zone_max_swap_ops); + rc_zone_max_lofi = rctl_register("zone.max-lofi", + RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | + RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, + &zone_max_lofi_ops); + /* * Initialize the ``global zone''. 
*/ @@ -2040,9 +2102,11 @@ zone_free(zone_t *zone) if (zone->zone_rctls != NULL) rctl_set_free(zone->zone_rctls); if (zone->zone_bootargs != NULL) - kmem_free(zone->zone_bootargs, strlen(zone->zone_bootargs) + 1); + strfree(zone->zone_bootargs); if (zone->zone_initname != NULL) - kmem_free(zone->zone_initname, strlen(zone->zone_initname) + 1); + strfree(zone->zone_initname); + if (zone->zone_fs_allowed != NULL) + strfree(zone->zone_fs_allowed); if (zone->zone_pfexecd != NULL) klpd_freelist(&zone->zone_pfexecd); id_free(zoneid_space, zone->zone_id); @@ -2104,21 +2168,20 @@ zone_status_get(zone_t *zone) static int zone_set_bootargs(zone_t *zone, const char *zone_bootargs) { - char *bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); + char *buf = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); int err = 0; ASSERT(zone != global_zone); - if ((err = copyinstr(zone_bootargs, bootargs, BOOTARGS_MAX, NULL)) != 0) + if ((err = copyinstr(zone_bootargs, buf, BOOTARGS_MAX, NULL)) != 0) goto done; /* EFAULT or ENAMETOOLONG */ if (zone->zone_bootargs != NULL) - kmem_free(zone->zone_bootargs, strlen(zone->zone_bootargs) + 1); + strfree(zone->zone_bootargs); - zone->zone_bootargs = kmem_alloc(strlen(bootargs) + 1, KM_SLEEP); - (void) strcpy(zone->zone_bootargs, bootargs); + zone->zone_bootargs = strdup(buf); done: - kmem_free(bootargs, BOOTARGS_MAX); + kmem_free(buf, BOOTARGS_MAX); return (err); } @@ -2164,6 +2227,27 @@ zone_set_brand(zone_t *zone, const char *brand) } static int +zone_set_fs_allowed(zone_t *zone, const char *zone_fs_allowed) +{ + char *buf = kmem_zalloc(ZONE_FS_ALLOWED_MAX, KM_SLEEP); + int err = 0; + + ASSERT(zone != global_zone); + if ((err = copyinstr(zone_fs_allowed, buf, + ZONE_FS_ALLOWED_MAX, NULL)) != 0) + goto done; + + if (zone->zone_fs_allowed != NULL) + strfree(zone->zone_fs_allowed); + + zone->zone_fs_allowed = strdup(buf); + +done: + kmem_free(buf, ZONE_FS_ALLOWED_MAX); + return (err); +} + +static int zone_set_initname(zone_t *zone, const char *zone_initname) { 
char initname[INITNAME_SZ]; @@ -2175,7 +2259,7 @@ zone_set_initname(zone_t *zone, const char *zone_initname) return (err); /* EFAULT or ENAMETOOLONG */ if (zone->zone_initname != NULL) - kmem_free(zone->zone_initname, strlen(zone->zone_initname) + 1); + strfree(zone->zone_initname); zone->zone_initname = kmem_alloc(strlen(initname) + 1, KM_SLEEP); (void) strcpy(zone->zone_initname, initname); @@ -3856,6 +3940,7 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_ipc.ipcq_semmni = 0; zone->zone_ipc.ipcq_msgmni = 0; zone->zone_bootargs = NULL; + zone->zone_fs_allowed = NULL; zone->zone_initname = kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP); (void) strcpy(zone->zone_initname, zone_default_initname); @@ -3865,6 +3950,8 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_locked_mem_ctl = UINT64_MAX; zone->zone_max_swap = 0; zone->zone_max_swap_ctl = UINT64_MAX; + zone->zone_max_lofi = 0; + zone->zone_max_lofi_ctl = UINT64_MAX; zone0.zone_lockedmem_kstat = NULL; zone0.zone_swapresv_kstat = NULL; @@ -4790,6 +4877,20 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) error = EINVAL; } break; + case ZONE_ATTR_FS_ALLOWED: + if (zone->zone_fs_allowed == NULL) + outstr = ""; + else + outstr = zone->zone_fs_allowed; + size = strlen(outstr) + 1; + if (bufsize > size) + bufsize = size; + if (buf != NULL) { + err = copyoutstr(outstr, buf, bufsize, NULL); + if (err != 0 && err != ENAMETOOLONG) + error = EFAULT; + } + break; default: if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) { size = bufsize; @@ -4853,6 +4954,9 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) case ZONE_ATTR_BRAND: err = zone_set_brand(zone, (const char *)buf); break; + case ZONE_ATTR_FS_ALLOWED: + err = zone_set_fs_allowed(zone, (const char *)buf); + break; case ZONE_ATTR_PHYS_MCAP: err = zone_set_phys_mcap(zone, (const uint64_t *)buf); break; diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h 
b/usr/src/uts/common/sys/fs/sdev_impl.h index 66e8562cfa..0a841e9d22 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_SDEV_IMPL_H @@ -230,6 +229,7 @@ typedef enum { #define SDEV_ATTR_INVALID 0x0080 /* invalid node attributes, */ /* need update */ #define SDEV_SUBDIR 0x0100 /* match all subdirs under here */ +#define SDEV_ZONED 0x0200 /* zoned subdir */ /* sdev_lookup_flags */ #define SDEV_LOOKUP 0x0001 /* node creation in progress */ diff --git a/usr/src/uts/common/sys/id_space.h b/usr/src/uts/common/sys/id_space.h index 1f7762316f..d56fcceb5a 100644 --- a/usr/src/uts/common/sys/id_space.h +++ b/usr/src/uts/common/sys/id_space.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _ID_SPACE_H @@ -46,6 +45,7 @@ id_t id_alloc(id_space_t *); id_t id_alloc_nosleep(id_space_t *); id_t id_allocff(id_space_t *); id_t id_allocff_nosleep(id_space_t *); +id_t id_alloc_specific_nosleep(id_space_t *, id_t); void id_free(id_space_t *, id_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/lofi.h b/usr/src/uts/common/sys/lofi.h index 33db138112..8e385b6fe2 100644 --- a/usr/src/uts/common/sys/lofi.h +++ b/usr/src/uts/common/sys/lofi.h @@ -34,6 +34,7 @@ #include <sys/vnode.h> #include <sys/list.h> #include <sys/crypto/api.h> +#include <sys/zone.h> #ifdef __cplusplus extern "C" { @@ -176,15 +177,6 @@ struct lofi_comp_cache { uint64_t lc_index; /* segment index */ }; -/* - * We limit the maximum number of active lofi devices to 128, which seems very - * large. 
You can tune this by changing lofi_max_files in /etc/system. - * If you change it dynamically, which you probably shouldn't do, make sure - * to only _increase_ it. - */ -#define LOFI_MAX_FILES 128 -extern uint32_t lofi_max_files; - #define V_ISLOFIABLE(vtype) \ ((vtype == VREG) || (vtype == VBLK) || (vtype == VCHR)) @@ -219,9 +211,8 @@ struct crypto_meta { }; struct lofi_state { - char *ls_filename; /* filename to open */ - size_t ls_filename_sz; - struct vnode *ls_vp; /* open vnode */ + vnode_t *ls_vp; /* open real vnode */ + vnode_t *ls_stacked_vp; /* open vnode */ kmutex_t ls_vp_lock; /* protects ls_vp */ kcondvar_t ls_vp_cv; /* signal changes to ls_vp */ uint32_t ls_vp_iocount; /* # pending I/O requests */ @@ -238,6 +229,9 @@ struct lofi_state { struct dk_geom ls_dkg; struct vtoc ls_vtoc; struct dk_cinfo ls_ci; + zone_t *ls_zone; + list_node_t ls_list; /* all lofis */ + dev_t ls_dev; /* this node's dev_t */ /* the following fields are required for compression support */ int ls_comp_algorithm_index; /* idx into compress_table */ diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h index 338ce404b1..bcd5ba2b4c 100644 --- a/usr/src/uts/common/sys/policy.h +++ b/usr/src/uts/common/sys/policy.h @@ -89,12 +89,13 @@ int secpolicy_cpc_cpu(const cred_t *); int secpolicy_dispadm(const cred_t *); int secpolicy_error_inject(const cred_t *); int secpolicy_excl_open(const cred_t *); -int secpolicy_fs_mount(cred_t *, vnode_t *, struct vfs *); -int secpolicy_fs_unmount(cred_t *, struct vfs *); +int secpolicy_fs_allowed_mount(const char *); int secpolicy_fs_config(const cred_t *, const struct vfs *); int secpolicy_fs_linkdir(const cred_t *, const struct vfs *); int secpolicy_fs_minfree(const cred_t *, const struct vfs *); +int secpolicy_fs_mount(cred_t *, vnode_t *, struct vfs *); int secpolicy_fs_quota(const cred_t *, const struct vfs *); +int secpolicy_fs_unmount(cred_t *, struct vfs *); int secpolicy_idmap(const cred_t *); int secpolicy_ip(const 
cred_t *, int, boolean_t); int secpolicy_ip_config(const cred_t *, boolean_t); diff --git a/usr/src/uts/common/sys/rctl.h b/usr/src/uts/common/sys/rctl.h index af81cd387b..c376ac2df7 100644 --- a/usr/src/uts/common/sys/rctl.h +++ b/usr/src/uts/common/sys/rctl.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_RCTL_H @@ -337,6 +336,9 @@ void rctl_decr_locked_mem(struct proc *, struct kproject *, rctl_qty_t, int rctl_incr_swap(struct proc *, struct zone *, size_t); void rctl_decr_swap(struct zone *, size_t); +int rctl_incr_lofi(struct proc *, struct zone *, size_t); +void rctl_decr_lofi(struct zone *, size_t); + struct kstat *rctl_kstat_create_zone(struct zone *, char *, uchar_t, uint_t, uchar_t); diff --git a/usr/src/uts/common/sys/vfs.h b/usr/src/uts/common/sys/vfs.h index bae4e5b87f..97e176fc1a 100644 --- a/usr/src/uts/common/sys/vfs.h +++ b/usr/src/uts/common/sys/vfs.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -439,6 +438,7 @@ enum { #define VSW_STATS 0x20 /* file system can collect stats */ #define VSW_XID 0x40 /* file system supports extended ids */ #define VSW_CANLOFI 0x80 /* file system supports lofi mounts */ +#define VSW_ZMOUNT 0x100 /* file system always allowed in a zone */ #define VSW_INSTALLED 0x8000 /* this vsw is associated with a file system */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index ead3f94774..5c61a6833e 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -96,10 +96,13 @@ extern "C" { #define ZONE_ATTR_SCHED_CLASS 13 #define ZONE_ATTR_FLAGS 14 #define ZONE_ATTR_HOSTID 15 +#define ZONE_ATTR_FS_ALLOWED 16 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 +#define ZONE_FS_ALLOWED_MAX 1024 + #define ZONE_EVENT_CHANNEL "com.sun:zones:status" #define ZONE_EVENT_STATUS_CLASS "status" #define ZONE_EVENT_STATUS_SUBCLASS "change" @@ -379,6 +382,11 @@ typedef struct zone { rctl_qty_t zone_max_swap_ctl; /* current swap limit. */ /* Protected by */ /* zone_rctls->rcs_lock */ + kmutex_t zone_rctl_lock; /* protects zone_max_lofi */ + rctl_qty_t zone_max_lofi; /* lofi devs for zone */ + rctl_qty_t zone_max_lofi_ctl; /* current lofi limit. */ + /* Protected by */ + /* zone_rctls->rcs_lock */ list_t zone_zsd; /* list of Zone-Specific Data values */ kcondvar_t zone_cv; /* used to signal state changes */ struct proc *zone_zsched; /* Dummy kernel "zsched" process */ @@ -443,6 +451,8 @@ typedef struct zone { krwlock_t zone_mntfs_db_lock; struct klpd_reg *zone_pfexecd; + + char *zone_fs_allowed; } zone_t; /* @@ -664,6 +674,7 @@ extern int zone_walk(int (*)(zone_t *, void *), void *); extern rctl_hndl_t rc_zone_locked_mem; extern rctl_hndl_t rc_zone_max_swap; +extern rctl_hndl_t rc_zone_max_lofi; #endif /* _KERNEL */ |