summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Mustacchi <rm@joyent.com>2014-05-02 16:05:47 +0000
committerRobert Mustacchi <rm@joyent.com>2014-05-14 22:48:13 +0000
commit74d5b5a7ae8758330255bfe28ab2399ad19fdd0e (patch)
treed7d4656852b8d2dcff9a4483d403698022fc3f35
parent52c3c3033b9ec8b2033fb516ab2a6efd11323393 (diff)
downloadillumos-joyent-74d5b5a7ae8758330255bfe28ab2399ad19fdd0e.tar.gz
OS-2677 modules should appear in /system/boot
Reviewed by: Keith M Wesolowski <wesolows@foobazco.org>
-rw-r--r--manifest4
-rw-r--r--usr/src/cmd/fs.d/bootfs/Makefile21
-rw-r--r--usr/src/cmd/fs.d/bootfs/mount.c139
-rw-r--r--usr/src/man/man7fs/Makefile5
-rw-r--r--usr/src/man/man7fs/bootfs.7fs90
-rw-r--r--usr/src/uts/common/Makefile.files1
-rw-r--r--usr/src/uts/common/Makefile.rules7
-rw-r--r--usr/src/uts/common/fs/bootfs/bootfs_construct.c367
-rw-r--r--usr/src/uts/common/fs/bootfs/bootfs_vfsops.c320
-rw-r--r--usr/src/uts/common/fs/bootfs/bootfs_vnops.c544
-rw-r--r--usr/src/uts/common/fs/vfs.c5
-rw-r--r--usr/src/uts/common/sys/fs/bootfs_impl.h81
-rw-r--r--usr/src/uts/intel/bootfs/Makefile72
13 files changed, 1652 insertions, 4 deletions
diff --git a/manifest b/manifest
index 4cc12d50ac..d0a8173195 100644
--- a/manifest
+++ b/manifest
@@ -857,6 +857,7 @@ f kernel/exec/amd64/intpexec 0755 root sys
d kernel/fs 0755 root sys
d kernel/fs/amd64 0755 root sys
f kernel/fs/amd64/autofs 0755 root sys
+f kernel/fs/amd64/bootfs 0755 root sys
f kernel/fs/amd64/ctfs 0755 root sys
f kernel/fs/amd64/dcfs 0755 root sys
f kernel/fs/amd64/dev 0755 root sys
@@ -5365,6 +5366,8 @@ s usr/lib/fs/autofs/libshare_autofs.so=libshare_autofs.so.1
f usr/lib/fs/autofs/mount 0555 root bin
f usr/lib/fs/autofs/share 0555 root bin
f usr/lib/fs/autofs/unshare 0555 root bin
+d usr/lib/fs/bootfs 0755 root sys
+f usr/lib/fs/bootfs/mount 0555 root bin
d usr/lib/fs/ctfs 0755 root sys
f usr/lib/fs/ctfs/mount 0555 root bin
d usr/lib/fs/dev 0755 root sys
@@ -18164,6 +18167,7 @@ f usr/share/man/man7d/yge.7d 0444 root bin
f usr/share/man/man7d/zcons.7d 0444 root bin
f usr/share/man/man7d/zero.7d 0444 root bin
d usr/share/man/man7fs 0755 root bin
+f usr/share/man/man7fs/bootfs.7fs 0444 root bin
f usr/share/man/man7fs/ctfs.7fs 0444 root bin
f usr/share/man/man7fs/dcfs.7fs 0444 root bin
f usr/share/man/man7fs/dev.7fs 0444 root bin
diff --git a/usr/src/cmd/fs.d/bootfs/Makefile b/usr/src/cmd/fs.d/bootfs/Makefile
new file mode 100644
index 0000000000..d0ac4311f4
--- /dev/null
+++ b/usr/src/cmd/fs.d/bootfs/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+FSTYPE= bootfs
+LIBPROG= mount
+
+include ../Makefile.fstype
+include ../Makefile.mount
+include ../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/bootfs/mount.c b/usr/src/cmd/fs.d/bootfs/mount.c
new file mode 100644
index 0000000000..5363a4f872
--- /dev/null
+++ b/usr/src/cmd/fs.d/bootfs/mount.c
@@ -0,0 +1,139 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libintl.h>
+#include <errno.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <fslib.h>
+
+#define MNTTYPE_BOOTFS "bootfs"
+
+static char optbuf[MAX_MNTOPT_STR] = { '\0', };
+static int optsize = 0;
+
+/*
+ * Print the command synopsis on stderr and terminate with exit status 2.
+ */
+static void
+usage(void)
+{
+	static const char synopsis[] =
+	    "Usage: mount [-Ormq] [-o options] special mountpoint\n";
+
+	(void) fputs(synopsis, stderr);
+	exit(2);
+}
+
+/*
+ * usage: mount [-Ormq] [-o options] special mountp
+ *
+ * This mount program is exec'ed by /usr/sbin/mount if '-F bootfs' is
+ * specified.
+ *
+ * Options:
+ *	-o options	option string handed to mount(2); it must fit in
+ *			MAX_MNTOPT_STR bytes
+ *	-O		sets MS_OVERLAY
+ *	-r		sets MS_RDONLY
+ *	-m		sets MS_NOMNTTAB
+ *	-q		skip the comparison of the requested options against
+ *			the option string mount(2) hands back
+ *
+ * Exactly two operands (special, mount point) must follow the options.
+ *
+ * Returns 0 on success. Terminates with status 2 on a usage error, an
+ * oversized option string, memory exhaustion, or a failed mount(2).
+ */
+int
+main(int argc, char *argv[])
+{
+	int c;
+	char *special;		/* Entity being mounted */
+	char *mountp;		/* Entity being mounted on */
+	char *savedoptbuf;
+	char *myname;
+	char typename[64];
+	int flags = 0;
+	int errflag = 0;
+	int qflg = 0;
+
+	/* Make getopt() diagnostics read "bootfs <progname>: ...". */
+	myname = strrchr(argv[0], '/');
+	myname = myname ? myname+1 : argv[0];
+	(void) snprintf(typename, sizeof (typename), "%s %s", MNTTYPE_BOOTFS,
+	    myname);
+	argv[0] = typename;
+
+	while ((c = getopt(argc, argv, "o:rmOq")) != EOF) {
+		switch (c) {
+		case '?':
+			errflag++;
+			break;
+
+		case 'o':
+			if (strlcpy(optbuf, optarg, sizeof (optbuf)) >=
+			    sizeof (optbuf)) {
+				(void) fprintf(stderr,
+				    gettext("%s: Invalid argument: %s\n"),
+				    myname, optarg);
+				return (2);
+			}
+			optsize = strlen(optbuf);
+			break;
+		case 'O':
+			flags |= MS_OVERLAY;
+			break;
+		case 'r':
+			flags |= MS_RDONLY;
+			break;
+
+		case 'm':
+			flags |= MS_NOMNTTAB;
+			break;
+
+		case 'q':
+			qflg = 1;
+			break;
+
+		default:
+			usage();
+		}
+	}
+	if ((argc - optind != 2) || errflag) {
+		usage();
+	}
+	special = argv[argc - 2];
+	mountp = argv[argc - 1];
+
+	/* mount(2) may rewrite optbuf, so keep what the user asked for. */
+	if ((savedoptbuf = strdup(optbuf)) == NULL) {
+		(void) fprintf(stderr, gettext("%s: out of memory\n"),
+		    myname);
+		exit(2);
+	}
+
+	if (mount(special, mountp, flags | MS_OPTIONSTR, MNTTYPE_BOOTFS, NULL,
+	    0, optbuf, MAX_MNTOPT_STR)) {
+		/*
+		 * Capture errno before touching stdio: a stdio call is
+		 * allowed to modify errno, which would make the diagnostic
+		 * report the wrong failure.
+		 */
+		int err = errno;
+
+		(void) fprintf(stderr, "mount: %s: %s\n", special,
+		    strerror(err));
+		exit(2);
+	}
+	if (optsize && !qflg) {
+		cmp_requested_to_actual_options(savedoptbuf, optbuf,
+		    special, mountp);
+	}
+
+	return (0);
+}
diff --git a/usr/src/man/man7fs/Makefile b/usr/src/man/man7fs/Makefile
index f940a4da13..d985e95410 100644
--- a/usr/src/man/man7fs/Makefile
+++ b/usr/src/man/man7fs/Makefile
@@ -12,14 +12,15 @@
#
# Copyright 2011, Richard Lowe
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
-# Copyright 2012 Joyent, Inc. All rights reserved.
+# Copyright 2014 Joyent, Inc. All rights reserved.
#
include $(SRC)/Makefile.master
MANSECT= 7fs
-MANFILES= ctfs.7fs \
+MANFILES= bootfs.7fs \
+ ctfs.7fs \
dcfs.7fs \
dev.7fs \
devfs.7fs \
diff --git a/usr/src/man/man7fs/bootfs.7fs b/usr/src/man/man7fs/bootfs.7fs
new file mode 100644
index 0000000000..9aea119370
--- /dev/null
+++ b/usr/src/man/man7fs/bootfs.7fs
@@ -0,0 +1,90 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014 Joyent, Inc. All rights reserved.
+.\"
+
+.TH BOOTFS 7FS "May 8, 2014"
+.SH NAME
+bootfs \- boot-time module file system
+
+.SH DESCRIPTION
+
+The
+.B bootfs
+file system is a read-only file system that provides access to any
+boot-time modules that were passed in to the system loader which were
+tagged with the type
+.IR file .
+.B bootfs
+does not display any boot-time modules that were tagged as type
+.I hash
+or type
+.IR rootfs .
+
+If modules with duplicate names and paths are specified, only the first
+such entry will be present in the file system and a counter will be
+incremented to indicate that a duplicate entry was found, but is not
+present in the file system. If a module's name consists only of
+invalid characters, such as '.', '..', or '/', then the module will not
+be present in the file system and a counter will be incremented to
+indicate that this has occurred. In both cases, diagnostic information
+is available through the kstats facility.
+
+.SH FILES
+.sp
+.ne 2
+.na
+.B /system/boot
+.ad
+.RS 8n
+The mount point for the
+.B bootfs
+file system in the global zone.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Determining if collisions or invalid names are present
+.sp
+.LP
+To determine if any boot-time modules were not created due to collisions
+or invalid names, enter the following command:
+
+.sp
+.in +2
+.nf
+# kstat -m bootfs
+module: bootfs instance: 1
+name: bootfs class: fs
+ crtime 236063.651324041
+ nbytes 8749355
+ ndirs 3
+ ndiscard 0
+ ndup 0
+ nfiles 2
+ snaptime 236063.651324041
+.fi
+.in -2
+.sp
+.LP
+The field
+.B ndiscard
+lists the number of boot-time modules that were discarded because their names were invalid. The field
+.B ndup
+lists the number of duplicate entries that were found and therefore not displayed in the file system.
+.sp
+.LP
+This information is provided for informational purposes only; it is not to be construed as a stable interface.
+
+.SH SEE ALSO
+.BR kstat (1M),
+.BR grub (5)
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index bd0a9d465a..e00224f248 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1316,6 +1316,7 @@ SMBFS_OBJS += smbfs_vfsops.o smbfs_vnops.o smbfs_node.o \
smbfs_rwlock.o smbfs_xattr.o \
$(SMBFS_COMMON_OBJS)
+BOOTFS_OBJS += bootfs_construct.o bootfs_vfsops.o bootfs_vnops.o
#
# LVM modules
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index e4d94382b2..5f51f83b0e 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -215,6 +215,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/autofs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/bootfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/cachefs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1779,6 +1783,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/autofs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/bootfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/cachefs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/fs/bootfs/bootfs_construct.c b/usr/src/uts/common/fs/bootfs/bootfs_construct.c
new file mode 100644
index 0000000000..87895d21bb
--- /dev/null
+++ b/usr/src/uts/common/fs/bootfs/bootfs_construct.c
@@ -0,0 +1,367 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This file takes care of reading the boot time modules and constructing them
+ * into the appropriate series of vnodes.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/vfs.h>
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
+
+#include <sys/fs/bootfs_impl.h>
+
+kmem_cache_t *bootfs_node_cache;
+
+static const vattr_t bootfs_vattr_dir = {
+ AT_ALL, /* va_mask */
+ VDIR, /* va_type */
+ S_IFDIR | 0555, /* va_mode */
+ 0, /* va_uid */
+ 0, /* va_gid */
+ 0, /* va_fsid; overwritten in bootfs_node_init() */
+ 0, /* va_nodeid; overwritten in bootfs_node_init() */
+ 1, /* va_nlink; incremented per subdirectory in bootfs_mknode() */
+ 0, /* va_size; incremented per subdirectory in bootfs_mknode() */
+ 0, /* va_atime; set to the current time in bootfs_node_init() */
+ 0, /* va_mtime; set to the current time in bootfs_node_init() */
+ 0, /* va_ctime; set to the current time in bootfs_node_init() */
+ 0, /* va_rdev */
+ 0, /* va_blksize; set to PAGESIZE in bootfs_node_init() */
+ 0, /* va_nblocks */
+ 0 /* va_seq */
+};
+
+static const vattr_t bootfs_vattr_reg = {
+ AT_ALL, /* va_mask */
+ VREG, /* va_type */
+ S_IFREG | 0555, /* va_mode */
+ 0, /* va_uid */
+ 0, /* va_gid */
+ 0, /* va_fsid; overwritten in bootfs_node_init() */
+ 0, /* va_nodeid; overwritten in bootfs_node_init() */
+ 2, /* va_nlink; NOTE(review): 2 here vs. 1 for directories looks inverted - confirm */
+ 2, /* va_size; NOTE(review): 2 here vs. 0 for directories looks inverted - confirm */
+ 0, /* va_atime; set to the current time in bootfs_node_init() */
+ 0, /* va_mtime; set to the current time in bootfs_node_init() */
+ 0, /* va_ctime; set to the current time in bootfs_node_init() */
+ 0, /* va_rdev */
+ 0, /* va_blksize; set to PAGESIZE in bootfs_node_init() */
+ 1, /* va_nblocks; recomputed from the module size in bootfs_mknode() */
+ 0 /* va_seq */
+};
+
+/*
+ * kmem cache constructor for bootfs nodes: attach a vnode obtained from
+ * vn_alloc() to the node. Returns 0 on success and -1 if no vnode could be
+ * allocated.
+ */
+/*ARGSUSED*/
+int
+bootfs_node_constructor(void *buf, void *arg, int kmflags)
+{
+	bootfs_node_t *node = buf;
+
+	node->bvn_vnp = vn_alloc(kmflags);
+	return ((node->bvn_vnp != NULL) ? 0 : -1);
+}
+
+/*
+ * kmem cache destructor for bootfs nodes: give back the vnode that the
+ * constructor attached to the node.
+ */
+/*ARGSUSED*/
+void
+bootfs_node_destructor(void *buf, void *arg)
+{
+	vn_free(((bootfs_node_t *)buf)->bvn_vnp);
+}
+
+/*
+ * AVL ordering function for directory entries. Nodes are ordered by
+ * strcmp() of their names, with the result normalized to -1, 0 or 1.
+ */
+static int
+bootfs_comparator(const void *a, const void *b)
+{
+	const bootfs_node_t *left = a;
+	const bootfs_node_t *right = b;
+	int cmp = strcmp(left->bvn_name, right->bvn_name);
+
+	return ((cmp > 0) - (cmp < 0));
+}
+
+/*
+ * Initialize a freshly allocated bootfs node and the vnode attached to it.
+ *
+ * The node receives a private, NUL-terminated copy of the first namelen bytes
+ * of name and a copy of the attribute template vap. The copy is then stamped
+ * with the current time, this mount's device ID, the next inode number and a
+ * block size of PAGESIZE. Directory nodes also get an empty AVL tree for
+ * their children. Finally the node is appended to the file system's list of
+ * all nodes.
+ */
+static void
+bootfs_node_init(bootfs_t *bfs, bootfs_node_t *bnp, const struct vattr *vap,
+    const char *name, size_t namelen)
+{
+	vnode_t *vp = bnp->bvn_vnp;
+	timestruc_t stamp;
+
+	vn_reinit(vp);
+
+	vp->v_flag |= VNOSWAP;
+	vp->v_type = vap->va_type;
+	vp->v_vfsp = bfs->bfs_vfsp;
+	vp->v_rdev = 0;
+	vp->v_data = (caddr_t)bnp;
+	vn_setops(vp, bootfs_vnodeops);
+
+	/* name is only length-bounded, so terminate our copy explicitly. */
+	bnp->bvn_name = kmem_alloc(namelen + 1, KM_SLEEP);
+	bcopy(name, bnp->bvn_name, namelen);
+	bnp->bvn_name[namelen] = '\0';
+
+	if (vap->va_type == VDIR) {
+		avl_create(&bnp->bvn_dir, bootfs_comparator,
+		    sizeof (bootfs_node_t),
+		    offsetof(bootfs_node_t, bvn_link));
+	}
+	bzero(&bnp->bvn_link, sizeof (avl_node_t));
+
+	/* Start from the template, then fill in the per-node fields. */
+	bnp->bvn_attr = *vap;
+	gethrestime(&stamp);
+	bnp->bvn_attr.va_atime = stamp;
+	bnp->bvn_attr.va_mtime = stamp;
+	bnp->bvn_attr.va_ctime = stamp;
+	bnp->bvn_attr.va_fsid = makedevice(bootfs_major, bfs->bfs_minor);
+	bnp->bvn_attr.va_blksize = PAGESIZE;
+	bnp->bvn_attr.va_nodeid = bfs->bfs_ninode++;
+
+	list_insert_tail(&bfs->bfs_nodes, bnp);
+}
+
+/*
+ * Create the root directory node "/" for this mount. The root is its own
+ * parent, its vnode is flagged VROOT, and it is counted in the ndirs
+ * statistic.
+ */
+static void
+bootfs_mkroot(bootfs_t *bfs)
+{
+	bootfs_node_t *root = kmem_cache_alloc(bootfs_node_cache, KM_SLEEP);
+
+	bootfs_node_init(bfs, root, &bootfs_vattr_dir, "/", 1);
+	root->bvn_parent = root;
+	root->bvn_vnp->v_flag |= VROOT;
+	bfs->bfs_rootvn = root;
+	bfs->bfs_stat.bfss_ndirs.value.ui32++;
+	vn_exists(root->bvn_vnp);
+}
+
+/*
+ * Create the entry name (namelen bytes, not necessarily NUL-terminated) of
+ * the type described by vap inside the directory parent.
+ *
+ * If an entry with that name already exists and a directory was requested,
+ * the existing entry is handed back through outp and 0 is returned. If a
+ * file was requested, EEXIST is returned and outp is left untouched.
+ * Otherwise a new node is created, linked into parent, returned through
+ * outp, and the mount statistics are updated. addr and size are recorded
+ * only for non-directory nodes.
+ */
+static int
+bootfs_mknode(bootfs_t *bfs, bootfs_node_t *parent, bootfs_node_t **outp,
+    const char *name, size_t namelen, const vattr_t *vap, uintptr_t addr,
+    uint64_t size)
+{
+	const int isdir = (vap->va_type == VDIR);
+	bootfs_node_t *node;
+	bootfs_node_t key;
+	avl_index_t where;
+	char *lookup;
+
+	ASSERT(parent->bvn_attr.va_type == VDIR);
+
+	/* The AVL lookup key must be NUL-terminated; name need not be. */
+	lookup = kmem_alloc(namelen + 1, KM_SLEEP);
+	bcopy(name, lookup, namelen);
+	lookup[namelen] = '\0';
+	key.bvn_name = lookup;
+	node = avl_find(&parent->bvn_dir, &key, &where);
+	kmem_free(lookup, namelen + 1);
+
+	if (node != NULL) {
+		/* Directories can collide, files cannot */
+		if (!isdir)
+			return (EEXIST);
+		*outp = node;
+		return (0);
+	}
+
+	node = kmem_cache_alloc(bootfs_node_cache, KM_SLEEP);
+	bootfs_node_init(bfs, node, vap, name, namelen);
+	node->bvn_parent = parent;
+	avl_add(&parent->bvn_dir, node);
+	*outp = node;
+
+	if (isdir) {
+		parent->bvn_attr.va_size++;
+		parent->bvn_attr.va_nlink++;
+		bfs->bfs_stat.bfss_ndirs.value.ui32++;
+	} else {
+		node->bvn_addr = addr;
+		node->bvn_size = size;
+		/* va_nblocks is counted in 512-byte units. */
+		node->bvn_attr.va_nblocks = P2ROUNDUP(size, 512) >> 9;
+		bfs->bfs_stat.bfss_nfiles.value.ui32++;
+		bfs->bfs_stat.bfss_nbytes.value.ui64 += size;
+	}
+
+	vn_exists(node->bvn_vnp);
+
+	return (0);
+}
+
+/*
+ * Given the address, size, and path a boot-time module would like, go through
+ * and create all of the directory entries that are required and then the file
+ * itself. If someone has passed in a module that has the same name as another
+ * one, we honor the first one.
+ *
+ * Path handling: runs of '/' are collapsed, a "." component is skipped, and a
+ * ".." component moves to the parent (the root is its own parent).
+ *
+ * Returns 0 on success, EINVAL if the name is empty, consists only of
+ * slashes, ends in a slash, or ends in "." or "..", and EEXIST if the file
+ * name, or one of the directory components, is already taken by an earlier
+ * module's file. Directories created before a failure is detected are left
+ * in place.
+ */
+static int
+bootfs_construct_entry(bootfs_t *bfs, uintptr_t addr, uint64_t size,
+    const char *mname)
+{
+	char *sp;
+	size_t nlen;
+	bootfs_node_t *nbnp;
+
+	const char *p = mname;
+	bootfs_node_t *bnp = bfs->bfs_rootvn;
+
+	if (*p == '\0')
+		return (EINVAL);
+
+	for (;;) {
+		/* First eliminate all leading / characters. */
+		while (*p == '/')
+			p++;
+
+		/* A name with all slashes or ending in a / */
+		if (*p == '\0')
+			return (EINVAL);
+
+		sp = strchr(p, '/');
+		if (sp == NULL)
+			break;
+		nlen = (size_t)(sp - p);
+		if (nlen == 1 && p[0] == '.') {
+			p = sp + 1;
+			continue;
+		}
+
+		if (nlen == 2 && p[0] == '.' && p[1] == '.') {
+			bnp = bnp->bvn_parent;
+			p = sp + 1;
+			continue;
+		}
+
+		VERIFY(bootfs_mknode(bfs, bnp, &nbnp, p, nlen,
+		    &bootfs_vattr_dir, addr, size) == 0);
+
+		/*
+		 * bootfs_mknode() hands back whatever entry already carries
+		 * this name. If an earlier module claimed it as a regular
+		 * file we cannot descend into it; the first module wins.
+		 */
+		if (nbnp->bvn_attr.va_type != VDIR)
+			return (EEXIST);
+
+		p = sp + 1;
+		bnp = nbnp;
+	}
+
+	/* "." and ".." are never valid file names. */
+	if (strcmp(p, ".") == 0 || strcmp(p, "..") == 0)
+		return (EINVAL);
+
+	nlen = strlen(p);
+	return (bootfs_mknode(bfs, bnp, &nbnp, p, nlen, &bootfs_vattr_reg,
+	    addr, size));
+}
+
+/*
+ * We're going to go through every boot time module and construct the
+ * appropriate vnodes for them now. Because there are very few of these that
+ * exist, generally on the order of a handful, we're going to create them all
+ * when the file system is initialized and then tear them all down when the
+ * module gets unloaded.
+ *
+ * The information about the modules is contained in properties on the root of
+ * the devinfo tree. Specifically there are three properties per module:
+ *
+ * - module-size-%d	int64_t size, in bytes, of the boot time module.
+ * - module-addr-%d	The address of the boot time module
+ * - module-name-%d	The string name of the boot time module
+ *
+ * Note that the module-size and module-addr fields are always 64-bit values
+ * regardless of being on a 32-bit or 64-bit kernel. module-name is a string
+ * property.
+ *
+ * There is no property that indicates the total number of such modules. Modules
+ * start at 0 and work their way up incrementally. The first time we can't find
+ * a module or a property, then we stop.
+ *
+ * A module whose name is rejected with EINVAL is counted in the ndiscard
+ * kstat, one rejected with EEXIST in the ndup kstat; in both cases the scan
+ * continues with the next module.
+ */
+void
+bootfs_construct(bootfs_t *bfs)
+{
+	uint_t id = 0, ndata;
+	char paddr[64], psize[64], pname[64], *mname;
+	dev_info_t *root;
+	uchar_t *datap;
+	uint64_t size = 0, addr = 0;
+	int ret;
+
+	bootfs_mkroot(bfs);
+	root = ddi_root_node();
+
+	for (;;) {
+		if (id == UINT32_MAX)
+			break;
+
+		/*
+		 * snprintf() reports the length the full string needs, so a
+		 * result equal to the buffer size already means truncation.
+		 * Each name is bounded by its own buffer.
+		 */
+		if (snprintf(paddr, sizeof (paddr), "module-addr-%u", id) >=
+		    sizeof (paddr))
+			break;
+
+		if (snprintf(psize, sizeof (psize), "module-size-%u", id) >=
+		    sizeof (psize))
+			break;
+
+		if (snprintf(pname, sizeof (pname), "module-name-%u", id) >=
+		    sizeof (pname))
+			break;
+
+		if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, root,
+		    DDI_PROP_DONTPASS, paddr, &datap, &ndata) !=
+		    DDI_PROP_SUCCESS)
+			break;
+
+		/* Only an 8-byte value is acceptable; free it either way. */
+		if (ndata == 8)
+			bcopy(datap, &addr, sizeof (uint64_t));
+		ddi_prop_free(datap);
+		if (ndata != 8)
+			break;
+
+		if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, root,
+		    DDI_PROP_DONTPASS, psize, &datap, &ndata) !=
+		    DDI_PROP_SUCCESS)
+			break;
+		if (ndata == 8)
+			bcopy(datap, &size, sizeof (uint64_t));
+		ddi_prop_free(datap);
+		if (ndata != 8)
+			break;
+
+		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, root,
+		    DDI_PROP_DONTPASS, pname, &mname) != DDI_PROP_SUCCESS)
+			break;
+
+		ret = bootfs_construct_entry(bfs, addr, size, mname);
+		if (ret == EINVAL)
+			bfs->bfs_stat.bfss_ndiscards.value.ui32++;
+		if (ret == EEXIST)
+			bfs->bfs_stat.bfss_ndups.value.ui32++;
+		ddi_prop_free(mname);
+
+		id++;
+	}
+}
+
+/*
+ * Tear down every node of the file system: drop the hold on each node's
+ * vnode, free its name, and return the node to the node cache. Every vnode
+ * is expected to be down to a single hold at this point.
+ */
+void
+bootfs_destruct(bootfs_t *bfs)
+{
+	for (;;) {
+		bootfs_node_t *node = list_remove_head(&bfs->bfs_nodes);
+
+		if (node == NULL)
+			break;
+
+		ASSERT(node->bvn_vnp->v_count == 1);
+		VN_RELE(node->bvn_vnp);
+		kmem_free(node->bvn_name, strlen(node->bvn_name) + 1);
+		kmem_cache_free(bootfs_node_cache, node);
+	}
+}
diff --git a/usr/src/uts/common/fs/bootfs/bootfs_vfsops.c b/usr/src/uts/common/fs/bootfs/bootfs_vfsops.c
new file mode 100644
index 0000000000..b87e4b738e
--- /dev/null
+++ b/usr/src/uts/common/fs/bootfs/bootfs_vfsops.c
@@ -0,0 +1,320 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/errno.h>
+#include <sys/modctl.h>
+#include <sys/types.h>
+#include <sys/mkdev.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/systm.h>
+#include <sys/id_space.h>
+#include <sys/cmn_err.h>
+#include <sys/ksynch.h>
+#include <sys/policy.h>
+#include <sys/mount.h>
+#include <sys/sysmacros.h>
+
+#include <sys/fs/bootfs_impl.h>
+
+/*
+ * While booting, additional types of modules and files can be passed in to the
+ * loader. These include the familiar boot archive, as well as, a module hash
+ * and additional modules that are interpreted as files. As part of the handoff
+ * in early boot, information about these modules are saved as properties on the
+ * root of the devinfo tree, similar to other boot-time properties.
+ *
+ * This file system provides a read-only view of those additional files. Due to
+ * its limited scope, it has a slightly simpler construction than several other
+ * file systems. When mounted, it looks for the corresponding properties and
+ * creates bootfs_node_t's and vnodes for all of the corresponding files and
+ * directories that exist along the way. At this time, there are currently a
+ * rather small number of files passed in this way.
+ *
+ * This does lead to one behavior that folks used to other file systems might
+ * find peculiar. Because we are not always actively creating and destroying the
+ * required vnodes on demand, the count on the root vnode will not be going up
+ * accordingly with the existence of other vnodes. This means that a bootfs file
+ * system that is not in use will have all of its vnodes exist with a v_count of
+ * one.
+ */
+
+major_t bootfs_major;
+static int bootfs_fstype;
+static id_space_t *bootfs_idspace;
+static uint64_t bootfs_nactive;
+static kmutex_t bootfs_lock;
+
+static const char *bootfs_name = "bootfs";
+
+/*
+ * VFS mount entry point.
+ *
+ * Validates the request (secpolicy_fs_mount(), directory mount point, no
+ * remount, and a mount point that is neither busy nor a root vnode unless
+ * MS_OVERLAY is given). It then allocates the per-mount bootfs_t together
+ * with its minor number and named kstat, fills in the vfs_t, and builds all
+ * nodes with bootfs_construct().
+ *
+ * Returns 0 on success, otherwise the secpolicy error, ENOTDIR, EBUSY,
+ * ENOMEM, or the error from pn_get().
+ */
+static int
+bootfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+	int ret;
+	bootfs_t *bfs;
+	struct pathname dpn;
+	dev_t fsdev;
+
+	if ((ret = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+		return (ret);
+
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	if (uap->flags & MS_REMOUNT)
+		return (EBUSY);
+
+	mutex_enter(&mvp->v_lock);
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
+		mutex_exit(&mvp->v_lock);
+		return (EBUSY);
+	}
+	mutex_exit(&mvp->v_lock);
+
+	/*
+	 * We indicate that the backing store is bootfs. We don't want to use
+	 * swap, because folks might think that this is putting all the data
+	 * into memory ala tmpfs. Rather these modules are always in memory and
+	 * there's nothing to be done about that.
+	 */
+	vfs_setresource(vfsp, bootfs_name, 0);
+	bfs = kmem_zalloc(sizeof (bootfs_t), KM_NOSLEEP | KM_NORMALPRI);
+	if (bfs == NULL)
+		return (ENOMEM);
+
+	ret = pn_get(uap->dir,
+	    (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn);
+	if (ret != 0) {
+		/* Free with the allocation size, not the pointer size. */
+		kmem_free(bfs, sizeof (bootfs_t));
+		return (ret);
+	}
+
+	bfs->bfs_minor = id_alloc(bootfs_idspace);
+	bfs->bfs_kstat = kstat_create_zone("bootfs", bfs->bfs_minor, "bootfs",
+	    "fs", KSTAT_TYPE_NAMED,
+	    sizeof (bootfs_stat_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
+	if (bfs->bfs_kstat == NULL) {
+		id_free(bootfs_idspace, bfs->bfs_minor);
+		pn_free(&dpn);
+		kmem_free(bfs, sizeof (bootfs_t));
+		return (ENOMEM);
+	}
+	/* The kstat is virtual: its data lives inside the bootfs_t. */
+	bfs->bfs_kstat->ks_data = &bfs->bfs_stat;
+
+	fsdev = makedevice(bootfs_major, bfs->bfs_minor);
+	bfs->bfs_vfsp = vfsp;
+
+	vfsp->vfs_data = (caddr_t)bfs;
+	vfsp->vfs_fstype = bootfs_fstype;
+	vfsp->vfs_dev = fsdev;
+	vfsp->vfs_bsize = PAGESIZE;
+	vfsp->vfs_flag |= VFS_RDONLY | VFS_NOSETUID | VFS_NOTRUNC |
+	    VFS_UNLINKABLE;
+	vfs_make_fsid(&vfsp->vfs_fsid, fsdev, bootfs_fstype);
+	bfs->bfs_mntpath = kmem_alloc(dpn.pn_pathlen + 1, KM_SLEEP);
+	bcopy(dpn.pn_path, bfs->bfs_mntpath, dpn.pn_pathlen);
+	bfs->bfs_mntpath[dpn.pn_pathlen] = '\0';
+	pn_free(&dpn);
+	list_create(&bfs->bfs_nodes, sizeof (bootfs_node_t),
+	    offsetof(bootfs_node_t, bvn_alink));
+
+	kstat_named_init(&bfs->bfs_stat.bfss_nfiles, "nfiles",
+	    KSTAT_DATA_UINT32);
+	kstat_named_init(&bfs->bfs_stat.bfss_ndirs, "ndirs",
+	    KSTAT_DATA_UINT32);
+	kstat_named_init(&bfs->bfs_stat.bfss_nbytes, "nbytes",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&bfs->bfs_stat.bfss_ndups, "ndup",
+	    KSTAT_DATA_UINT32);
+	kstat_named_init(&bfs->bfs_stat.bfss_ndiscards, "ndiscard",
+	    KSTAT_DATA_UINT32);
+
+	/* Populate the counters before the kstat becomes visible. */
+	bootfs_construct(bfs);
+
+	kstat_install(bfs->bfs_kstat);
+
+	return (0);
+}
+
+/*
+ * VFS unmount entry point. Forced unmounts are not supported (ENOTSUP). The
+ * unmount is refused with EBUSY if any vnode of the file system has more
+ * than one hold. Otherwise the kstat, all nodes, the saved mount path, the
+ * minor number and the bootfs_t itself are released.
+ */
+static int
+bootfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+	bootfs_t *bfs = vfsp->vfs_data;
+	bootfs_node_t *node;
+	int err;
+
+	err = secpolicy_fs_unmount(cr, vfsp);
+	if (err != 0)
+		return (err);
+
+	if ((flag & MS_FORCE) != 0)
+		return (ENOTSUP);
+
+	node = list_head(&bfs->bfs_nodes);
+	while (node != NULL) {
+		vnode_t *vp = node->bvn_vnp;
+		int busy;
+
+		mutex_enter(&vp->v_lock);
+		busy = (vp->v_count > 1);
+		mutex_exit(&vp->v_lock);
+		if (busy)
+			return (EBUSY);
+
+		node = list_next(&bfs->bfs_nodes, node);
+	}
+
+	kstat_delete(bfs->bfs_kstat);
+	bootfs_destruct(bfs);
+	list_destroy(&bfs->bfs_nodes);
+	kmem_free(bfs->bfs_mntpath, strlen(bfs->bfs_mntpath) + 1);
+	id_free(bootfs_idspace, bfs->bfs_minor);
+	kmem_free(bfs, sizeof (bootfs_t));
+
+	return (0);
+}
+
+/*
+ * VFS root entry point: return the root vnode of the mount through vpp with
+ * an additional hold on it. Always returns 0.
+ */
+static int
+bootfs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	bootfs_t *bfs = (bootfs_t *)vfsp->vfs_data;
+	vnode_t *rootvp = bfs->bfs_rootvn->bvn_vnp;
+
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * VFS statvfs entry point. Reports a full, read-only file system: block and
+ * fragment size are PAGESIZE, the block count is derived from the nbytes
+ * statistic, the file count is nfiles + ndirs, and nothing is free.
+ * Always returns 0.
+ */
+static int
+bootfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
+{
+	/*
+	 * The per-mount state hangs off vfs_data (set in bootfs_mount()); the
+	 * vfs_t itself is not a bootfs_t.
+	 */
+	const bootfs_t *bfs = (bootfs_t *)vfsp->vfs_data;
+	dev32_t d32;
+
+	sbp->f_bsize = PAGESIZE;
+	sbp->f_frsize = PAGESIZE;
+
+	sbp->f_blocks = bfs->bfs_stat.bfss_nbytes.value.ui64 >> PAGESHIFT;
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+
+	sbp->f_files = bfs->bfs_stat.bfss_nfiles.value.ui32 +
+	    bfs->bfs_stat.bfss_ndirs.value.ui32;
+	sbp->f_ffree = 0;
+	sbp->f_favail = 0;
+
+	(void) cmpldev(&d32, vfsp->vfs_dev);
+	sbp->f_fsid = d32;
+	(void) strlcpy(sbp->f_basetype, bootfs_name, FSTYPSZ);
+	bzero(sbp->f_fstr, sizeof (sbp->f_fstr));
+
+	return (0);
+}
+
+/*
+ * VFS operations implemented by bootfs, registered through vfs_setfsops() in
+ * bootfs_init(). The consumer walks the table until it finds a NULL name, so
+ * the terminating entry is required.
+ */
+static const fs_operation_def_t bootfs_vfsops_tmpl[] = {
+	VFSNAME_MOUNT,		{ .vfs_mount = bootfs_mount },
+	VFSNAME_UNMOUNT,	{ .vfs_unmount = bootfs_unmount },
+	VFSNAME_ROOT,		{ .vfs_root = bootfs_root },
+	VFSNAME_STATVFS,	{ .vfs_statvfs = bootfs_statvfs },
+	NULL,			NULL
+};
+
+/*
+ * File system type initialization, referenced from bootfs_vfsdef. Records
+ * the assigned fstype, registers the VFS and vnode operation vectors, and
+ * obtains a major number for bootfs device IDs. If no major number is
+ * available a warning is logged and 0 is used instead.
+ *
+ * Returns 0 on success or the error from vfs_setfsops() / vn_make_ops().
+ */
+static int
+bootfs_init(int fstype, char *name)
+{
+	int err;
+
+	bootfs_fstype = fstype;
+	ASSERT(bootfs_fstype != 0);
+
+	if ((err = vfs_setfsops(fstype, bootfs_vfsops_tmpl, NULL)) != 0)
+		return (err);
+
+	err = vn_make_ops(name, bootfs_vnodeops_template, &bootfs_vnodeops);
+	if (err != 0) {
+		/* Undo the VFS registration done just above. */
+		(void) vfs_freevfsops_by_type(bootfs_fstype);
+		return (err);
+	}
+
+	if ((bootfs_major = getudev()) == (major_t)-1) {
+		cmn_err(CE_WARN, "bootfs_init: Can't get unique device number");
+		bootfs_major = 0;
+	}
+
+	bootfs_nactive = 0;
+
+	return (0);
+}
+
+static mntopts_t bootfs_mntopts = {
+ 0, NULL
+};
+
+static vfsdef_t bootfs_vfsdef = {
+ VFSDEF_VERSION,
+ "bootfs",
+ bootfs_init,
+ VSW_HASPROTO|VSW_STATS,
+ &bootfs_mntopts
+};
+
+static struct modlfs bootfs_modlfs = {
+ &mod_fsops, "boot-time modules file system", &bootfs_vfsdef
+};
+
+static struct modlinkage bootfs_modlinkage = {
+ MODREV_1, &bootfs_modlfs, NULL
+};
+
+/*
+ * Module load entry point. Creates the node cache, the minor-number id space
+ * and the global lock, then registers the module. If registration fails,
+ * everything created here is torn down again so a failed load leaks nothing.
+ *
+ * Returns the result of mod_install().
+ */
+int
+_init(void)
+{
+	int err;
+
+	bootfs_node_cache = kmem_cache_create("bootfs_node_cache",
+	    sizeof (bootfs_node_t), 0, bootfs_node_constructor,
+	    bootfs_node_destructor, NULL, NULL, NULL, 0);
+	bootfs_idspace = id_space_create("bootfs_minors", 1, INT32_MAX);
+	mutex_init(&bootfs_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	err = mod_install(&bootfs_modlinkage);
+	if (err != 0) {
+		mutex_destroy(&bootfs_lock);
+		id_space_destroy(bootfs_idspace);
+		kmem_cache_destroy(bootfs_node_cache);
+	}
+
+	return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&bootfs_modlinkage, modinfop)); /* hand the query to mod_info() for our linkage */
+}
+
+/*
+ * Module unload entry point. Refuses with EBUSY while bootfs_nactive is
+ * non-zero; otherwise removes the module and releases the operation vectors,
+ * the id space, the lock and the node cache.
+ *
+ * NOTE(review): nothing in this file modifies bootfs_nactive after
+ * bootfs_init() sets it to 0, so the EBUSY check below never fires as
+ * written - confirm whether mount/unmount were meant to maintain it.
+ */
+int
+_fini(void)
+{
+	uint64_t active;
+	int err;
+
+	mutex_enter(&bootfs_lock);
+	active = bootfs_nactive;
+	mutex_exit(&bootfs_lock);
+	if (active > 0)
+		return (EBUSY);
+
+	if ((err = mod_remove(&bootfs_modlinkage)) != 0)
+		return (err);
+
+	(void) vfs_freevfsops_by_type(bootfs_fstype);
+	vn_freevnodeops(bootfs_vnodeops);
+	id_space_destroy(bootfs_idspace);
+	mutex_destroy(&bootfs_lock);
+	kmem_cache_destroy(bootfs_node_cache);
+
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/bootfs/bootfs_vnops.c b/usr/src/uts/common/fs/bootfs/bootfs_vnops.c
new file mode 100644
index 0000000000..f63d0a4f24
--- /dev/null
+++ b/usr/src/uts/common/fs/bootfs/bootfs_vnops.c
@@ -0,0 +1,544 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * bootfs vnode operations
+ */
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/sunddi.h>
+#include <sys/errno.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mman.h>
+#include <fs/fs_subr.h>
+#include <sys/policy.h>
+#include <sys/sysmacros.h>
+#include <sys/dirent.h>
+#include <sys/uio.h>
+#include <vm/pvn.h>
+#include <vm/hat.h>
+#include <vm/seg_map.h>
+#include <vm/seg_vn.h>
+#include <sys/vmsystm.h>
+
+#include <sys/fs/bootfs_impl.h>
+
+/*
+ * The bootfs vnode operations vector. It is filled in by vn_make_ops() from
+ * bootfs_vnodeops_template in bootfs_init() and released with
+ * vn_freevnodeops() in _fini().
+ */
+struct vnodeops *bootfs_vnodeops;
+
+/*
+ * VOP_OPEN: there is nothing to set up when a bootfs vnode is opened, so
+ * every open simply succeeds.
+ */
+/*ARGSUSED*/
+static int
+bootfs_open(vnode_t **vpp, int flag, cred_t *cr,
+    caller_context_t *ct)
+{
+	return (0);
+}
+
+/*
+ * VOP_CLOSE: the counterpart of bootfs_open(). No per-open state exists, so
+ * there is nothing to release and the call always succeeds.
+ */
+/*ARGSUSED*/
+static int
+bootfs_close(vnode_t *vp, int flag, int count, offset_t offset,
+    cred_t *cr, caller_context_t *ct)
+{
+	return (0);
+}
+
+/*
+ * VOP_READ: copy file contents out to the caller's uio.
+ *
+ * Only regular files may be read: directories yield EISDIR and any other
+ * vnode type EINVAL. A negative offset is rejected with EINVAL and a read
+ * that starts at or beyond the end of the file returns 0 without
+ * transferring anything.
+ *
+ * The data is transferred at most one page at a time through segkmap: each
+ * iteration maps the MAXBSIZE window containing the current offset, copies
+ * out of it with uiomove() and releases the window again.
+ *
+ * Returns 0 on success, including a short read caused by an error after some
+ * data had already been transferred, or an errno value if nothing was copied.
+ */
+/*ARGSUSED*/
+static int
+bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
+    caller_context_t *ct)
+{
+	int err;
+	ssize_t sres = uiop->uio_resid;
+	bootfs_node_t *bnp = vp->v_data;
+
+	if (vp->v_type == VDIR)
+		return (EISDIR);
+
+	if (vp->v_type != VREG)
+		return (EINVAL);
+
+	if (uiop->uio_loffset < 0)
+		return (EINVAL);
+
+	if (uiop->uio_loffset >= bnp->bvn_size)
+		return (0);
+
+	err = 0;
+	while (uiop->uio_resid != 0) {
+		caddr_t base;
+		long offset, frem;
+		ulong_t poff, segoff;
+		size_t bytes;
+		int relerr;
+
+		/* Never cross a page boundary within a single iteration. */
+		offset = uiop->uio_loffset;
+		poff = offset & PAGEOFFSET;
+		bytes = MIN(PAGESIZE - poff, uiop->uio_resid);
+
+		frem = bnp->bvn_size - offset;
+		if (frem <= 0) {
+			err = 0;
+			break;
+		}
+
+		/* Don't read past EOF */
+		bytes = MIN(bytes, frem);
+
+		/*
+		 * Segmaps are likely larger than our page size, so make sure we
+		 * have the proper offset into the resulting segmap data.
+		 */
+		segoff = (offset & PAGEMASK) & MAXBOFFSET;
+
+		base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes,
+		    1, S_READ);
+
+		err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop);
+		relerr = segmap_release(segkmap, base, 0);
+
+		/* A copy error takes precedence over a release error. */
+		if (err == 0)
+			err = relerr;
+
+		if (err != 0)
+			break;
+	}
+
+	/*
+	 * Even if we had an error in a partial read, return success. The
+	 * residual count only ever shrinks, so data has been transferred
+	 * exactly when it is now smaller than it was on entry.
+	 */
+	if (uiop->uio_resid < sres)
+		err = 0;
+
+	gethrestime(&bnp->bvn_attr.va_atime);
+
+	return (err);
+}
+
+/*
+ * VOP_IOCTL: bootfs implements no ioctls, so every request is rejected with
+ * ENOTTY.
+ */
+/*ARGSUSED*/
+static int
+bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag, cred_t *cr,
+    int *rvalp, caller_context_t *ct)
+{
+	return (ENOTTY);
+}
+
+/*
+ * VOP_GETATTR: hand back a copy of the attributes stored in the node.
+ *
+ * The whole vattr_t kept in the bootfs node is copied over the caller's
+ * structure; the caller's original va_mask is then put back so the request
+ * mask is returned unchanged. Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	bootfs_node_t *node = (bootfs_node_t *)vp->v_data;
+	uint32_t saved_mask = vap->va_mask;
+
+	bcopy(&node->bvn_attr, vap, sizeof (vattr_t));
+	vap->va_mask = saved_mask;
+
+	return (0);
+}
+
+/*
+ * VOP_ACCESS: check the requested access mode against the node's stored
+ * ownership and permission bits.
+ *
+ * The stored mode is shifted left so that the permission triplet that applies
+ * to the caller lines up with the owner bits: no shift when the caller's uid
+ * matches the owner, 3 bits when the caller is a member of the node's group,
+ * and 6 bits otherwise. The final decision is made by
+ * secpolicy_vnode_access2(), whose result is returned.
+ */
+/*ARGSUSED*/
+static int
+bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	bootfs_node_t *node = (bootfs_node_t *)vp->v_data;
+	int shift;
+
+	if (crgetuid(cr) == node->bvn_attr.va_uid)
+		shift = 0;
+	else if (groupmember(node->bvn_attr.va_gid, cr) != 0)
+		shift = 3;
+	else
+		shift = 6;
+
+	return (secpolicy_vnode_access2(cr, vp, node->bvn_attr.va_uid,
+	    node->bvn_attr.va_mode << shift, mode));
+}
+
+/*
+ * VOP_LOOKUP: resolve the name `nm' within the directory `dvp'.
+ *
+ * Extended attribute lookups (LOOKUP_XATTR) fail with EINVAL and lookups in
+ * a node that is not a directory fail with ENOTDIR. The empty name and "."
+ * resolve to the directory itself, ".." to the vnode of the node's recorded
+ * parent, and anything else is searched for by name in the directory's AVL
+ * tree.
+ *
+ * On success a hold is placed on the resulting vnode, it is stored in *vpp
+ * and 0 is returned; ENOENT is returned when no entry matches.
+ */
+/*ARGSUSED*/
+static int
+bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	bootfs_node_t *dir = (bootfs_node_t *)dvp->v_data;
+	bootfs_node_t key, *child;
+	avl_index_t where;
+	vnode_t *result;
+
+	if (flags & LOOKUP_XATTR)
+		return (EINVAL);
+
+	if (dir->bvn_attr.va_type != VDIR)
+		return (ENOTDIR);
+
+	if (*nm == '\0' || strcmp(nm, ".") == 0) {
+		result = dvp;
+	} else if (strcmp(nm, "..") == 0) {
+		result = dir->bvn_parent->bvn_vnp;
+	} else {
+		/* Only the name of the search key is filled in. */
+		key.bvn_name = nm;
+		child = avl_find(&dir->bvn_dir, &key, &where);
+		if (child == NULL)
+			return (ENOENT);
+		result = child->bvn_vnp;
+	}
+
+	VN_HOLD(result);
+	*vpp = result;
+	return (0);
+}
+
+/*
+ * Append one dirent64 record for `name' to the readdir staging buffer.
+ *
+ * *dpp is the position at which the record is to be written and *bremp the
+ * number of bytes still free in the buffer. If the record fits it is filled
+ * in with `ino' and `off', *dpp is advanced past it, *bremp is reduced
+ * accordingly and B_TRUE is returned. If it does not fit nothing is written,
+ * both values are left untouched and B_FALSE is returned.
+ */
+static boolean_t
+bootfs_readdir_emit(dirent64_t **dpp, ulong_t *bremp, ino64_t ino,
+    offset_t off, const char *name)
+{
+	dirent64_t *dp = *dpp;
+	size_t dlen = DIRENT64_RECLEN(strlen(name));
+
+	if (dlen > *bremp)
+		return (B_FALSE);
+
+	dp->d_ino = ino;
+	dp->d_off = off;
+	dp->d_reclen = (ushort_t)dlen;
+	/* strncpy() zero-fills whatever remains of the name area. */
+	(void) strncpy(dp->d_name, name, DIRENT64_NAMELEN(dlen));
+	*dpp = (dirent64_t *)((uintptr_t)dp + dp->d_reclen);
+	*bremp -= dlen;
+	return (B_TRUE);
+}
+
+/*
+ * VOP_READDIR: return as many directory entries as fit in the caller's
+ * buffer, starting at the directory offset in uiop->uio_loffset.
+ *
+ * The request must use exactly one non-empty iovec (EINVAL otherwise) and the
+ * vnode must be a directory (ENOTDIR otherwise). Entries are staged in a
+ * kernel buffer of the same size as the caller's and copied out in one go.
+ * If not even the first entry due fits in the buffer, EINVAL is returned.
+ *
+ * On success the uio offset is advanced to the offset of the next entry to
+ * return and, when eofp is not NULL, *eofp says whether the end of the
+ * directory was reached.
+ *
+ * Every entry, including "." and "..", is bounds checked against the space
+ * left in the staging buffer before it is written. A buffer large enough for
+ * "." but not for ".." therefore yields a short result that resumes at ".."
+ * on the next call, rather than a write past the end of the buffer.
+ */
+/*ARGSUSED*/
+static int
+bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
+    caller_context_t *ct, int flags)
+{
+	bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
+	dirent64_t *dp;
+	void *buf;
+	ulong_t bsize, brem;
+	offset_t coff, roff;
+	int ret;
+	bootfs_node_t *dnp;
+	boolean_t full = B_FALSE;
+
+	if (uiop->uio_loffset >= MAXOFF_T) {
+		if (eofp != NULL)
+			*eofp = 1;
+		return (0);
+	}
+
+	if (uiop->uio_iovcnt != 1)
+		return (EINVAL);
+
+	if (!(uiop->uio_iov->iov_len > 0))
+		return (EINVAL);
+
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	roff = uiop->uio_loffset;
+	coff = 0;
+	brem = bsize = uiop->uio_iov->iov_len;
+	buf = kmem_alloc(bsize, KM_SLEEP);
+	dp = buf;
+
+	/*
+	 * Recall that offsets here are done based on the name of the dirent
+	 * excluding the null terminator. Therefore `.` is always at 0, `..` is
+	 * always at 1, and then the first real dirent is at 3. This offset is
+	 * what's actually stored when we update the offset in the structure.
+	 *
+	 * Whenever an entry does not fit, `full' is set and `coff' is left at
+	 * the offset of that entry so the next call resumes with it.
+	 */
+	if (roff == 0) {
+		if (bootfs_readdir_emit(&dp, &brem,
+		    (ino64_t)bnp->bvn_attr.va_nodeid, 0, ".") == B_FALSE)
+			full = B_TRUE;
+	}
+
+	if (full == B_FALSE && roff <= 1) {
+		if (bootfs_readdir_emit(&dp, &brem,
+		    (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid, 1,
+		    "..") == B_FALSE) {
+			coff = 1;
+			full = B_TRUE;
+		}
+	}
+
+	if (full == B_FALSE) {
+		coff = 3;
+		for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL;
+		    dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) {
+			size_t nlen = strlen(dnp->bvn_name);
+
+			/* Skip entries that lie before the resume offset. */
+			if (roff > coff) {
+				coff += nlen;
+				continue;
+			}
+
+			if (bootfs_readdir_emit(&dp, &brem,
+			    (ino64_t)dnp->bvn_attr.va_nodeid, coff,
+			    dnp->bvn_name) == B_FALSE) {
+				full = B_TRUE;
+				break;
+			}
+			coff += nlen;
+		}
+	}
+
+	/* The buffer cannot hold even the first entry that is due. */
+	if (full == B_TRUE && brem == bsize) {
+		kmem_free(buf, bsize);
+		return (EINVAL);
+	}
+
+	ret = uiomove(buf, (bsize - brem), UIO_READ, uiop);
+
+	if (ret == 0) {
+		if (full == B_FALSE) {
+			/*
+			 * Everything was returned; step past the final entry
+			 * so that a later call skips all of them.
+			 */
+			coff++;
+			if (eofp != NULL)
+				*eofp = 1;
+		} else if (eofp != NULL) {
+			*eofp = 0;
+		}
+		uiop->uio_loffset = coff;
+	}
+	gethrestime(&bnp->bvn_attr.va_atime);
+	kmem_free(buf, bsize);
+	return (ret);
+}
+
+/*
+ * VOP_INACTIVE: intentionally empty; nothing is done here when a bootfs
+ * vnode becomes inactive.
+ */
+/*ARGSUSED*/
+static void
+bootfs_inactive(vnode_t *vp, cred_t *cr,
+    caller_context_t *ct)
+{
+}
+
+/*
+ * VOP_RWLOCK: no lock is actually taken. A request for a write lock
+ * (write_lock != 0) is answered with EINVAL; a read lock request returns 0.
+ */
+/*ARGSUSED*/
+static int
+bootfs_rwlock(vnode_t *vp, int write_lock,
+    caller_context_t *ct)
+{
+	return (write_lock != 0 ? EINVAL : 0);
+}
+
+/*
+ * VOP_RWUNLOCK: intentionally empty, since bootfs_rwlock() never acquires
+ * anything that would need releasing.
+ */
+/*ARGSUSED*/
+static void
+bootfs_rwunlock(vnode_t *vp, int write_lock,
+    caller_context_t *ct)
+{
+}
+
+/*
+ * VOP_SEEK: validate a proposed new file offset.
+ *
+ * Any offset is accepted for a directory. For every other vnode the new
+ * offset in *noffp must lie between 0 and the size of the file, inclusive;
+ * anything else is rejected with EINVAL.
+ */
+/*ARGSUSED*/
+static int
+bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
+    caller_context_t *ct)
+{
+	bootfs_node_t *node = (bootfs_node_t *)vp->v_data;
+
+	if (vp->v_type == VDIR)
+		return (0);
+
+	if (*noffp < 0)
+		return (EINVAL);
+
+	if (*noffp > node->bvn_size)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * We need to fill in a single page of a vnode's memory based on the actual data
+ * from the kernel. We'll use this node's sliding window into physical memory
+ * and update one page at a time.
+ *
+ * If a page for (vp, off) already exists it is returned as is: locked and
+ * stored in pl[0] (with pl[1] set to NULL) when the caller supplied a page
+ * list, or unlocked again when pl is NULL. Otherwise a new page is created
+ * and filled by copying from the page that backs bvn_addr + off.
+ *
+ * Returns 0 on success, or EIO if the copy into the new page fails.
+ */
+/*ARGSUSED*/
+static int
+bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
+ page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
+ cred_t *cr)
+{
+ bootfs_node_t *bnp = vp->v_data;
+ page_t *pp, *fpp;
+ pfn_t pfn;
+
+ for (;;) {
+ /* Easy case where the page exists */
+ /* The lookup is exclusive only for S_CREATE, shared otherwise. */
+ pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED);
+ if (pp != NULL) {
+ if (pl != NULL) {
+ pl[0] = pp;
+ pl[1] = NULL;
+ } else {
+ page_unlock(pp);
+ }
+ return (0);
+ }
+
+ pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT, seg,
+ addr);
+
+ /*
+ * If we didn't get the page, that means someone else beat us to
+ * creating this so we need to try again.
+ */
+ if (pp != NULL)
+ break;
+ }
+
+ /*
+ * Find the page backing this part of the file: bvn_addr + off is rounded
+ * down to a page boundary and turned into a page frame number.
+ * NOTE(review): fpp is used without a NULL check -- this presumably relies
+ * on every frame of a boot module having a page_t; confirm.
+ */
+ pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
+ fpp = page_numtopp_nolock(pfn);
+
+ /* A zero return from ppcopy() is treated as a failed copy. */
+ if (ppcopy(fpp, pp) == 0) {
+ pvn_read_done(pp, B_ERROR);
+ return (EIO);
+ }
+
+ /* Hand the new page to the caller, or finish with it if not wanted. */
+ if (pl != NULL) {
+ pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
+ } else {
+ pvn_io_done(pp);
+ }
+
+ return (0);
+}
+
+/*
+ * VOP_GETPAGE: supply the pages that cover [off, off + len) of the file.
+ *
+ * A request reaching beyond the file size plus PAGEOFFSET fails with EFAULT.
+ * A request of at most one page is served by bootfs_getapage() directly;
+ * larger ones go through pvn_getpages(), with bootfs_getapage() as the
+ * per-page callback. The result of whichever routine ran is returned.
+ */
+/*ARGSUSED*/
+static int
+bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
+    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
+    cred_t *cr, caller_context_t *ct)
+{
+	bootfs_node_t *node = vp->v_data;
+
+	if (off + len > node->bvn_size + PAGEOFFSET)
+		return (EFAULT);
+
+	if (len > PAGESIZE) {
+		return (pvn_getpages(bootfs_getapage, vp, (u_offset_t)off, len,
+		    protp, pl, plsz, seg, addr, rw, cr));
+	}
+
+	return (bootfs_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
+	    seg, addr, rw, cr));
+}
+
+/*
+ * VOP_MAP: map a bootfs file into the address space `as'.
+ *
+ * The request is validated before anything is mapped:
+ *   - ENOMEM   (ILP32 kernels only) len is larger than MAXOFF_T
+ *   - ENOSYS   the vnode is flagged VNOMAP
+ *   - ENXIO    off is negative, or off + len would exceed MAXOFFSET_T
+ *   - ENODEV   the vnode is not a regular file
+ *   - ENOTSUP  PROT_WRITE was requested
+ *
+ * The range check compares len against the room left above off. This keeps
+ * the arithmetic free of overflow and, unlike a check that only looks at
+ * off, actually covers the whole requested range.
+ *
+ * With the address space range lock held an address is chosen by
+ * choose_addr() and the mapping is created with segvn_create() via as_map().
+ * Returns 0 on success or the error from choose_addr()/as_map().
+ */
+/*ARGSUSED*/
+static int
+bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	int ret;
+	segvn_crargs_t vn_a;
+
+#ifdef _ILP32
+	if (len > MAXOFF_T)
+		return (ENOMEM);
+#endif
+
+	if (vp->v_flag & VNOMAP)
+		return (ENOSYS);
+
+	if (off < 0 || len > MAXOFFSET_T - off)
+		return (ENXIO);
+
+	if (vp->v_type != VREG)
+		return (ENODEV);
+
+	if (prot & PROT_WRITE)
+		return (ENOTSUP);
+
+	as_rangelock(as);
+	ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+	if (ret != 0) {
+		as_rangeunlock(as);
+		return (ret);
+	}
+
+	vn_a.vp = vp;
+	vn_a.offset = (u_offset_t)off;
+	vn_a.type = flags & MAP_TYPE;
+	vn_a.prot = prot;
+	vn_a.maxprot = maxprot;
+	vn_a.cred = cr;
+	vn_a.amp = NULL;
+	vn_a.flags = flags & ~MAP_TYPE;
+	vn_a.szc = 0;
+	vn_a.lgrp_mem_policy_flags = 0;
+
+	ret = as_map(as, *addrp, len, segvn_create, &vn_a);
+
+	as_rangeunlock(as);
+	return (ret);
+}
+
+/*
+ * VOP_ADDMAP: bootfs keeps no bookkeeping for mappings of its files, so the
+ * notification is accepted and 0 is returned.
+ */
+/*ARGSUSED*/
+static int
+bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
+    cred_t *cr, caller_context_t *ct)
+{
+	return (0);
+}
+
+/*
+ * VOP_DELMAP: the counterpart of bootfs_addmap(). Nothing was recorded when
+ * the mapping was added, so there is nothing to undo and 0 is returned.
+ */
+/*ARGSUSED*/
+static int
+bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+    size_t len, uint_t prot, uint_t maxprot, uint_t flags,
+    cred_t *cr, caller_context_t *ct)
+{
+	return (0);
+}
+
+/*
+ * VOP_PATHCONF: answer _PC_TIMESTAMP_RESOLUTION locally by storing 1 in
+ * *valp and returning 0. Every other query is passed on to the generic
+ * fs_pathconf(), whose result is returned.
+ */
+static int
+bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
+    caller_context_t *ct)
+{
+	if (cmd == _PC_TIMESTAMP_RESOLUTION) {
+		*valp = 1L;
+		return (0);
+	}
+
+	return (fs_pathconf(vp, cmd, valp, cr, ct));
+}
+
+/*
+ * Template from which the bootfs vnode operations vector is built: each pair
+ * names a vnode operation and the bootfs routine that implements it. No
+ * write, create or remove style operations appear in the list. The table is
+ * terminated by a NULL, NULL pair and is passed to vn_make_ops() by
+ * bootfs_init().
+ */
+const fs_operation_def_t bootfs_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = bootfs_open },
+ VOPNAME_CLOSE, { .vop_close = bootfs_close },
+ VOPNAME_READ, { .vop_read = bootfs_read },
+ VOPNAME_IOCTL, { .vop_ioctl = bootfs_ioctl },
+ VOPNAME_GETATTR, { .vop_getattr = bootfs_getattr },
+ VOPNAME_ACCESS, { .vop_access = bootfs_access },
+ VOPNAME_LOOKUP, { .vop_lookup = bootfs_lookup },
+ VOPNAME_READDIR, { .vop_readdir = bootfs_readdir },
+ VOPNAME_INACTIVE, { .vop_inactive = bootfs_inactive },
+ VOPNAME_RWLOCK, { .vop_rwlock = bootfs_rwlock },
+ VOPNAME_RWUNLOCK, { .vop_rwunlock = bootfs_rwunlock },
+ VOPNAME_SEEK, { .vop_seek = bootfs_seek },
+ VOPNAME_GETPAGE, { .vop_getpage = bootfs_getpage },
+ VOPNAME_MAP, { .vop_map = bootfs_map },
+ VOPNAME_ADDMAP, { .vop_addmap = bootfs_addmap },
+ VOPNAME_DELMAP, { .vop_delmap = bootfs_delmap },
+ VOPNAME_PATHCONF, { .vop_pathconf = bootfs_pathconf },
+ /* vnode events are declined via the generic fs_vnevent_nosupport() */
+ VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_nosupport },
+ NULL, NULL
+};
diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c
index bea80bd609..8044a3deaa 100644
--- a/usr/src/uts/common/fs/vfs.c
+++ b/usr/src/uts/common/fs/vfs.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -905,6 +905,7 @@ vfs_mountroot(void)
vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab");
vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
vfs_mountfs("objfs", "objfs", OBJFS_ROOT);
+ vfs_mountfs("bootfs", "bootfs", "/system/boot");
if (getzoneid() == GLOBAL_ZONEID) {
vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
@@ -1594,7 +1595,7 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
/*
* Serialize with zone state transitions.
* See vfs_list_add; zone mounted into is:
- * zone_find_by_path(refstr_value(vfsp->vfs_mntpt))
+ * zone_find_by_path(refstr_value(vfsp->vfs_mntpt))
* not the zone doing the mount (curproc->p_zone), but if we're already
* inside a NGZ, then we know what zone we are.
*/
diff --git a/usr/src/uts/common/sys/fs/bootfs_impl.h b/usr/src/uts/common/sys/fs/bootfs_impl.h
new file mode 100644
index 0000000000..5726f1428a
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/bootfs_impl.h
@@ -0,0 +1,81 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_BOOTFS_IMPL_H
+#define _SYS_FS_BOOTFS_IMPL_H
+
+#include <sys/types.h>
+#include <sys/list.h>
+#include <sys/avl.h>
+#include <sys/vnode.h>
+#include <sys/vfs_opreg.h>
+#include <sys/kstat.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The bootfs node is the file system specific version of the vnode for the
+ * bootfs file system. Because the bootfs file system is entirely a read-only
+ * file system, this structure requires no locking as the contents are
+ * immutable.
+ *
+ * NOTE(review): the one exception visible in the vnode operations is
+ * bvn_attr.va_atime, which bootfs_read() and bootfs_readdir() update with
+ * gethrestime() without holding any lock.
+ *
+ * bvn_name is the key used when a directory's bvn_dir tree is searched by
+ * bootfs_lookup(), and bvn_addr plus a file offset is what bootfs_getapage()
+ * turns into a page frame number to locate the file's data.
+ */
+typedef struct bootfs_node {
+ char *bvn_name; /* entry name */
+ struct vnode *bvn_vnp; /* Corresponding vnode */
+ avl_tree_t bvn_dir; /* directory entries, if VDIR */
+ avl_node_t bvn_link; /* dirent link */
+ list_node_t bvn_alink; /* link for all nodes */
+ uint64_t bvn_addr; /* Address in pmem */
+ uint64_t bvn_size; /* Size of the file */
+ struct bootfs_node *bvn_parent; /* .. */
+ vattr_t bvn_attr; /* attributes for the node */
+} bootfs_node_t;
+
+/*
+ * Named kstat counters kept for a bootfs instance (embedded in bootfs_t as
+ * bfs_stat). NOTE(review): the per-field meanings below are inferred from
+ * the field names only -- confirm against the code that updates them.
+ */
+typedef struct bootfs_stat {
+ kstat_named_t bfss_nfiles; /* presumably: number of files */
+ kstat_named_t bfss_ndirs; /* presumably: number of directories */
+ kstat_named_t bfss_nbytes; /* presumably: total bytes of file data */
+ kstat_named_t bfss_ndups; /* presumably: duplicate entries seen */
+ kstat_named_t bfss_ndiscards; /* presumably: entries discarded */
+} bootfs_stat_t;
+
+/*
+ * State for a bootfs file system instance. NOTE(review): the field
+ * descriptions below are inferred from names and types only, as the code
+ * that fills them in is not part of this header -- confirm before relying on
+ * them.
+ */
+typedef struct bootfs {
+ vfs_t *bfs_vfsp; /* presumably: the owning vfs_t */
+ char *bfs_mntpath; /* presumably: mount point path */
+ bootfs_node_t *bfs_rootvn; /* presumably: root directory node */
+ kstat_t *bfs_kstat; /* presumably: kstat exporting bfs_stat */
+ list_t bfs_nodes; /* presumably: all nodes, via bvn_alink */
+ minor_t bfs_minor; /* presumably: minor number of this mount */
+ uint_t bfs_ninode; /* presumably: count of nodes created */
+ bootfs_stat_t bfs_stat; /* statistics counters */
+} bootfs_t;
+
+/*
+ * Build and tear down the contents of a bootfs instance. NOTE(review): the
+ * implementations are not visible from this header; presumably they populate
+ * and free the node tree -- confirm in bootfs_construct.c.
+ */
+extern void bootfs_construct(bootfs_t *);
+extern void bootfs_destruct(bootfs_t *);
+/* kmem cache constructor/destructor registered for bootfs_node_cache */
+extern int bootfs_node_constructor(void *, void *, int);
+extern void bootfs_node_destructor(void *, void *);
+
+/* Global state shared between the bootfs source files */
+extern struct vnodeops *bootfs_vnodeops;
+extern const fs_operation_def_t bootfs_vnodeops_template[];
+extern kmem_cache_t *bootfs_node_cache;
+extern major_t bootfs_major;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_BOOTFS_IMPL_H */
diff --git a/usr/src/uts/intel/bootfs/Makefile b/usr/src/uts/intel/bootfs/Makefile
new file mode 100644
index 0000000000..ca412de439
--- /dev/null
+++ b/usr/src/uts/intel/bootfs/Makefile
@@ -0,0 +1,72 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# This makefile drives the production of the bootfs file system
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+# MODULE is the name of the kernel module being built.
+# OBJECTS and LINTS are derived from BOOTFS_OBJS (defined outside this
+# makefile): each object gets the $(OBJS_DIR)/ prefix, and each .o name is
+# mapped to a .ln file under $(LINTS_DIR).
+# ROOTMODULE is the module's location below $(ROOT_FS_DIR).
+MODULE = bootfs
+OBJECTS = $(BOOTFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(BOOTFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+# The target variables name the binary, the module's lint target and, for
+# installation, both the binary and its $(ROOTMODULE) location. They are
+# presumably consumed by the included common makefiles -- confirm there.
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ