diff options
author | gw25295 <none@none> | 2008-04-11 18:36:28 -0700 |
---|---|---|
committer | gw25295 <none@none> | 2008-04-11 18:36:28 -0700 |
commit | e7cbe64f7a72dae5cb44f100db60ca88f3313c65 (patch) | |
tree | 778467a6522111f338e4644cc2cb895dcecacee4 | |
parent | f635d46a9872dc5a02bbbd736f2bf18685c2c221 (diff) | |
download | illumos-gate-e7cbe64f7a72dae5cb44f100db60ca88f3313c65.tar.gz |
PSARC 2006/370 ZFS Boot Support
5008936 ZFS and/or zvol should support dumps
5070124 dumpadm -d /dev/... does not enforce block device requirement for savecore
6521468 ZFS Boot support Phase 2
6553503 bfu can't find 'rootdev' from /etc/vfstab on a zfs root filesystem
6574993 zfs_mountroot() may need to call clkset() to set the boot_time kstat
6633197 zvol should not permit newfs or createpool while it's in use by swap or dump
6661127 zfs_name_valid() does not support ZFS_TYPE_POOL
6684121 The changes to smf scripts for supporting canmount=noauto will cause a boot failure.
--HG--
rename : usr/src/psm/stand/bootblks/zfs/common/debug-zfs.fth => deleted_files/usr/src/psm/stand/bootblks/zfs/common/debug-zfs.fth
rename : usr/src/psm/stand/bootblks/zfs/common/big-zfs.fth => usr/src/psm/stand/bootblks/zfs/common/fs-zfs.fth
101 files changed, 3897 insertions, 466 deletions
diff --git a/usr/src/psm/stand/bootblks/zfs/common/debug-zfs.fth b/deleted_files/usr/src/psm/stand/bootblks/zfs/common/debug-zfs.fth index 296ea5a2a7..296ea5a2a7 100644 --- a/usr/src/psm/stand/bootblks/zfs/common/debug-zfs.fth +++ b/deleted_files/usr/src/psm/stand/bootblks/zfs/common/debug-zfs.fth diff --git a/usr/src/cmd/boot/bootadm/bootadm.c b/usr/src/cmd/boot/bootadm/bootadm.c index d1ae66bbcb..a0647e0b0e 100644 --- a/usr/src/cmd/boot/bootadm/bootadm.c +++ b/usr/src/cmd/boot/bootadm/bootadm.c @@ -54,6 +54,7 @@ #include <sys/systeminfo.h> #include <sys/dktp/fdisk.h> #include <sys/param.h> +#include <sys/sysmacros.h> #if !defined(_OPB) #include <sys/ucode.h> @@ -197,6 +198,7 @@ static int bam_argc; static int bam_check; static int bam_smf_check; static int bam_lock_fd = -1; +static int bam_zfs; static char rootbuf[PATH_MAX] = "/"; static int bam_update_all; static int bam_alt_platform; @@ -242,6 +244,7 @@ static int s_fputs(char *, FILE *); static char *s_strdup(char *); static int is_readonly(char *); +static int is_zfs(char *, char **); static int is_amd64(void); static int is_sun4u(void); static int is_sun4v(void); @@ -419,7 +422,7 @@ parse_args_internal(int argc, char *argv[]) opterr = 0; error = 0; - while ((c = getopt(argc, argv, "a:d:fm:no:vCR:p:")) != -1) { + while ((c = getopt(argc, argv, "a:d:fm:no:vCR:p:Z")) != -1) { switch (c) { case 'a': if (bam_cmd) { @@ -500,6 +503,9 @@ parse_args_internal(int argc, char *argv[]) bam_error(INVALID_PLAT, bam_platform); } break; + case 'Z': + bam_zfs = 1; + break; case '?': error = 1; bam_error(BAD_OPT, optopt); @@ -893,8 +899,9 @@ bam_menu(char *subcmd, char *opt, int largc, char *largv[]) error_t ret; char menu_path[PATH_MAX]; char path[PATH_MAX]; + char full_menu_root[PATH_MAX]; menu_t *menu; - char *mntpt, *menu_root, *logslice, *fstype; + char *mntpt, *menu_root, *logslice, *fstype, *grubSLICEpool, *pool; struct stat sb; int mnted; /* set if we did a mount */ error_t (*f)(menu_t *mp, char *menu_path, char *opt); 
@@ -919,7 +926,7 @@ bam_menu(char *subcmd, char *opt, int largc, char *largv[]) mntpt = NULL; mnted = 0; - logslice = fstype = NULL; + logslice = fstype = grubSLICEpool = pool = NULL; /* * Check for the menu.list file: @@ -931,10 +938,18 @@ bam_menu(char *subcmd, char *opt, int largc, char *largv[]) * 4. Use / */ if (bam_alt_root) { - (void) snprintf(path, sizeof (path), "%s%s", bam_root, - GRUB_slice); + if (is_zfs(bam_root, &grubSLICEpool)) + (void) snprintf(path, sizeof (path), "%s/%s%s", + bam_root, grubSLICEpool, GRUB_slice); + else + (void) snprintf(path, sizeof (path), "%s%s", + bam_root, GRUB_slice); } else { - (void) snprintf(path, sizeof (path), "%s", GRUB_slice); + if (is_zfs(bam_root, &grubSLICEpool)) + (void) snprintf(path, sizeof (path), "/%s%s", + grubSLICEpool, GRUB_slice); + else + (void) snprintf(path, sizeof (path), "%s", GRUB_slice); } if (stat(path, &sb) == 0) { @@ -953,7 +968,25 @@ bam_menu(char *subcmd, char *opt, int largc, char *largv[]) return (BAM_ERROR); } - elide_trailing_slash(menu_root, menu_path, sizeof (menu_path)); + /* + * menu_root is the root file system of the boot environment being + * operated on. + * full_menu_root is the location of the /boot/grub directory. + * With a ufs root, this is simply menu_root. 
With a zfs + * root, it's <menu_root>/<poolname> + */ + elide_trailing_slash(menu_root, full_menu_root, + sizeof (full_menu_root)); + + if (is_zfs(menu_root, &pool)) { + (void) strlcat(full_menu_root, "/", sizeof (full_menu_root)); + (void) strlcat(full_menu_root, pool, sizeof (full_menu_root)); + } + + /* + * menu_path is the directory that contains the menu.lst file + */ + (void) strlcpy(menu_path, full_menu_root, sizeof (menu_path)); (void) strlcat(menu_path, GRUB_MENU, sizeof (menu_path)); /* @@ -999,13 +1032,17 @@ bam_menu(char *subcmd, char *opt, int largc, char *largv[]) else ret = f(menu, menu_path, opt); if (ret == BAM_WRITE) { - ret = menu_write(menu_root, menu); + ret = menu_write(full_menu_root, menu); } menu_free(menu); umount_grub_slice(mnted, mntpt, NULL, logslice, fstype); + if (grubSLICEpool) + free(grubSLICEpool); + if (pool) + free(pool); return (ret); } @@ -2093,6 +2130,65 @@ is_readonly(char *root) return (0); } +static int +is_zfs(char *root, char **poolname) +{ + struct statvfs64 vfs; + FILE *fp; + struct extmnttab mnt; + dev_t devicenum; + char *special = NULL; + char *cp; + + /* poolname can be null */ + if (poolname) + *poolname = NULL; + + if (statvfs64(root, &vfs) != 0) { + if (bam_verbose) + bam_error(STATVFS_FAIL, root, strerror(errno)); + return (0); + } + + if (strncmp(vfs.f_basetype, "zfs", strlen("zfs")) != 0) + return (0); + + if (poolname == NULL) + return (1); + + /* + * Now find the mnttab entry so that we can extract the + * pool name from the special device field. 
+ */ + fp = fopen(MNTTAB, "r"); + if (fp == NULL) { + bam_error(OPEN_FAIL, MNTTAB, strerror(errno)); + return (0); + } + + resetmnttab(fp); + + while (getextmntent(fp, &mnt, sizeof (mnt)) == 0) { + devicenum = makedevice(mnt.mnt_major, mnt.mnt_minor); + if (devicenum == vfs.f_fsid) { + special = s_strdup(mnt.mnt_special); + if ((cp = strchr(special, '/')) != NULL) + *cp = '\0'; + *poolname = s_strdup(special); + break; + } + } + + (void) fclose(fp); + + if (special) { + free(special); + return (1); + } + + return (0); +} + static error_t update_archive(char *root, char *opt) { @@ -3480,12 +3576,13 @@ update_entry(menu_t *mp, char *menu_root, char *opt) /* add the entry for normal Solaris */ if (bam_direct == BAM_DIRECT_DBOOT) { entry = update_boot_entry(mp, title, grubdisk, - DIRECT_BOOT_KERNEL, NULL, DIRECT_BOOT_ARCHIVE, - osroot == menu_root); + (bam_zfs ? DIRECT_BOOT_KERNEL_ZFS : DIRECT_BOOT_KERNEL), + NULL, DIRECT_BOOT_ARCHIVE, osroot == menu_root); if ((entry != BAM_ERROR) && (bam_is_hv == BAM_HV_PRESENT)) { (void) update_boot_entry(mp, NEW_HV_ENTRY, grubdisk, - XEN_MENU, KERNEL_MODULE_LINE, DIRECT_BOOT_ARCHIVE, - osroot == menu_root); + XEN_MENU, (bam_zfs ? + KERNEL_MODULE_LINE_ZFS : KERNEL_MODULE_LINE), + DIRECT_BOOT_ARCHIVE, osroot == menu_root); } } else { entry = update_boot_entry(mp, title, grubdisk, MULTI_BOOT, @@ -3503,7 +3600,9 @@ update_entry(menu_t *mp, char *menu_root, char *opt) (void) snprintf(failsafe, sizeof (failsafe), "%s%s", osroot, DIRECT_BOOT_FAILSAFE_KERNEL); if (stat(failsafe, &sbuf) == 0) { - failsafe_kernel = DIRECT_BOOT_FAILSAFE_LINE; + failsafe_kernel = + (bam_zfs ? 
DIRECT_BOOT_FAILSAFE_LINE_ZFS : + DIRECT_BOOT_FAILSAFE_LINE); } else { (void) snprintf(failsafe, sizeof (failsafe), "%s%s", osroot, MULTI_BOOT_FAILSAFE); diff --git a/usr/src/cmd/boot/bootadm/bootadm.h b/usr/src/cmd/boot/bootadm/bootadm.h index f29071d1d0..a4a606e845 100644 --- a/usr/src/cmd/boot/bootadm/bootadm.h +++ b/usr/src/cmd/boot/bootadm/bootadm.h @@ -174,6 +174,9 @@ extern int is_grub(const char *); /* Title used for hv entries */ #define NEW_HV_ENTRY "Solaris xVM" +/* ZFS boot option */ +#define ZFS_BOOT "-B $ZFS-BOOTFS" + /* multiboot */ #define MULTI_BOOT "/platform/i86pc/multiboot" #define MULTI_BOOT_FAILSAFE "/boot/multiboot" @@ -186,6 +189,8 @@ extern int is_grub(const char *); #define DIRECT_BOOT_KERNEL "/platform/i86pc/kernel/$ISADIR/unix" #define DIRECT_BOOT_FAILSAFE_KERNEL "/boot/platform/i86pc/kernel/unix" #define DIRECT_BOOT_FAILSAFE_LINE DIRECT_BOOT_FAILSAFE_KERNEL " -s" +#define DIRECT_BOOT_KERNEL_ZFS DIRECT_BOOT_KERNEL " " ZFS_BOOT +#define DIRECT_BOOT_FAILSAFE_LINE_ZFS DIRECT_BOOT_FAILSAFE_LINE " " ZFS_BOOT /* Boot archives */ #define SUN4U_ARCHIVE "/platform/sun4u/boot_archive" @@ -202,6 +207,8 @@ extern int is_grub(const char *); #define XEN_MENU "/boot/$ISADIR/xen.gz" #define HYPERVISOR_KERNEL "/platform/i86xpv/kernel/$ISADIR/unix" #define KERNEL_MODULE_LINE HYPERVISOR_KERNEL " " HYPERVISOR_KERNEL +#define KERNEL_MODULE_LINE_ZFS \ + HYPERVISOR_KERNEL " " HYPERVISOR_KERNEL " " ZFS_BOOT #ifdef __cplusplus } diff --git a/usr/src/cmd/boot/scripts/update_grub.ksh b/usr/src/cmd/boot/scripts/update_grub.ksh index 86a2ee65c8..d824afd5d1 100644 --- a/usr/src/cmd/boot/scripts/update_grub.ksh +++ b/usr/src/cmd/boot/scripts/update_grub.ksh @@ -21,7 +21,7 @@ # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# @@ -43,6 +43,7 @@ done ARCH=`uname -p` is_pcfs_boot=yes +is_zfs_boot=no check_pcfs_boot() { @@ -53,13 +54,42 @@ check_pcfs_boot() fi } +check_zfs_boot() +{ + if [ -f "$ALT_ROOT"/etc/lu/GRUB_slice ]; then + dev=`grep '^PHYS_SLICE=' "$ALT_ROOT"/etc/lu/GRUB_slice | + cut -d= -f2` + if [ "`fstyp $dev`" = "zfs" ]; then + is_zfs_boot=yes + fi + else + rootfstype=`df -n ${ALT_ROOT:-/} | awk '{print $3}'` + if [ "$rootfstype" = "zfs" ]; then + is_zfs_boot=yes + fi + + fi +} + # # Detect SVM root and return the list of raw devices under the mirror # get_rootdev_list() { if [ -f "$ALT_ROOT"/etc/lu/GRUB_slice ]; then - grep '^PHYS_SLICE' "$ALT_ROOT"/etc/lu/GRUB_slice | cut -d= -f2 + dev=`grep '^PHYS_SLICE' "$ALT_ROOT"/etc/lu/GRUB_slice | + cut -d= -f2` + if [ "$is_zfs_boot" = "yes" ]; then + fstyp -a "$dev" | grep 'path: ' | grep -v phys_path: | + cut -d"'" -f2 | sed 's+/dsk/+/rdsk/+' + else + echo "$dev" + fi + return + elif [ "$is_zfs_boot" = "yes" ]; then + rootpool=`df -k ${ALT_ROOT:-/} | tail +2 | cut -d/ -f1` + rootdevlist=`zpool iostat -v "$rootpool" | tail +5 | + grep -v mirror | sed -n -e '/--/q' -e p | awk '{print $1}'` else metadev=`grep -v "^#" "$ALT_ROOT"/etc/vfstab | \ grep "[ ]/[ ]" | nawk '{print $2}'` @@ -70,11 +100,11 @@ get_rootdev_list() rootdevlist=`metastat -p $metavol |\ grep -v "^$metavol[ ]" | nawk '{print $4}'` fi - for rootdev in $rootdevlist - do - echo /dev/rdsk/$rootdev - done fi + for rootdev in $rootdevlist + do + echo /dev/rdsk/$rootdev + done } # @@ -101,16 +131,22 @@ install_grub() fi fi - get_rootdev_list | while read rootdev + grubdevlist=`get_rootdev_list` + zfsarg="" + if [ "$is_zfs_boot" = "yes" ]; then + zfsarg="-Z" + fi + + for rootdev in $grubdevlist do if [ X"$rpcfsdev" != X ]; then echo "create GRUB menu in "$ALT_ROOT"/stubboot" - "$ALT_ROOT"/sbin/bootadm update-menu \ + "$ALT_ROOT"/sbin/bootadm update-menu $zfsarg\ -R "$ALT_ROOT"/stubboot -o $rootdev,"$ALT_ROOT" else echo "Creating GRUB menu in ${ALT_ROOT:-/}" 
$ALT_ROOT/sbin/bootadm update-menu -R ${ALT_ROOT:-/} \ - -o $rootdev + $zfsarg -o $rootdev fi print "Installing grub on $rootdev" "$ALT_ROOT"/sbin/installgrub $STAGE1 $STAGE2 $rootdev @@ -119,6 +155,7 @@ install_grub() if [ -f "$ALT_ROOT"/platform/i86pc/multiboot -a "$ARCH" = i386 ] ; then check_pcfs_boot + check_zfs_boot install_grub fi diff --git a/usr/src/cmd/dumpadm/Makefile b/usr/src/cmd/dumpadm/Makefile index 406f5dc3a0..fac14347df 100644 --- a/usr/src/cmd/dumpadm/Makefile +++ b/usr/src/cmd/dumpadm/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -44,7 +44,7 @@ GROUP = bin ROOTMANIFESTDIR = $(ROOTSVCSYSTEM) -LDLIBS += -ldiskmgt +LDLIBS += -ldiskmgt -lzfs .KEEP_STATE: diff --git a/usr/src/cmd/dumpadm/dconf.c b/usr/src/cmd/dumpadm/dconf.c index 1fb9216765..3cd5b725da 100644 --- a/usr/src/cmd/dumpadm/dconf.c +++ b/usr/src/cmd/dumpadm/dconf.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -38,6 +38,7 @@ #include <fcntl.h> #include <errno.h> #include <libdiskmgt.h> +#include <libzfs.h> #include "dconf.h" #include "minfree.h" @@ -343,6 +344,11 @@ dconf_dev_ioctl(dumpconf_t *dcp, int cmd) case EBUSY: warn(gettext("device %s is already in use\n"), dcp->dc_device); break; + case EBADR: + /* ZFS pool is too fragmented to support a dump device */ + warn(gettext("device %s is too fragmented to be used as " + "a dump device\n"), dcp->dc_device); + break; default: /* * NOTE: The stmsboot(1M) command's boot-up script parses this @@ -447,6 +453,9 @@ dconf_update(dumpconf_t *dcp, int checkinuse) goto err; } + if ((error = zvol_check_dump_config( + dcp->dc_device)) > 0) + goto err; if (ioctl(dcp->dc_dump_fd, DIOCGETDUMPSIZE, &d) == -1) { warn(gettext("failed to get kernel dump size")); goto err; diff --git a/usr/src/cmd/power/Makefile b/usr/src/cmd/power/Makefile index 6fc647efc5..0af61cb3fb 100644 --- a/usr/src/cmd/power/Makefile +++ b/usr/src/cmd/power/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -60,7 +60,7 @@ POFILES= $(PMCFG_SRCS:%.c=%.po) OPOFILES= sysidpm.po # pmconfig only needs libdevinfo on sparc -sparc_LDEVINFO= -ldevinfo +sparc_LDEVINFO= -ldevinfo -lefi -ladm -lzfs -lnvpair i386_LDEVINFO= LDEVINFO= -ldevinfo diff --git a/usr/src/cmd/power/handlers.c b/usr/src/cmd/power/handlers.c index 3440d56389..f5fa621c0c 100644 --- a/usr/src/cmd/power/handlers.c +++ b/usr/src/cmd/power/handlers.c @@ -30,11 +30,14 @@ #include <sys/syslog.h> #include <sys/openpromio.h> #include <sys/mnttab.h> +#include <sys/vtoc.h> +#include <sys/efi_partition.h> #include <syslog.h> #include <stdlib.h> #include <sys/pm.h> #include <kstat.h> #include <sys/smbios.h> +#include <libzfs.h> #define STRCPYLIM(dst, src, str) strcpy_limit(dst, src, sizeof (dst), str) @@ -824,7 +827,6 @@ nfsreq(void) return (scan_int(LINEARG(1), &new_cc.nfsreqs_thold)); } - #ifdef sparc static char open_fmt[] = "cannot open \"%s\", %s\n"; @@ -838,6 +840,8 @@ check_mount(char *sfile, dev_t sfdev, int ufs) { char *src, *err_fmt = NULL, *mnttab = MNTTAB; int rgent, match = 0; + struct mnttab zroot = { 0 }; + struct mnttab entry; struct extmnttab ent; FILE *fp; @@ -846,6 +850,18 @@ check_mount(char *sfile, dev_t sfdev, int ufs) return (1); } + if (ufs) { + zroot.mnt_mountp = "/"; + zroot.mnt_fstype = "zfs"; + if (getmntany(fp, &entry, &zroot) == 0) { + err_fmt = "ufs statefile with zfs root is not" + " supported\n"; + mesg(MERR, err_fmt, sfile); + fclose(fp); + return (1); + } + resetmnttab(fp); + } /* * Search for a matching dev_t; * ignore non-ufs filesystems for a regular statefile. @@ -862,14 +878,13 @@ check_mount(char *sfile, dev_t sfdev, int ufs) break; } } - (void) fclose(fp); /* * No match is needed for a block device statefile, * a match is needed for a regular statefile. 
*/ if (match == 0) { - if (new_cc.cf_type == CFT_SPEC) + if (new_cc.cf_type != CFT_UFS) STRCPYLIM(new_cc.cf_devfs, sfile, "block statefile"); else err_fmt = "cannot find ufs mount point for \"%s\"\n"; @@ -882,6 +897,7 @@ check_mount(char *sfile, dev_t sfdev, int ufs) STRCPYLIM(new_cc.cf_path, src, "statefile path"); } else err_fmt = "statefile device \"%s\" is a mounted filesystem\n"; + (void) fclose(fp); if (err_fmt) mesg(MERR, err_fmt, sfile); return (err_fmt != NULL); @@ -893,7 +909,7 @@ check_mount(char *sfile, dev_t sfdev, int ufs) * log any ioctl/conversion error. */ static int -utop(void) +utop(char *fs_name, char *prom_name) { union obpbuf { char buf[OBP_MAXPATHLEN + sizeof (uint_t)]; @@ -911,23 +927,132 @@ utop(void) opp = &oppbuf.oppio; opp->oprom_size = OBP_MAXPATHLEN; - strcpy_limit(opp->oprom_array, new_cc.cf_devfs, + strcpy_limit(opp->oprom_array, fs_name, OBP_MAXPATHLEN, "statefile device"); upval = ioctl(fd, OPROMDEV2PROMNAME, opp); (void) close(fd); - if (upval == OKUP) - STRCPYLIM(new_cc.cf_dev_prom, opp->oprom_array, "prom device"); - else { + if (upval == OKUP) { + strcpy_limit(prom_name, opp->oprom_array, OBP_MAXPATHLEN, + "prom device"); + } else { openlog("pmconfig", 0, LOG_DAEMON); syslog(LOG_NOTICE, gettext("cannot convert \"%s\" to prom device"), - new_cc.cf_devfs); + fs_name); closelog(); } return (upval); } +/* + * given the path to a zvol, return the cXtYdZ name + * returns < 0 on error, 0 if it isn't a zvol, > 1 on success + */ +static int +ztop(char *arg, char *diskname) +{ + zpool_handle_t *zpool_handle; + nvlist_t *config, *nvroot; + nvlist_t **child; + uint_t children; + libzfs_handle_t *lzfs; + char *vname; + char *p; + char pool_name[MAXPATHLEN]; + + if (strncmp(arg, "/dev/zvol/dsk/", 14)) { + return (0); + } + arg += 14; + strncpy(pool_name, arg, MAXPATHLEN); + if (p = strchr(pool_name, '/')) + *p = '\0'; + STRCPYLIM(new_cc.cf_fs, p + 1, "statefile path"); + + if ((lzfs = libzfs_init()) == NULL) { + mesg(MERR, "failed to 
initialize ZFS library\n"); + return (-1); + } + if ((zpool_handle = zpool_open(lzfs, pool_name)) == NULL) { + mesg(MERR, "couldn't open pool '%s'\n", pool_name); + libzfs_fini(lzfs); + return (-1); + } + config = zpool_get_config(zpool_handle, NULL); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) != 0) { + zpool_close(zpool_handle); + libzfs_fini(lzfs); + return (-1); + } + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + if (children != 1) { + mesg(MERR, "expected one vdev, got %d\n", children); + zpool_close(zpool_handle); + libzfs_fini(lzfs); + return (-1); + } + vname = zpool_vdev_name(lzfs, zpool_handle, child[0]); + if (vname == NULL) { + mesg(MERR, "couldn't determine vdev name\n"); + zpool_close(zpool_handle); + libzfs_fini(lzfs); + return (-1); + } + strcpy(diskname, "/dev/dsk/"); + strcat(diskname, vname); + free(vname); + zpool_close(zpool_handle); + libzfs_fini(lzfs); + return (1); +} + +/* + * returns NULL if the slice is good (e.g. does not start at block + * zero, or a string describing the error if it doesn't + */ +static boolean_t +is_good_slice(char *sfile, char **err) +{ + int fd, rc; + struct vtoc vtoc; + dk_gpt_t *gpt; + char rdskname[MAXPATHLEN]; + char *x, *y; + + *err = NULL; + /* convert from dsk to rdsk */ + STRCPYLIM(rdskname, sfile, "disk name"); + x = strstr(rdskname, "dsk/"); + y = strstr(sfile, "dsk/"); + if (x != NULL) { + *x++ = 'r'; + strcpy(x, y); + } + + if ((fd = open(rdskname, O_RDONLY)) == -1) { + *err = "could not open '%s'\n"; + } else if ((rc = read_vtoc(fd, &vtoc)) >= 0) { + /* + * we got a slice number; now check the block + * number where the slice starts + */ + if (vtoc.v_part[rc].p_start < 2) + *err = "using '%s' would clobber the disk label\n"; + close(fd); + return (*err ? 
B_FALSE : B_TRUE); + } else if ((rc == VT_ENOTSUP) && + (efi_alloc_and_read(fd, &gpt)) >= 0) { + /* EFI slices don't clobber the disk label */ + free(gpt); + close(fd); + return (B_TRUE); + } else + *err = "could not read partition table from '%s'\n"; + return (B_FALSE); +} /* * Check for a valid statefile pathname, inode and mount status. @@ -938,6 +1063,7 @@ sfpath(void) static int statefile; char *err_fmt = NULL; char *sfile, *sp, ch; + char diskname[256]; struct stat stbuf; int dir = 0; dev_t dev; @@ -991,12 +1117,20 @@ sfpath(void) new_cc.cf_type = CFT_UFS; dev = stbuf.st_dev; } else if (S_ISBLK(stbuf.st_mode)) { - if (minor(stbuf.st_rdev) != 2) { - new_cc.cf_type = CFT_SPEC; + if (is_good_slice(sfile, &err_fmt)) { + switch (ztop(sfile, diskname)) { + case 1: + new_cc.cf_type = CFT_ZVOL; + break; + case 0: + new_cc.cf_type = CFT_SPEC; + break; + case -1: + default: + return (NOUP); + } dev = stbuf.st_rdev; - } else - err_fmt = "statefile device cannot be slice 2 (%s)\n" - "would clobber the disk label and boot-block\n"; + } } else err_fmt = "bad file type for \"%s\"\n" "statefile must be a regular file or block device\n"; @@ -1004,9 +1138,14 @@ sfpath(void) mesg(MERR, err_fmt, sfile); return (NOUP); } - - if (check_mount(sfile, dev, (new_cc.cf_type == CFT_UFS)) || utop()) + if (check_mount(sfile, dev, (new_cc.cf_type == CFT_UFS))) return (NOUP); + if (new_cc.cf_type == CFT_ZVOL) { + if (utop(diskname, new_cc.cf_dev_prom)) + return (NOUP); + } else if (utop(new_cc.cf_devfs, new_cc.cf_dev_prom)) { + return (NOUP); + } new_cc.cf_magic = CPR_CONFIG_MAGIC; statefile = 1; return (OKUP); diff --git a/usr/src/cmd/power/svc-power b/usr/src/cmd/power/svc-power index 5fabb0b67f..bfef70d7e9 100644 --- a/usr/src/cmd/power/svc-power +++ b/usr/src/cmd/power/svc-power @@ -66,6 +66,19 @@ init_statefile_entry() { return ) + if [ $max_avail -eq 0 ]; then + if [ X`df -n / | awk '{print $3}'` != "Xzfs" ] ; then + return + fi + rootpool=`zfs mount | grep ' \/$' | awk '{print $1 }' 
|\ + sed 's/\/.*$//'` + if [ X$rootpool = "X" ] || \ + [ ! -L /dev/zvol/dsk/$rootpool/dump ]; then + return + fi + echo "statefile /dev/zvol/dsk/$rootpool/dump" \ + >> /etc/power.conf + fi } case "$1" in diff --git a/usr/src/cmd/svc/milestone/fs-minimal b/usr/src/cmd/svc/milestone/fs-minimal index 6a7141fa1a..ff7cea8a20 100644 --- a/usr/src/cmd/svc/milestone/fs-minimal +++ b/usr/src/cmd/svc/milestone/fs-minimal @@ -58,7 +58,7 @@ for fs in /var /var/adm /tmp; do mountpt=`zfs get -H -o value mountpoint $be$fs 2>/dev/null` if [ $? = 0 ] ; then if [ "x$mountpt" = "x$fs" ] ; then - /sbin/zfs mount $be$fs + /sbin/zfs mount -O $be$fs fi fi fi diff --git a/usr/src/cmd/svc/milestone/fs-usr b/usr/src/cmd/svc/milestone/fs-usr index f4a75e08f9..72f906e82e 100644 --- a/usr/src/cmd/svc/milestone/fs-usr +++ b/usr/src/cmd/svc/milestone/fs-usr @@ -33,18 +33,49 @@ . /lib/svc/share/fs_include.sh # -# Add physical swap. +# Once root is read/write we can enable the dedicated dumpdevice if it exists +# locally. This is an optimization as svc-dumpadm will attempt do this later. # -/sbin/swapadd -1 +dump_setup() +{ + [ -r /etc/dumpadm.conf ] && . /etc/dumpadm.conf + + readswapdev $DUMPADM_DEVICE < $vfstab + + # + # If we have a dedicated dump device, then go ahead and configure it. + # + if [ "x$special" != "x$DUMPADM_DEVICE" ]; then + if [ -x /usr/sbin/zfs ]; then + dataset=`echo $DUMPADM_DEVICE | cut -d'/' -f5-` + [ -n "$dataset" ] && \ + /usr/sbin/zfs list -t volume $dataset > \ + /dev/null 2>&1 + if [ $? -eq 0 ]; then + /usr/sbin/zfs volinit + fi + fi + + if [ -x /usr/sbin/dumpadm -a -b $DUMPADM_DEVICE ]; then + /usr/sbin/dumpadm -u || exit $SMF_EXIT_ERR_CONFIG + fi + fi +} rootiszfs=0 # get the fstype of root readmnttab / </etc/mnttab if [ "$fstype" = zfs ] ; then rootiszfs=1 + dump_setup fi # +# Add physical swap. +# +/sbin/swapadd -1 + +# # Check and remount the / (root) file system. # For NFS mounts, force the llock option on. 
# diff --git a/usr/src/cmd/svc/shell/fs_include.sh b/usr/src/cmd/svc/shell/fs_include.sh index 7ab7dfdd3e..99810383cc 100644 --- a/usr/src/cmd/svc/shell/fs_include.sh +++ b/usr/src/cmd/svc/shell/fs_include.sh @@ -72,6 +72,19 @@ readvfstab() { done } +readswapdev() { + while read special fsckdev mountp fstype fsckpass automnt mntopts; do + # Ignore comments, empty lines, and no-action lines + case "$special" in + '#'* | '' | '-') continue;; + esac + + [ "$fstype" != swap ] && continue + + [ "x$special" = "x$1" ] && break + done +} + # # readmnttab mount_point # -> (special, mountp, fstype, mntopts, mnttime) diff --git a/usr/src/cmd/swap/swap.c b/usr/src/cmd/swap/swap.c index a38897f70c..91cecd5cb4 100644 --- a/usr/src/cmd/swap/swap.c +++ b/usr/src/cmd/swap/swap.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -63,6 +63,7 @@ #include <locale.h> #include <libintl.h> #include <libdiskmgt.h> +#include <sys/fs/zfs.h> #define LFLAG 0x01 /* swap -l (list swap devices) */ #define DFLAG 0x02 /* swap -d (delete swap device) */ @@ -645,6 +646,19 @@ add(char *path, off_t offset, off_t cnt, int flags) (void) close(fd); + /* + * zvols cannot act as both a swap device and dump device. 
+ */ + if (strncmp(dumpdev, ZVOL_FULL_DEV_DIR, + strlen(ZVOL_FULL_DEV_DIR)) == 0) { + if (strcmp(dumpdev, path) == 0) { + (void) fprintf(stderr, gettext("%s: zvol " + "cannot be used as a swap device and a " + "dump device\n"), path); + return (2); + } + } + } else if (!(flags & P1FLAG)) dumpadm_err(gettext("Warning: failed to open /dev/dump")); diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 7108c36883..139aa5dd0d 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -95,7 +95,7 @@ usage(void) "dataset [object...]\n" " %s -C [pool]\n" " %s -l dev\n" - " %s -R vdev:offset:size:flags\n" + " %s -R pool:vdev:offset:size:flags\n" " %s [-p path_to_vdev_dir]\n" " %s -e pool | GUID | devid ...\n", cmdname, cmdname, cmdname, cmdname, cmdname, cmdname); @@ -1117,7 +1117,6 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) static char *objset_types[DMU_OST_NUMTYPES] = { "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; -/*ARGSUSED*/ static void dump_dir(objset_t *os) { diff --git a/usr/src/grub/grub-0.95/stage2/builtins.c b/usr/src/grub/grub-0.95/stage2/builtins.c index 0ccd3677e3..bd2588617f 100644 --- a/usr/src/grub/grub-0.95/stage2/builtins.c +++ b/usr/src/grub/grub-0.95/stage2/builtins.c @@ -115,6 +115,7 @@ init_config (void) grub_timeout = -1; current_rootpool[0] = '\0'; current_bootfs[0] = '\0'; + current_bootpath[0] = '\0'; current_bootfs_obj = 0; is_zfs_mount = 0; } @@ -2680,6 +2681,18 @@ expand_dollar_bootfs(char *in, char *out) tmpout = out + strlen(out); } + /* + * If there is a $ZFS-BOOTFS expansion, it is a ZFS root, + * then add bootpath property. 
+ */ + if (tmpout != out) { + if ((outlen += 12 + strlen(current_bootpath)) > MAX_CMDLINE) { + errnum = ERR_WONT_FIT; + return (1); + } + grub_sprintf(tmpout, ",bootpath=\"%s\"", current_bootpath); + } + strncat(out, in, MAX_CMDLINE); return (0); } diff --git a/usr/src/grub/grub-0.95/stage2/char_io.c b/usr/src/grub/grub-0.95/stage2/char_io.c index f7952072aa..24395e3787 100644 --- a/usr/src/grub/grub-0.95/stage2/char_io.c +++ b/usr/src/grub/grub-0.95/stage2/char_io.c @@ -211,73 +211,6 @@ grub_vprintf (const char *format, int *dataptr) } #ifndef STAGE1_5 -static int -grub_vsprintf (char *buffer, const char *format, int *dataptr) -{ - /* XXX hohmuth - ugly hack -- should unify with printf() */ - char c, *ptr, str[16]; - char *bp = buffer; - int len = 0; - - while ((c = *format++) != 0) - { - if (c != '%') { - if (buffer) - *bp++ = c; /* putchar(c); */ - len++; - } else { - switch (c = *(format++)) - { - case 'd': case 'u': case 'x': - *convert_to_ascii (str, c, *((unsigned long *) dataptr++)) = 0; - - ptr = str; - - while (*ptr) { - if (buffer) - *bp++ = *(ptr++); /* putchar(*(ptr++)); */ - else - ptr++; - len++; - } - break; - - case 'c': - if (buffer) - *bp++ = (*(dataptr++))&0xff; - else - dataptr++; - len++; - /* putchar((*(dataptr++))&0xff); */ - break; - - case 's': - ptr = (char *) (*(dataptr++)); - - while ((c = *ptr++) != 0) { - if (buffer) - *bp++ = c; /* putchar(c); */ - len++; - } - break; - } - } - } - - *bp = 0; - return (len); -} - -int -grub_sprintf (char *buffer, const char *format, ...) 
-{ - int *dataptr = (int *) &format; - dataptr++; - - return (grub_vsprintf (buffer, format, dataptr)); -} - void init_page (void) { @@ -943,6 +876,76 @@ safe_parse_maxint (char **str_ptr, int *myint_ptr) } #endif /* STAGE1_5 */ +#if !defined(STAGE1_5) || defined(FSYS_ZFS) +static int +grub_vsprintf (char *buffer, const char *format, int *dataptr) +{ + /* XXX hohmuth + ugly hack -- should unify with printf() */ + char c, *ptr, str[16]; + char *bp = buffer; + int len = 0; + + while ((c = *format++) != 0) + { + if (c != '%') { + if (buffer) + *bp++ = c; /* putchar(c); */ + len++; + } else { + switch (c = *(format++)) + { + case 'd': case 'u': case 'x': + *convert_to_ascii (str, c, *((unsigned long *) dataptr++)) = 0; + + ptr = str; + + while (*ptr) { + if (buffer) + *bp++ = *(ptr++); /* putchar(*(ptr++)); */ + else + ptr++; + len++; + } + break; + + case 'c': + if (buffer) + *bp++ = (*(dataptr++))&0xff; + else + dataptr++; + len++; + /* putchar((*(dataptr++))&0xff); */ + break; + + case 's': + ptr = (char *) (*(dataptr++)); + + while ((c = *ptr++) != 0) { + if (buffer) + *bp++ = c; /* putchar(c); */ + len++; + } + break; + } + } + } + + *bp = 0; + return (len); +} + +int +grub_sprintf (char *buffer, const char *format, ...) +{ + int *dataptr = (int *) &format; + dataptr++; + + return (grub_vsprintf (buffer, format, dataptr)); +} + +#endif /* !defined(STAGE1_5) || defined(FSYS_ZFS) */ + void noisy_printf (const char *format,...) { @@ -1248,6 +1251,17 @@ grub_strstr (const char *s1, const char *s2) return 0; } + +int +grub_strlen (const char *str) +{ + int len = 0; + + while (*str++) + len++; + + return len; +} #endif /* !defined(STAGE1_5) || defined(FSYS_ZFS) */ #ifndef STAGE1_5 @@ -1264,17 +1278,6 @@ nul_terminate (char *str) *str = 0; return ch; } - -int -grub_strlen (const char *str) -{ - int len = 0; - - while (*str++) - len++; - - return len; -} #endif /* ! 
STAGE1_5 */ int diff --git a/usr/src/grub/grub-0.95/stage2/common.c b/usr/src/grub/grub-0.95/stage2/common.c index ad24d73c54..b6dc7d3a09 100644 --- a/usr/src/grub/grub-0.95/stage2/common.c +++ b/usr/src/grub/grub-0.95/stage2/common.c @@ -93,8 +93,9 @@ char *err_list[] = [ERR_WONT_FIT] = "Selected item cannot fit into memory", [ERR_WRITE] = "Disk write error", [ERR_BAD_GZIP_CRC] = "Incorrect gunzip CRC checksum", - [ERR_FILESYSTEM_NOT_FOUND] = "File System not found" + [ERR_FILESYSTEM_NOT_FOUND] = "File System not found", /* this zfs file system is not found in the pool of the device */ + [ERR_NO_BOOTPATH] = "No valid boot path found in the zfs label. This may be caused by attempting to boot from an off-lined device." }; diff --git a/usr/src/grub/grub-0.95/stage2/disk_io.c b/usr/src/grub/grub-0.95/stage2/disk_io.c index 620607e0ba..12cf5633aa 100644 --- a/usr/src/grub/grub-0.95/stage2/disk_io.c +++ b/usr/src/grub/grub-0.95/stage2/disk_io.c @@ -124,6 +124,7 @@ unsigned long part_length; int current_slice; /* ZFS root filesystem for booting */ +char current_bootpath[MAXNAMELEN]; char current_rootpool[MAXNAMELEN]; char current_bootfs[MAXNAMELEN]; uint64_t current_bootfs_obj; diff --git a/usr/src/grub/grub-0.95/stage2/fsys_zfs.c b/usr/src/grub/grub-0.95/stage2/fsys_zfs.c index b590a59a07..82a093c602 100644 --- a/usr/src/grub/grub-0.95/stage2/fsys_zfs.c +++ b/usr/src/grub/grub-0.95/stage2/fsys_zfs.c @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" @@ -199,7 +199,7 @@ zio_checksum_verify(blkptr_t *bp, char *data, int size) * Success : physical disk offset * Failure : errnum = ERR_BAD_ARGUMENT, return value is meaningless */ -uint64_t +static uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset) { /* XXX Need to add back label support! */ @@ -496,7 +496,7 @@ zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk, * 0 - success * errnum - failure */ -int +static int zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h, const char *name, uint64_t *value) { @@ -557,7 +557,7 @@ zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h, * 0 - success * errnum - failure */ -int +static int fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, char *name, uint64_t *value, char *stack) { @@ -893,9 +893,9 @@ skip: } /* - * Parse the packed nvlist and search for the string value of a given name. + * For a given XDR packed nvlist, verify the first 4 bytes and move on. * - * An XDR packed nvlist is encoded as (from nvs_xdr_create) : + * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : * * encoding method/host endian (4 bytes) * nvl_version (4 bytes) @@ -915,19 +915,46 @@ skip: * 0 - success * 1 - failure */ -int -nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype) +static int +nvlist_unpack(char *nvlist, char **out) { - int name_len, type, nelm, slen, encode_size; - char *nvpair, *nvp_name, *strval = val; - uint64_t *intval = val; - /* Verify if the 1st and 2nd byte in the nvlist are valid. 
*/ if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) return (1); + nvlist += 4; + *out = nvlist; + return (0); +} + +static char * +nvlist_array(char *nvlist, int index) +{ + int i, encode_size; + + for (i = 0; i < index; i++) { + /* skip the header, nvl_version, and nvl_nvflag */ + nvlist = nvlist + 4 * 2; + + while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) + nvlist += encode_size; /* goto the next nvpair */ + + nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ + } + + return (nvlist); +} + +static int +nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, + int *nelmp) +{ + int name_len, type, slen, encode_size; + char *nvpair, *nvp_name, *strval = val; + uint64_t *intval = val; + /* skip the header, nvl_version, and nvl_nvflag */ - nvlist = nvlist + 4 * 3; + nvlist = nvlist + 4 * 2; /* * Loop thru the nvpair list @@ -948,8 +975,9 @@ nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype) if ((grub_strncmp(nvp_name, name, name_len) == 0) && type == valtype) { + int nelm; - if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) != 1) + if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1) return (1); nvpair += 4; @@ -964,6 +992,16 @@ nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype) case DATA_TYPE_UINT64: *intval = BSWAP_64(*(uint64_t *)nvpair); return (0); + + case DATA_TYPE_NVLIST: + *(void **)val = (void *)nvpair; + return (0); + + case DATA_TYPE_NVLIST_ARRAY: + *(void **)val = (void *)nvpair; + if (nelmp) + *nelmp = nelm; + return (0); } } @@ -974,18 +1012,92 @@ nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype) } /* - * Get the pool name of the root pool from the vdev nvpair list of the label. + * Check if this vdev is online and is in a good state. 
+ */ +static int +vdev_validate(char *nv) +{ + uint64_t ival; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, + DATA_TYPE_UINT64, NULL) == 0) + return (ERR_DEV_VALUES); + + return (0); +} + +/* + * Get a list of valid vdev pathname from the boot device. + * The caller should already allocate MAXNAMELEN memory for bootpath. + */ +static int +vdev_get_bootpath(char *nv, char *bootpath) +{ + char type[16]; + + bootpath[0] = '\0'; + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, + NULL)) + return (ERR_FSYS_CORRUPT); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + if (vdev_validate(nv) != 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath, + DATA_TYPE_STRING, NULL) != 0) + return (ERR_NO_BOOTPATH); + + } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) { + int nelm, i; + char *child; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child, + DATA_TYPE_NVLIST_ARRAY, &nelm)) + return (ERR_FSYS_CORRUPT); + + for (i = 0; i < nelm; i++) { + char tmp_path[MAXNAMELEN]; + char *child_i; + + child_i = nvlist_array(child, i); + if (vdev_validate(child_i) != 0) + continue; + + if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH, + tmp_path, DATA_TYPE_STRING, NULL) != 0) + return (ERR_NO_BOOTPATH); + + if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN) + return (ERR_WONT_FIT); + + if (strlen(bootpath) == 0) + sprintf(bootpath, "%s", tmp_path); + else + sprintf(bootpath, "%s %s", bootpath, tmp_path); + } + } + + return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH); +} + +/* + * Check the disk label information and retrieve needed vdev name-value pairs. 
* * Return: * 0 - success - * errnum - failure + * ERR_* - failure */ -int -get_pool_name_value(int label, char *name, void *value, int valtype, - char *stack) +static int +check_pool_label(int label, char *stack) { vdev_phys_t *vdev; - uint64_t sector; + uint64_t sector, pool_state, txg = 0; + char *nvlist, *nv; sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT; @@ -996,10 +1108,36 @@ get_pool_name_value(int label, char *name, void *value, int valtype, vdev = (vdev_phys_t *)stack; - if (nvlist_lookup_value(vdev->vp_nvlist, name, value, valtype)) + if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) return (ERR_FSYS_CORRUPT); - else - return (0); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); + + if (pool_state == POOL_STATE_DESTROYED) + return (ERR_FILESYSTEM_NOT_FOUND); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME, + current_rootpool, DATA_TYPE_STRING, NULL)) + return (ERR_FSYS_CORRUPT); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); + + /* not an active device */ + if (txg == 0) + return (ERR_NO_BOOTPATH); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, + DATA_TYPE_NVLIST, NULL)) + return (ERR_FSYS_CORRUPT); + + if (vdev_get_bootpath(nv, current_bootpath)) + return (ERR_NO_BOOTPATH); + + return (0); } /* @@ -1044,22 +1182,13 @@ zfs_mount(void) if ((ubbest = find_bestub(ub_array, label)) != NULL && zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack) == 0) { - uint64_t pool_state; VERIFY_OS_TYPE(osp, DMU_OST_META); /* Got the MOS. Save it at the memory addr MOS. 
*/ grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); - if (get_pool_name_value(label, ZPOOL_CONFIG_POOL_STATE, - &pool_state, DATA_TYPE_UINT64, stack)) - return (0); - - if (pool_state == POOL_STATE_DESTROYED) - return (0); - - if (get_pool_name_value(label, ZPOOL_CONFIG_POOL_NAME, - current_rootpool, DATA_TYPE_STRING, stack)) + if (check_pool_label(label, stack)) return (0); is_zfs_mount = 1; diff --git a/usr/src/grub/grub-0.95/stage2/fsys_zfs.h b/usr/src/grub/grub-0.95/stage2/fsys_zfs.h index 25a0755f57..80d7e16890 100644 --- a/usr/src/grub/grub-0.95/stage2/fsys_zfs.h +++ b/usr/src/grub/grub-0.95/stage2/fsys_zfs.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _FSYS_ZFS_H @@ -128,6 +128,8 @@ typedef struct uberblock_phys { #define HOST_ENDIAN 1 /* for x86 machine */ #define DATA_TYPE_UINT64 8 #define DATA_TYPE_STRING 9 +#define DATA_TYPE_NVLIST 19 +#define DATA_TYPE_NVLIST_ARRAY 20 /* * Decompression Entry - lzjb diff --git a/usr/src/grub/grub-0.95/stage2/shared.h b/usr/src/grub/grub-0.95/stage2/shared.h index 221df8493c..eaf1758dfc 100644 --- a/usr/src/grub/grub-0.95/stage2/shared.h +++ b/usr/src/grub/grub-0.95/stage2/shared.h @@ -564,6 +564,7 @@ typedef enum ERR_NUMBER_OVERFLOW, ERR_BAD_GZIP_CRC, ERR_FILESYSTEM_NOT_FOUND, + ERR_NO_BOOTPATH, MAX_ERR_NUM } grub_error_t; @@ -652,6 +653,7 @@ extern unsigned long current_drive; extern unsigned long current_partition; extern char current_rootpool[MAXNAMELEN]; extern char current_bootfs[MAXNAMELEN]; +extern char current_bootpath[MAXNAMELEN]; extern unsigned long long current_bootfs_obj; extern int is_zfs_mount; diff --git a/usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h b/usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h index 82d767a010..20eb2d6ef9 100644 --- a/usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h 
+++ b/usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,13 +66,22 @@ #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_STATS "stats" #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" -#define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" +#define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_L2CACHE "l2cache" +/* + * The persistent vdev state is stored as separate values rather than a single + * 'vdev_state' entry. This is because a device can be in multiple states, such + * as offline and degraded. + */ +#define ZPOOL_CONFIG_OFFLINE "offline" +#define ZPOOL_CONFIG_FAULTED "faulted" +#define ZPOOL_CONFIG_DEGRADED "degraded" +#define ZPOOL_CONFIG_REMOVED "removed" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" diff --git a/usr/src/lib/libdiskmgt/common/findevs.c b/usr/src/lib/libdiskmgt/common/findevs.c index 42d57d428a..076281693d 100644 --- a/usr/src/lib/libdiskmgt/common/findevs.c +++ b/usr/src/lib/libdiskmgt/common/findevs.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -37,6 +37,7 @@ #include <libgen.h> #include <unistd.h> #include <devid.h> +#include <sys/fs/zfs.h> #include "libdiskmgt.h" #include "disks_private.h" @@ -140,6 +141,7 @@ static int have_disk(struct search_args *args, char *devid, static int is_cluster_disk(di_node_t node, di_minor_t minor); static int is_ctds(char *name); static int is_drive(di_minor_t minor); +static int is_zvol(di_node_t node, di_minor_t minor); static int is_HBA(di_node_t node, di_minor_t minor); static int new_alias(disk_t *diskp, char *kernel_path, char *devlink_path, struct search_args *args); @@ -568,7 +570,8 @@ add_devs(di_node_t node, di_minor_t minor, void *arg) result = DI_WALK_TERMINATE; } - } else if (di_minor_spectype(minor) == S_IFCHR && is_drive(minor)) { + } else if (di_minor_spectype(minor) == S_IFCHR && + (is_drive(minor) || is_zvol(node, minor))) { char *devidstr; char kernel_name[MAXPATHLEN]; disk_t *diskp; @@ -1573,6 +1576,15 @@ is_drive(di_minor_t minor) } static int +is_zvol(di_node_t node, di_minor_t minor) +{ + if ((strncmp(di_node_name(node), ZFS_DRIVER, 3) == 0) && + di_minor_devt(minor)) + return (1); + return (0); +} + +static int is_HBA(di_node_t node, di_minor_t minor) { char *type; diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 3634580bf0..2a186b1de6 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -114,6 +114,7 @@ enum { EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ + EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_UNKNOWN }; @@ -538,6 +539,9 @@ extern int zpool_read_label(int, nvlist_t **); extern int zpool_create_zvol_links(zpool_handle_t *); extern int zpool_remove_zvol_links(zpool_handle_t *); +/* is this zvol valid for use as a dump device? 
*/ +extern int zvol_check_dump_config(char *); + /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index a83226182d..53bfd6cbe7 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -216,6 +216,8 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, int zfs_name_valid(const char *name, zfs_type_t type) { + if (type == ZFS_TYPE_POOL) + return (zpool_name_valid(NULL, B_FALSE, name)); return (zfs_validate_name(NULL, name, type, B_FALSE)); } @@ -2958,7 +2960,6 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); - #ifdef _ILP32 case EOVERFLOW: /* diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h index 5e89a95652..4c9d9ced17 100644 --- a/usr/src/lib/libzfs/common/libzfs_impl.h +++ b/usr/src/lib/libzfs/common/libzfs_impl.h @@ -160,6 +160,7 @@ int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); int zvol_create_link(libzfs_handle_t *, const char *); int zvol_remove_link(libzfs_handle_t *, const char *); int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *); +boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); void namespace_clear(libzfs_handle_t *); diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index f91c9bb566..2db39e3c04 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -531,7 +531,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) * Validate the given pool name, optionally putting an extended error message in * 'buf'. 
*/ -static boolean_t +boolean_t zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) { namecheck_err_t why; @@ -551,8 +551,9 @@ zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) strncmp(pool, "raidz", 5) == 0 || strncmp(pool, "spare", 5) == 0 || strcmp(pool, "log") == 0)) { - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "name is reserved")); + if (hdl != NULL) + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "name is reserved")); return (B_FALSE); } @@ -2657,3 +2658,113 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) efi_free(vtoc); return (0); } + +static boolean_t +supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) +{ + char *type; + nvlist_t **child; + uint_t children, c; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0); + if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || + strcmp(type, VDEV_TYPE_FILE) == 0 || + strcmp(type, VDEV_TYPE_LOG) == 0 || + strcmp(type, VDEV_TYPE_MISSING) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "vdev type '%s' is not supported"), type); + (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf); + return (B_FALSE); + } + if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) { + if (!supported_dump_vdev_type(hdl, child[c], errbuf)) + return (B_FALSE); + } + } + return (B_TRUE); +} + +/* + * check if this zvol is allowable for use as a dump device; zero if + * it is, > 0 if it isn't, < 0 if it isn't a zvol + */ +int +zvol_check_dump_config(char *arg) +{ + zpool_handle_t *zhp = NULL; + nvlist_t *config, *nvroot; + char *p, *volname; + nvlist_t **top; + uint_t toplevels; + libzfs_handle_t *hdl; + char errbuf[1024]; + char poolname[ZPOOL_MAXNAMELEN]; + int pathlen = strlen(ZVOL_FULL_DEV_DIR); + int ret = 1; + + if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) { + return (-1); + } + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "dump is not 
supported on device '%s'"), arg); + + if ((hdl = libzfs_init()) == NULL) + return (1); + libzfs_print_on_error(hdl, B_TRUE); + + volname = arg + pathlen; + + /* check the configuration of the pool */ + if ((p = strchr(volname, '/')) == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "malformed dataset name")); + (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + return (1); + } else if (p - volname >= ZFS_MAXNAMELEN) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset name is too long")); + (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf); + return (1); + } else { + (void) strncpy(poolname, volname, p - volname); + poolname[p - volname] = '\0'; + } + + if ((zhp = zpool_open(hdl, poolname)) == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "could not open pool '%s'"), poolname); + (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); + goto out; + } + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "could not obtain vdev configuration for '%s'"), poolname); + (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf); + goto out; + } + + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &top, &toplevels) == 0); + if (toplevels != 1) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' has multiple top level vdevs"), poolname); + (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf); + goto out; + } + + if (!supported_dump_vdev_type(hdl, top[0], errbuf)) { + goto out; + } + ret = 0; + +out: + if (zhp) + zpool_close(zhp); + libzfs_fini(hdl); + return (ret); +} diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index df8c3ca97e..77296b99cf 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -203,6 +203,9 @@ libzfs_error_description(libzfs_handle_t *hdl) return (dgettext(TEXT_DOMAIN, "invalid or missing cache file")); case EZFS_ISL2CACHE: return 
(dgettext(TEXT_DOMAIN, "device is in use as a cache")); + case EZFS_VDEVNOTSUP: + return (dgettext(TEXT_DOMAIN, "vdev specification is not " + "supported")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers index cd14dc1861..e349ff5874 100644 --- a/usr/src/lib/libzfs/common/mapfile-vers +++ b/usr/src/lib/libzfs/common/mapfile-vers @@ -173,6 +173,7 @@ SUNWprivate_1.1 { zprop_get_list; zprop_iter; zprop_print_one_property; + zvol_check_dump_config; local: *; }; diff --git a/usr/src/pkgdefs/SUNWcakr.u/prototype_com b/usr/src/pkgdefs/SUNWcakr.u/prototype_com index 97d5cb6f9d..6971fc15a9 100644 --- a/usr/src/pkgdefs/SUNWcakr.u/prototype_com +++ b/usr/src/pkgdefs/SUNWcakr.u/prototype_com @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -520,3 +520,4 @@ f none platform/sun4u/kernel/tod/sparcv9/todstarcat 755 root sys f none platform/sun4u/kernel/tod/sparcv9/todstarfire 755 root sys s none platform/sun4u/kernel/unix=sparcv9/unix f none platform/sun4u/wanboot 644 root sys +f none platform/sun4u/bootlst 644 root sys diff --git a/usr/src/pkgdefs/SUNWcakr.v/prototype_com b/usr/src/pkgdefs/SUNWcakr.v/prototype_com index 1a2bc28956..dc0c8441d0 100644 --- a/usr/src/pkgdefs/SUNWcakr.v/prototype_com +++ b/usr/src/pkgdefs/SUNWcakr.v/prototype_com @@ -97,3 +97,4 @@ s none platform/sun4v/kernel/unix=sparcv9/unix f none platform/sun4v/wanboot 644 root sys +f none platform/sun4v/bootlst 644 root sys diff --git a/usr/src/psm/promif/ieee1275/sun4/prom_fio.c b/usr/src/psm/promif/ieee1275/sun4/prom_fio.c index 3909b21170..0e814d2bcc 100644 --- a/usr/src/psm/promif/ieee1275/sun4/prom_fio.c +++ b/usr/src/psm/promif/ieee1275/sun4/prom_fio.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -76,6 +76,52 @@ prom_fopen(ihandle_t fsih, char *path) return (p1275_cell2int(ci[9])); /* Res2: fd */ } +int +prom_volopen(ihandle_t fsih, char *path) +{ + cell_t ci[10]; + size_t len; + +#ifdef PROM_32BIT_ADDRS + char *opath = NULL; + + if ((uintptr_t)path > (uint32_t)-1) { + opath = path; + len = prom_strlen(opath) + 1; /* include terminating NUL */ + path = promplat_alloc(len); + if (path == NULL) + return (0); + (void) prom_strcpy(path, opath); + } +#endif + len = prom_strlen(path); + + promif_preprom(); + ci[0] = p1275_ptr2cell("call-method"); /* Service name */ + ci[1] = (cell_t)4; /* #argument cells */ + ci[2] = (cell_t)3; /* #result cells */ + ci[3] = p1275_ptr2cell("open-volume"); /* Arg1: Method name */ + ci[4] = p1275_ihandle2cell(fsih); /* Arg2: fs ihandle */ + ci[5] = p1275_uint2cell(len); /* Arg3: Len */ + ci[6] = p1275_ptr2cell(path); /* Arg4: Pathname */ + + (void) p1275_cif_handler(&ci); + + promif_postprom(); + +#ifdef PROM_32BIT_ADDRS + if (opath != NULL) + promplat_free(path, len + 1); +#endif + + if (ci[7] != 0) /* Catch result */ + return (-1); + + if (ci[8] == 0) /* Res1: failed */ + return (-1); + + return (p1275_cell2int(ci[9])); /* Res2: fd */ +} int prom_fseek(ihandle_t fsih, int fd, unsigned long long offset) diff --git a/usr/src/psm/stand/Makefile b/usr/src/psm/stand/Makefile index cbc6f7bc13..b4c902af30 100644 --- a/usr/src/psm/stand/Makefile +++ b/usr/src/psm/stand/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -29,7 +28,7 @@ include ../../Makefile.master # note: the x86 needs the "bootblks" target for installboot -SUBDIRS= lib bootblks boot cpr +SUBDIRS= lib bootblks boot cpr bootlst all := TARGET= all install := TARGET= install diff --git a/usr/src/psm/stand/bootblks/Makefile.1275 b/usr/src/psm/stand/bootblks/Makefile.1275 index 63f4ebcff9..882553a154 100644 --- a/usr/src/psm/stand/bootblks/Makefile.1275 +++ b/usr/src/psm/stand/bootblks/Makefile.1275 @@ -21,7 +21,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # psm/stand/bootblks/Makefile.1275 @@ -40,6 +40,7 @@ RD_FCODE = rd.fcode MKBB = mkbb MKBB_SH = $(BASEDIR)/common/mkbb.sh +MKBB.fcode = $(MKBB) $(MKBBFLAGS) %.fcode: $(BASEDIR)/common/%.fth $(TOKENIZE) $< @@ -55,14 +56,14 @@ $(MKBB): $(MKBB_SH) chmod +x $@ $(FS_BB): $(MKBB) $(FS_FCODE) $(RD_FCODE) - $(MKBB) $(FS_FCODE) $(RD_FCODE) $(FS_BB) + $(MKBB.fcode) $(FS_FCODE) $(RD_FCODE) $(FS_BB) $(PROG): $(FS_BB) @-$(RM) $@ cp -p $(FS_BB) $@ clean: - -$(RM) $(FS_FCODE) $(RD_FCODE) + -$(RM) $(FS_FCODE) $(RD_FCODE) $(EX_FCODE) $(MKBB) clobber: clean -$(RM) $(PROG) $(FS_BB) diff --git a/usr/src/psm/stand/bootblks/common/boot.fth b/usr/src/psm/stand/bootblks/common/boot.fth index e408f503f9..060a24cf8c 100644 --- a/usr/src/psm/stand/bootblks/common/boot.fth +++ b/usr/src/psm/stand/bootblks/common/boot.fth @@ -20,7 +20,7 @@ \ \ \ ident "%Z%%M% %I% %E% SMI" -\ Copyright 2007 Sun Microsystems, Inc. All rights reserved. +\ Copyright 2008 Sun Microsystems, Inc. All rights reserved. \ Use is subject to license terms. 
\ @@ -120,7 +120,7 @@ headers : mount-root ( -- ) boot-dev$ fs-pkg$ $open-package to fs-ih fs-ih 0= if - ." Can't mount root" abort + " Can't mount root" die then ; @@ -364,6 +364,8 @@ headers ; +false value lflag? + \ ZFS support \ -Z fsname opens specified filesystem in disk pool @@ -383,7 +385,10 @@ false value zflag? [else] : open-zfs-fs ( fs$ -- ) - ." -Z not supported on non-zfs root" abort + \ ignore on -L + lflag? invert if + " -Z not supported on non-zfs root" die + then ; [then] @@ -471,6 +476,13 @@ false value halt? ascii H of true to halt? endof + ascii L of + " /" fs-name swap move + true to zflag? + " bootlst" boot-file swap move + true to fflag? + true to lflag? + endof ascii Z of skip-blanks next-str ( arg$ fs-name$ ) ?dup if @@ -596,7 +608,7 @@ headers get-arch get-targ open-path ( fd ) loader-base over get-file if ( fd alloc-sz virt size ) - ." Boot load failed" abort + " Boot load failed" die then to file-sz ( fd alloc-sz virt ) swap to rd-alloc-sz ( fd virt ) diff --git a/usr/src/psm/stand/bootblks/common/util.fth b/usr/src/psm/stand/bootblks/common/util.fth index d48904646c..3bbcd62499 100644 --- a/usr/src/psm/stand/bootblks/common/util.fth +++ b/usr/src/psm/stand/bootblks/common/util.fth @@ -20,7 +20,7 @@ \ \ \ ident "%Z%%M% %I% %E% SMI" -\ Copyright 2007 Sun Microsystems, Inc. All rights reserved. +\ Copyright 2008 Sun Microsystems, Inc. All rights reserved. \ Use is subject to license terms. \ @@ -74,11 +74,15 @@ d# 256 constant /buf-len swap 1+ swap 1- ; +: die ( str -- ) + cr type cr abort +; + : diag-cr? ( -- ) diagnostic-mode? if cr then ; : find-abort ( name$ -- ) - ." Can't find " type abort + cr ." Can't find " type cr abort ; : get-package ( pkg$ -- ph ) @@ -192,7 +196,7 @@ d# 256 constant /rd-fcode d# 8192 /rd-fcode - constant rd-offset : open-abort ( file$ -- ) - ." Can't open " type abort + cr ." 
Can't open " type cr abort ; /buf-len buffer: open-cstr @@ -210,10 +214,10 @@ d# 8192 /rd-fcode - constant rd-offset : read-disk ( adr len off ih -- ) dup >r 0 swap cif-seek if ( adr len r: ih ) - ." seek failed" abort + " seek failed" die then tuck swap r> cif-read <> if ( ) - ." read failed" abort + " read failed" die then ; diff --git a/usr/src/psm/stand/bootblks/ufs/sparc/installboot.sh b/usr/src/psm/stand/bootblks/ufs/sparc/installboot.sh index cd463bef71..98a0571c9e 100644 --- a/usr/src/psm/stand/bootblks/ufs/sparc/installboot.sh +++ b/usr/src/psm/stand/bootblks/ufs/sparc/installboot.sh @@ -3,9 +3,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -21,8 +20,8 @@ # CDDL HEADER END # # -# Copyright (c) 1994-1997, by Sun Microsystems, Inc. -# All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" # @@ -32,6 +31,21 @@ away() { exit $1 } +COUNT=15 + +while getopts F: a; do + case $a in + F) case $OPTARG in + ufs) COUNT=15;; + hsfs) COUNT=15;; + zfs) COUNT=31;; + *) away 1 "$OPTARG: Unknown fstype";; + esac;; + ?) away 1 "unknows fstype: $fs" + esac +done +shift `expr $OPTIND - 1` + Usage="Usage: `basename $0` bootblk raw-device" test $# -ne 2 && away 1 "$Usage" @@ -43,6 +57,6 @@ test ! -c $DEVICE && away 1 "$DEVICE: Not a character device" test ! 
-w $DEVICE && away 1 "$DEVICE: Not writeable" # label at block 0, bootblk from block 1 through 15 -stderr=`dd if=$BOOTBLK of=$DEVICE bs=1b oseek=1 count=15 conv=sync 2>&1` +stderr=`dd if=$BOOTBLK of=$DEVICE bs=1b oseek=1 count=$COUNT conv=sync 2>&1` err=$? ; test $err -ne 0 && away $err "$stderr" exit 0 diff --git a/usr/src/psm/stand/bootblks/zfs/Makefile.zfs b/usr/src/psm/stand/bootblks/zfs/Makefile.zfs index d2cb83a08c..92a78d6c82 100644 --- a/usr/src/psm/stand/bootblks/zfs/Makefile.zfs +++ b/usr/src/psm/stand/bootblks/zfs/Makefile.zfs @@ -35,23 +35,15 @@ include $(BASEDIR)/Makefile.com ZFS_DIR = $(BASEDIR)/zfs/common -ZFSBOOT_FTH = $(ZFS_DIR)/boot-zfs.fth -ZFS_FTH = $(ZFS_DIR)/zfs.fth - FS_FCODE = boot-zfs.fcode +EX_FCODE = fs-zfs.fcode FS_BB = zfs.bb -ZFSDBGBOOT_FTH = $(ZFS_DIR)/debug-zfs.fth -ZFSDBG_FTH = $(ZFS_DIR)/big-zfs.fth - -DBGBOOT_FCODE = debug-zfs.fcode -DBGFS_FCODE = big-zfs.fcode - -DBGFS_BB = debugzfs.bb -DBGPROG = debugbb +ZFSBOOT_FTH = $(ZFS_DIR)/boot-zfs.fth +ZFSFS_FTH = $(ZFS_DIR)/fs-zfs.fth +ZFS_FTH = $(ZFS_DIR)/zfs.fth -DBG_CLEAN += $(DBGBOOT_FCODE) $(DBGFS_FCODE) -DBG_CLOBBER += $(DBGPROG) $(DBGFS_BB) +MKBBFLAGS = -e $(EX_FCODE) # # Where and how stuff gets installed @@ -64,21 +56,10 @@ BOOTBLK_LINK = $(BOOTBLK_LINK_PREFIX)/zfs/$(PROG) $(TOKENIZE) $< # make doesn't know fload -$(FS_FCODE): $(UTIL_FTH) $(ZFS_FTH) $(BOOT_FTH) +$(FS_FCODE): $(UTIL_FTH) $(BOOT_FTH) $(TOKENIZE) $(ZFSBOOT_FTH) +$(EX_FCODE): $(UTIL_FTH) $(ZFS_FTH) + $(TOKENIZE) $(ZFSFS_FTH) -$(DBGBOOT_FCODE): $(UTIL_FTH) $(BOOT_FTH) - $(TOKENIZE) $(ZFSDBGBOOT_FTH) - -$(DBGFS_FCODE): $(UTIL_FTH) $(ZFS_FTH) - $(TOKENIZE) $(ZFSDBG_FTH) - -$(DBGFS_BB): $(MKBB) $(DBGBOOT_FCODE) $(DBGFS_FCODE) $(RD_FCODE) - $(MKBB) -e $(DBGFS_FCODE) $(DBGBOOT_FCODE) $(RD_FCODE) $(DBGFS_BB) - -$(DBGPROG): $(DBGFS_BB) - @-$(RM) $@ - cp -p $(DBGFS_BB) $@ - -debug: $(DBGPROG) +$(FS_BB): $(EX_FCODE) diff --git a/usr/src/psm/stand/bootblks/zfs/common/boot-zfs.fth 
b/usr/src/psm/stand/bootblks/zfs/common/boot-zfs.fth index 69536d3c6a..2f64847f90 100644 --- a/usr/src/psm/stand/bootblks/zfs/common/boot-zfs.fth +++ b/usr/src/psm/stand/bootblks/zfs/common/boot-zfs.fth @@ -1,7 +1,7 @@ \ ident "%Z%%M% %I% %E% SMI" -\ Copyright 2007 Sun Microsystems, Inc. All rights reserved. +\ Copyright 2008 Sun Microsystems, Inc. All rights reserved. \ Use is subject to license terms. \ \ CDDL HEADER START @@ -29,6 +29,12 @@ id: %Z%%M% %I% %E% SMI purpose: ZFS bootblock copyright: Copyright 2006 Sun Microsystems, Inc. All Rights Reserved +\ big bootblk +create doheaders +create bigbootblk +d# 8192 constant /fs-fcode +d# 8192 constant fs-offset + \ for [ifdef] zfs create zfs @@ -38,8 +44,5 @@ create zfs \ load common words fload ../../../common/util.fth -\ load fs reader -fload ../../common/zfs.fth - \ load booter fload ../../../common/boot.fth diff --git a/usr/src/psm/stand/bootblks/zfs/common/big-zfs.fth b/usr/src/psm/stand/bootblks/zfs/common/fs-zfs.fth index 986dabf003..986dabf003 100644 --- a/usr/src/psm/stand/bootblks/zfs/common/big-zfs.fth +++ b/usr/src/psm/stand/bootblks/zfs/common/fs-zfs.fth diff --git a/usr/src/psm/stand/bootblks/zfs/common/zfs.fth b/usr/src/psm/stand/bootblks/zfs/common/zfs.fth index 50784d8839..328e2248e9 100644 --- a/usr/src/psm/stand/bootblks/zfs/common/zfs.fth +++ b/usr/src/psm/stand/bootblks/zfs/common/zfs.fth @@ -1,6 +1,6 @@ \ ident "%Z%%M% %I% %E% SMI" -\ Copyright 2007 Sun Microsystems, Inc. All rights reserved. +\ Copyright 2008 Sun Microsystems, Inc. All rights reserved. \ Use is subject to license terms. 
\ \ CDDL HEADER START @@ -49,13 +49,13 @@ new-device \ stack ops and logical ops (dup, and, etc) are 64b : xcmp ( x1 x2 -- -1|0|1 ) xlsplit rot xlsplit ( x2.lo x2.hi x1.lo x1.hi ) - rot 2dup < if ( x2.lo x1.lo x1.hi x2.hi ) + rot 2dup u< if ( x2.lo x1.lo x1.hi x2.hi ) 2drop 2drop -1 ( lt ) - else > if ( x2.lo x1.lo ) + else u> if ( x2.lo x1.lo ) 2drop 1 ( gt ) - else swap 2dup < if ( x1.lo x2.lo ) + else swap 2dup u< if ( x1.lo x2.lo ) 2drop -1 ( lt ) - else > if ( ) + else u> if ( ) 1 ( gt ) else ( ) 0 ( eq ) @@ -267,6 +267,13 @@ new-device : >nvdata ( nv -- data ) >nvname + /l roundup ; + + \ convert nvdata to 64b int or string + : nvdata>x ( nvdata -- x ) + /l 2* + ( ptr ) + dup /l + l@ swap l@ ( x.lo x.hi ) + lxjoin ( x ) + ; alias nvdata>$ >nvname : nv-lookup ( nv name$ -- nvdata false | true ) @@ -285,8 +292,14 @@ new-device : scan-vdev ( -- ) temp-space /nvpairs nvpairs-off ( adr len off ) dev-ih read-disk ( ) + temp-space " txg" nv-lookup if + " no txg nvpair" die + then nvdata>x ( txg ) + x0= if + " detached mirror" die + then ( ) temp-space " name" nv-lookup if - ." no name nvpair" abort + " no name nvpair" die then nvdata>$ ( pool$ ) bootprop-buf swap move ( ) ; @@ -343,7 +356,7 @@ new-device /uber-block +loop \ make sure we found a valid ub - dup 0= if ." 
no ub found" abort then + dup 0= if " no ub found" die then uber-block /uber-block move ( ) ; @@ -398,7 +411,7 @@ new-device 2dup over dn_indblkshift rshift ( dn bp-off dn blk# r: lvl ) r> 1+ blk@lvl>bp ( dn bp-off bp ) - \ read parent indir and index + \ read parent indir blk and index rot tuck dn-indsize ( bp-off dn bp len ) ind-cache swap rot read-bp ( bp-off dn ) dn-indmask and ( bp-off' ) @@ -708,6 +721,8 @@ new-device : dd_head_dataset_obj ( dd -- n ) h# 8 + x@ ; : dd_child_dir_zapobj ( dd -- n ) h# 20 + x@ ; + + : ds_snapnames_zapobj ( ds -- n ) h# 20 + x@ ; : ds_bp ( ds -- p ) h# 80 + ; 0 instance value mos-dn @@ -725,7 +740,7 @@ new-device alias >dsl-dir dn_bonus alias >dsl-ds dn_bonus - : #dn/blk ( dn -- n ) dn-bsize /dnode / ; + : #dn/blk ( dn -- n ) dn-bsize /dnode / ; \ read block into dn-cache : get-dnblk ( dn blk# -- ) @@ -775,13 +790,23 @@ new-device \ read root dataset obj-dir " root_dataset" zap-lookup if - ." no root_dataset" abort + " no root_dataset" die then ( obj# ) dup to root-dsl# get-mos-dnode ( ) dnode root-dsl /dnode move ; + \ find snapshot of given dataset + : snap-look ( snap$ ds-obj# -- [ss-obj# ] not-found? ) + get-mos-dnode dnode >dsl-ds ( snap$ ds ) + ds_snapnames_zapobj get-mos-dnode ( snap$ ) + dnode -rot zap-lookup ( [ss-obj# ] not-found? ) + ; + + \ dsl dir to dataset + : dir>ds ( dn -- obj# ) >dsl-dir dd_head_dataset_obj ; + \ look thru the dsl hierarchy for path \ this looks almost exactly like a FS directory lookup : dsl-lookup ( path$ -- [ ds-obj# ] not-found? 
) @@ -794,18 +819,34 @@ new-device r> >dsl-dir dd_child_dir_zapobj ( path$ file$ obj# ) get-mos-dnode ( path$ file$ ) + \ check for snapshot names + ascii @ left-parse-string ( path$ snap$ file$ ) + \ search it - dnode -rot zap-lookup if ( path$ ) + dnode -rot zap-lookup if ( path$ snap$ ) \ not found - 2drop true exit ( not-found ) - then ( path$ obj# ) - get-mos-dnode ( path$ ) + 2drop 2drop true exit ( not-found ) + then ( path$ snap$ obj# ) + get-mos-dnode ( path$ snap$ ) + + \ lookup any snapshot name + dup if + \ must be last path component + 2swap nip if ( snap$ ) + 2drop true exit ( not-found ) + then + dnode dir>ds snap-look if ( ) + true exit ( not-found ) + then ( obj# ) + false exit ( obj# found ) + else 2drop then ( path$ ) + dnode >r ( path$ r: dn ) repeat ( path$ file$ r: dn) 2drop 2drop r> drop ( ) \ found it, return dataset obj# - dnode >dsl-dir dd_head_dataset_obj ( ds-obj# ) + dnode dir>ds ( ds-obj# ) false ( ds-obj# found ) ; @@ -837,7 +878,6 @@ new-device : fsize ( dn -- n ) >znode zp_size ; : ftype ( dn -- n ) >znode zp_mode h# f000 and ; : dir? ( dn -- flag ) ftype h# 4000 = ; - : regular? ( dn -- flag ) ftype h# 8000 = ; : symlink? ( dn -- flag ) ftype h# a000 = ; \ read obj# from fs objset @@ -855,17 +895,17 @@ new-device \ get root obj# from master node master-node# get-fs-dnode dnode " ROOT" zap-lookup if - ." no ROOT" abort + " no ROOT" die then ( fsroot-obj# ) ; : prop>rootobj# ( -- ) obj-dir " pool_props" zap-lookup if - ." no pool_props" abort + " no pool_props" die then ( prop-obj# ) get-mos-dnode ( ) dnode " bootfs" zap-lookup if - ." no bootfs" abort + " no bootfs" die then ( ds-obj# ) get-rootobj# ( fsroot-obj# ) ; @@ -972,6 +1012,38 @@ new-device ; \ + \ ZFS volume (ZVOL) routines + \ + 1 constant zvol-data# + 2 constant zvol-prop# + + 0 instance value zv-dn + + : get-zvol ( zvol$ -- not-found? 
) + dsl-lookup if + drop true exit ( failed ) + then ( ds-obj# ) + + \ get zvol objset + get-mos-dnode ( ) + zv-dn dnode get-objset + false ( succeeded ) + ; + + \ get zvol data dnode + : zvol-data ( -- ) + zv-dn zvol-data# get-dnode + ; + + : zvol-size ( -- size ) + zv-dn zvol-prop# get-dnode + dnode " size" zap-lookup if + " no zvol size" die + then ( size ) + ; + + + \ \ ZFS installation routines \ @@ -979,6 +1051,7 @@ new-device struct /x field >busy /x field >offset + /x field >fsize /dnode field >dnode constant /file-record @@ -993,7 +1066,7 @@ new-device : file-offset@ ( -- off ) current-fd fd>record >offset x@ ; : file-offset! ( off -- ) current-fd fd>record >offset x! ; : file-dnode ( -- dn ) current-fd fd>record >dnode ; - : file-size ( -- size ) file-dnode fsize ; + : file-size ( -- size ) current-fd fd>record >fsize x@ ; : file-bsize ( -- bsize ) file-dnode dn-bsize ; \ find free fd slot @@ -1010,11 +1083,12 @@ new-device ; \ init fd to offset 0 and copy dnode - : init-fd ( fd -- ) - fd>record ( rec ) + : init-fd ( fsize fd -- ) + fd>record ( fsize rec ) dup >busy 1 swap x! dup >dnode dnode swap /dnode move - >offset 0 swap x! + dup >fsize rot swap x! ( rec ) + >offset 0 swap x! ( ) ; \ make fd current @@ -1043,13 +1117,13 @@ new-device /max-bsize 5 * /uber-block + - /dnode 5 * + + /dnode 6 * + /disk-block + constant alloc-size : allocate-buffers ( -- ) alloc-size h# a0.0000 vmem-alloc dup 0= if - ." no memory" abort + " no memory" die then ( adr ) dup to temp-space /max-bsize + ( adr ) dup to dn-cache /max-bsize + ( adr ) @@ -1061,6 +1135,7 @@ new-device dup to obj-dir /dnode + ( adr ) dup to root-dsl /dnode + ( adr ) dup to fs-dn /dnode + ( adr ) + dup to zv-dn /dnode + ( adr ) dup to dnode /dnode + ( adr ) to gang-space ( ) @@ -1116,9 +1191,24 @@ new-device drop false exit ( failed ) then ( fd ) - dup init-fd true ( fd succeeded ) + dnode fsize over init-fd + true ( fd succeeded ) ; + : open-volume ( vol$ -- okay? 
) + get-slot if + 2drop false exit ( failed ) + then -rot ( fd vol$ ) + + get-zvol if ( fd ) + drop false exit ( failed ) + then + + zvol-size over ( fd size fd ) + zvol-data init-fd ( fd ) + true ( fd succeeded ) + ; + : close-file ( fd -- ) free-slot ( ) ; @@ -1132,7 +1222,7 @@ new-device drop false exit ( failed ) then ( off ) - dup file-size > if ( off ) + dup file-size x> if ( off ) drop false exit ( failed ) then ( off ) dup file-offset! true ( off succeeded ) @@ -1143,10 +1233,8 @@ new-device 2drop 0 exit ( 0 ) then ( adr len ) - file-dnode regular? 0= if 2drop 0 exit then - \ adjust len if reading past eof - dup file-offset@ + file-size > if + dup file-offset@ + file-size x> if dup file-offset@ + file-size - - then dup 0= if nip exit then diff --git a/usr/src/psm/stand/bootlst/Makefile b/usr/src/psm/stand/bootlst/Makefile new file mode 100644 index 0000000000..2505a083b9 --- /dev/null +++ b/usr/src/psm/stand/bootlst/Makefile @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# psm/stand/bootlst/Makefile +# +include $(SRC)/Makefile.master + +sparc_ARCHITECTURES = sparc +SUBDIRS = $($(MACH)_ARCHITECTURES) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint +clean.lint := TARGET= clean.lint + +.KEEP_STATE: + +all install clean clobber lint clean.lint: $(SUBDIRS) + + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(MFLAGS) $(TARGET) + +FRC: diff --git a/usr/src/psm/stand/bootlst/common/Makefile.com b/usr/src/psm/stand/bootlst/common/Makefile.com new file mode 100644 index 0000000000..dccf930192 --- /dev/null +++ b/usr/src/psm/stand/bootlst/common/Makefile.com @@ -0,0 +1,133 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# psm/stand/bootlst/common/Makefile.com +# + +TOPDIR = ../../../../.. 
+ +include $(TOPDIR)/Makefile.master +include $(TOPDIR)/Makefile.psm +include $(TOPDIR)/psm/stand/lib/Makefile.lib + +SYSDIR = $(TOPDIR)/uts +COMDIR = ../../common +STANDDIR = $(TOPDIR)/stand + +SALIBDIR = $(STANDDIR)/lib/sa +SALIB = $(SALIBDIR)/libsa.a +PROMLIBDIR= $(PROMIFDIR)/$(ARCH_PROMDIR) +PROMLIB = $(PROMLIBDIR)/libprom.a + +SALIBS += $(SALIB) $(PROMLIB) +LDLIBS = -L$(SALIBDIR) -lsa -L$(PROMLIBDIR) -lprom $(LDPLATLIBS) +LDFLAGS = -dn -M $(MAPFILE) $(MAP_FLAG) + +LINTLIBS = $(SALIBDIR)/llib-lsa.ln $(PROMLIBDIR)/llib-lprom.ln $(LINTPLATLIBS) +LINTFLAGS.lib = -ysxmun + +BOOTLSTOBJ += bootlst.o sasubr.o +BOOTLSTLINTS = $(BOOTLSTOBJ:%.o=%.ln) + +CPPDEFS = -D$(ARCH) -D__$(ARCH) -D$(TARG_MACH) -D__$(TARG_MACH) +CPPDEFS += -D_KERNEL -D_MACHDEP -D__ELF + +CPPINCS = -I$(SYSDIR)/common -I$(SYSDIR)/sun +CPPINCS += -I$(SYSDIR)/$(MACH) -I$(PLATDIR) +CPPINCS += -I$(STANDDIR)/lib/sa + +CPPFLAGS = $(CPPDEFS) $(CPPINCS) +CPPFLAGS += $(CCYFLAG)$(STANDDIR) + +C99MODE = $(C99_ENABLE) +CFLAGS = $(CCVERBOSE) -O $(C99MODE) + +ASFLAGS = -P -D_ASM $(CPPDEFS) -DLOCORE -D_LOCORE -D__STDC__ +AS_CPPFLAGS = $(CPPINCS) $(CPPFLAGS.master) + +# install values +LSTFILES= $(ALL:%=$(ROOT_PSM_DIR)/$(ARCH)/%) +FILEMODE= 644 +OWNER= root +GROUP= sys + +# lint stuff +LINTFLAGS += -Dlint +LOPTS = -hbxn + +# install rule +$(ROOT_PSM_DIR)/$(ARCH)/%: % + $(INS.file) + + +all: $(ALL) + +install: all $(LSTFILES) + + +LINT.c= $(LINT) $(LINTFLAGS.c) $(LINT_DEFS) $(CPPFLAGS) -c +LINT.s= $(LINT) $(LINTFLAGS.s) $(LINT_DEFS) $(CPPFLAGS) -c +LINT.2= $(LINT) $(LINTFLAGS.c) $(LINT_DEFS) $(CPPFLAGS) + +# build rules + +%.o: $(COMDIR)/%.c + $(COMPILE.c) -o $@ $< + +%.ln: $(COMDIR)/%.c + @$(LHEAD) $(LINT.c) $< $(LTAIL) + +.KEEP_STATE: + +.PARALLEL: $(BOOTLSTOBJ) $(BOOTLSTLINTS) + +bootlst: $(MAPFILE) $(BOOTLSTOBJ) $(SALIBS) + $(LD) $(LDFLAGS) -o $@ $(BOOTLSTOBJ) $(LDLIBS) + $(POST_PROCESS) + +$(SALIBS): FRC + @cd $(@D); $(MAKE) $(MFLAGS) + +$(LINTLIBS): FRC + @cd $(@D); $(MAKE) $(MFLAGS) $(@F) + +$(ROOTDIR): + 
$(INS.dir) + +lint: $(BOOTLSTLINTS) $(LINTLIBS) + @$(ECHO) "\n$@: global crosschecks:" + $(LINT.2) $(BOOTLSTLINTS) $(LINTLIBS) + +clean.lint: + $(RM) *.ln + +clean: + $(RM) *.o *.ln + +clobber: + $(RM) *.o *.ln $(ALL) + +FRC: diff --git a/usr/src/psm/stand/bootlst/common/bootlst.c b/usr/src/psm/stand/bootlst/common/bootlst.c new file mode 100644 index 0000000000..715c39774a --- /dev/null +++ b/usr/src/psm/stand/bootlst/common/bootlst.c @@ -0,0 +1,210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif.h>
+#include <sys/salib.h>
+
+#define MAX_CMDLINE 1600 /* from GRUB source */
+
+/*
+ * Parallel tables indexed by menu entry: titles[n] is the text of the
+ * n'th "title" line and datasets[n] is the argument of the "bootfs"
+ * line that followed it.
+ */
+char **titles;
+char **datasets;
+
+int menu_entry_count;	/* number of complete (title + bootfs) entries */
+int menu_table_size;	/* number of slots allocated in each table */
+
+/* non-zero after a title line until its bootfs line has been recorded */
+int in_menu_entry;
+
+#define ENTRY_ALLOC_COUNT 10
+
+extern void set_default_fs(char *fsw_name);
+extern int mountroot(char *str);
+
+/*
+ * Allocate the initial, zero-filled title and dataset tables and reset
+ * the parser state.  Panics if memory cannot be allocated.
+ */
+void
+init_table(void)
+{
+
+	menu_entry_count = 0;
+	titles = (char **)calloc(ENTRY_ALLOC_COUNT, sizeof (char *));
+	datasets = (char **)calloc(ENTRY_ALLOC_COUNT, sizeof (char *));
+	if (titles == NULL || datasets == NULL)
+		prom_panic("out of mem");
+	menu_table_size = ENTRY_ALLOC_COUNT;
+	in_menu_entry = 0;
+}
+
+/*
+ * Record the title of a new menu entry.  The entry only becomes
+ * complete (and menu_entry_count only advances) once a bootfs line is
+ * seen; a title that directly follows another title replaces it.
+ */
+void
+add_title_entry(char *title_str)
+{
+
+	/* skip leading white space */
+	while (isspace(*title_str))
+		title_str++;
+
+	if (menu_entry_count == menu_table_size) {
+		printf("Reallocating at count %d\n", menu_table_size);
+		/*
+		 * Grow both tables by ENTRY_ALLOC_COUNT slots.  The new
+		 * size must include the slots already in use; asking for
+		 * only ENTRY_ALLOC_COUNT slots would leave the tables at
+		 * their initial size and the stores below would run off
+		 * the end of them.
+		 */
+		titles = (char **)realloc(titles,
+		    (menu_table_size + ENTRY_ALLOC_COUNT) * sizeof (char *));
+		datasets = (char **)realloc(datasets,
+		    (menu_table_size + ENTRY_ALLOC_COUNT) * sizeof (char *));
+		if (titles == NULL || datasets == NULL)
+			prom_panic("out of mem");
+		menu_table_size += ENTRY_ALLOC_COUNT;
+	}
+
+	/* a previous title without a bootfs line is discarded */
+	if (in_menu_entry)
+		free(titles[menu_entry_count]);
+	if ((titles[menu_entry_count] = strdup(title_str)) == NULL)
+		prom_panic("out of mem");
+	in_menu_entry = 1;
+}
+
+/*
+ * Record the dataset named on a bootfs line and complete the current
+ * menu entry.  The line is ignored if the name contains white space or
+ * if no title line preceded it.
+ */
+void
+add_dataset_entry(char *dataset_str)
+{
+	char *cp;
+
+	/* skip leading white space */
+	while (isspace(*dataset_str))
+		dataset_str++;
+
+	/* if there is still any white space in the line, it's invalid */
+	for (cp = dataset_str; *cp; cp++)
+		if (isspace(*cp))
+			break;
+	if (*cp)
+		return; /* dataset name was invalid */
+
+	if (!in_menu_entry)
+		return; /* dataset line was not preceded by a title */
+
+	if ((datasets[menu_entry_count] = strdup(dataset_str)) == NULL)
+		prom_panic("out of mem");
+	menu_entry_count++;
+	in_menu_entry = 0;
+}
+
+
+char *
+trim_white_space(char *cp)
+{
+	char *ep;
+
+	/* skip leading white space */
+
while (isspace(*cp)) + cp++; + + /* + * if the string contained nothing but white space, return a + * null string. + */ + if (*cp == '\0') + return (cp); + + /* truncate trailing white space */ + for (ep = cp + strlen(cp) - 1; isspace(*ep); ep--) + ; + ep++; + *ep = '\0'; + return (cp); +} + +char *cons_gets(char *, int); + +void +main(void *cif) +{ + char linebuf[MAX_CMDLINE]; + FILE *file; + char *cp, *ep; + int n; + unsigned long choice; + + prom_init("bootlst", cif); + set_default_fs("promfs"); + if (mountroot("bootfs") != 0) + prom_panic("can't mount root"); + + if ((file = fopen("/boot/menu.lst", "r")) == NULL) + prom_panic("can't open menu.lst"); + init_table(); + + while (fgets(linebuf, MAX_CMDLINE, file)) { + cp = trim_white_space(linebuf); + + /* skip comments and blank lines */ + if (*cp == '#' || *cp == '\0') + continue; + + /* find end of first keyword on line */ + for (ep = cp; !isspace(*ep) && *ep; ep++) + ; + + /* if at the end of the line, the line had no arguments */ + if (*ep == '\0') + continue; + + *ep = '\0'; + + if (strcmp(cp, "title") == 0) { + add_title_entry(ep + 1); + continue; + } + + if (strcmp(cp, "bootfs") == 0) { + add_dataset_entry(ep + 1); + continue; + } + } + + if (menu_entry_count == 0) + prom_panic("no menu entries found"); + + for (n = 0; n < menu_entry_count; n++) { + printf("%d %s\n", n + 1, titles[n]); + } + + printf("Select environment to boot: [ 1 - %d ]: ", menu_entry_count); + + while (cons_gets(linebuf, MAX_CMDLINE)) { + /* cut off leading and trailing white space */ + cp = trim_white_space(linebuf); + choice = strtoul(cp, NULL, 0); + + /* + * If the input is totally invalid, the return value of + * strtoul() will be 0 or ULONG_MAX. Either way, it's + * out of the acceptable range. 
+ */ + if (choice == 0 || choice > menu_entry_count) { + printf("Invalid entry.\n"); + continue; + } + /* XXX here is the result */ + printf("\nTo boot the selected entry, invoke:\n"); + printf("boot [<root-device>] -Z %s\n\n", datasets[choice - 1]); + prom_exit_to_mon(); + } +} diff --git a/usr/src/psm/stand/bootlst/common/sasubr.c b/usr/src/psm/stand/bootlst/common/sasubr.c new file mode 100644 index 0000000000..bcad2de9ac --- /dev/null +++ b/usr/src/psm/stand/bootlst/common/sasubr.c @@ -0,0 +1,117 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/sysmacros.h> +#include <sys/salib.h> +#include <sys/promif.h> + +#define MINALLOC 8 +#define TOPMEM ((caddr_t)0x1000000) + +extern caddr_t _end; +extern struct boot_fs_ops promfs_ops; + +struct boot_fs_ops *boot_fsw[] = { + &promfs_ops, +}; +int boot_nfsw = sizeof (boot_fsw) / sizeof (boot_fsw[0]); + +void * +bkmem_alloc(size_t s) +{ + static caddr_t next; + caddr_t ret; + + if (next == NULL) + next = (caddr_t)roundup((uintptr_t)&_end, MINALLOC); + ret = next; + next += roundup(s, MINALLOC); + if (next >= TOPMEM) + prom_panic("out of memory"); + return (ret); +} + +/*ARGSUSED*/ +void +bkmem_free(void *p, size_t s) +{ +} + +int +cons_getchar(void) +{ + register int c; + + while ((c = prom_mayget()) == -1) + ; + if (c == '\r') { + prom_putchar(c); + c = '\n'; + } + if (c == 0177 || c == '\b') { + prom_putchar('\b'); + prom_putchar(' '); + c = '\b'; + } + prom_putchar(c); + return (c); +} + +char * +cons_gets(char *buf, int n) +{ + char *lp; + char *limit; + int c; + + lp = buf; + limit = &buf[n - 1]; + for (;;) { + c = cons_getchar() & 0177; + switch (c) { + case '\n': + case '\r': + *lp = '\0'; + return (buf); + case '\b': + if (lp > buf) + lp--; + continue; + case 'u'&037: /* ^U */ + lp = buf; + prom_putchar('\r'); + prom_putchar('\n'); + continue; + case 0: + continue; + default: + if (lp < limit) + *lp++ = (char)c; + else + prom_putchar('\a'); /* bell */ + } + } +} diff --git a/usr/src/psm/stand/bootlst/sparc/Makefile b/usr/src/psm/stand/bootlst/sparc/Makefile new file mode 100644 index 0000000000..73c463b19a --- /dev/null +++ b/usr/src/psm/stand/bootlst/sparc/Makefile @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +SUBDIRS = sun4u sun4v + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint +clean.lint := TARGET= clean.lint + +.KEEP_STATE: + +all install clean clobber lint clean.lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/psm/stand/bootlst/sparc/mapfile b/usr/src/psm/stand/bootlst/sparc/mapfile new file mode 100644 index 0000000000..22dc304042 --- /dev/null +++ b/usr/src/psm/stand/bootlst/sparc/mapfile @@ -0,0 +1,40 @@ +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# psm/stand/bootlst/sparc/mapfile +# + +text = LOAD ?RX V0x100000; +text : $PROGBITS ?A!W; + +data = LOAD ?RWX A0x8; +data : $PROGBITS ?AW; +data : $NOBITS ?AW; + +note = NOTE; +note : $NOTE; diff --git a/usr/src/psm/stand/bootlst/sparc/srt0.s b/usr/src/psm/stand/bootlst/sparc/srt0.s new file mode 100644 index 0000000000..42ac26a93c --- /dev/null +++ b/usr/src/psm/stand/bootlst/sparc/srt0.s @@ -0,0 +1,113 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ident "%Z%%M% %I% %E% SMI" + +/* + * srt0.s - bootlst startup code + */ +#include <sys/asm_linkage.h> +#include <sys/machparam.h> + +#define STKSIZE 0x1000 + +#if defined(lint) +void *estack; +caddr_t _end; +#endif + +#if defined(lint) + +/* ARGSUSED */ +void +_start(void *a, ...) +{} + +#else /* !lint */ + + .seg ".bss" + .align MMU_PAGESIZE + .skip STKSIZE +estack: ! top of cprboot stack + .global estack + + .seg ".data" + .align 8 +local_cif: + .xword 0 ! space for prom cookie + + .seg ".text" + .align 8 + + ! + ! regs on entry: + ! %o4 = prom cookie + ! + ENTRY(_start) + set estack - STACK_BIAS, %o5 + save %o5, -SA(MINFRAME), %sp + + ! + ! clear the bss + ! + set _edata, %o0 + set _end, %g2 + call bzero + sub %g2, %o0, %o1 ! bss size = (_end - _edata) + + set local_cif, %g2 + stx %i4, [%g2] + call main + mov %i4, %o0 ! SPARCV9/CIF + + call prom_exit_to_mon + nop + SET_SIZE(_start) + +#endif /* lint */ + + +#if defined(lint) + +/* ARGSUSED */ +int +client_handler(void *cif_handler, void *arg_array) +{ return (0); } + +#else + + ! + ! 64/64 client interface for ieee1275 prom + ! + ENTRY(client_handler) + mov %o7, %g1 + mov %o0, %g5 + mov %o1, %o0 + jmp %g5 + mov %g1, %o7 + SET_SIZE(client_handler) + +#endif /* lint */ + diff --git a/usr/src/psm/stand/bootlst/sparc/sun4u/Makefile b/usr/src/psm/stand/bootlst/sparc/sun4u/Makefile new file mode 100644 index 0000000000..471f1ec143 --- /dev/null +++ b/usr/src/psm/stand/bootlst/sparc/sun4u/Makefile @@ -0,0 +1,64 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +TOPDIR = ../../../../.. +SPARCDIR = .. + +# override global macros as necessary +ARCH = sun4u +TARG_MACH = sparcv9 +PROMTYPE = ieee1275 +PROMIFDIR = $(TOPDIR)/psm/stand/lib/promif +PLATDIR = $(TOPDIR)/uts/$(ARCH) + +ARCH_PROMDIR = $(TARG_MACH)/$(PROMTYPE)/common +PLAT_PROMDIR = $(PROMTYPE)/$(ARCH) +PLATLIBDIR = $(PROMIFDIR)/$(TARG_MACH)/$(PLAT_PROMDIR) +PLATLIB = $(PLATLIBDIR)/libplat.a + +LDPLATLIBS = -L$(PLATLIBDIR) -lplat +SALIBS = $(PLATLIB) +MAPFILE = $(SPARCDIR)/mapfile +LINTPLATLIBS = $(PLATLIBDIR)/llib-lplat.ln + +BOOTLSTOBJ = srt0.o + +ALL = bootlst + +%.o: $(SPARCDIR)/%.s + $(COMPILE.s) -o $@ $< + +%.ln: $(SPARCDIR)/%.s + @$(LHEAD) $(LINT.s) $< $(LTAIL) + + +include ../../common/Makefile.com +include $(TOPDIR)/psm/Makefile.psm.64 +CFLAGS64 += -xchip=ultra $(CCABS32) + +.KEEP_STATE: + diff --git a/usr/src/psm/stand/bootlst/sparc/sun4v/Makefile b/usr/src/psm/stand/bootlst/sparc/sun4v/Makefile new file mode 100644 index 0000000000..aaf6f6ddc1 --- /dev/null +++ b/usr/src/psm/stand/bootlst/sparc/sun4v/Makefile @@ -0,0 +1,64 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +TOPDIR = ../../../../.. +SPARCDIR = .. + +# override global macros as necessary +ARCH = sun4v +TARG_MACH = sparcv9 +PROMTYPE = ieee1275 +PROMIFDIR = $(TOPDIR)/psm/stand/lib/promif +PLATDIR = $(TOPDIR)/uts/$(ARCH) + +ARCH_PROMDIR = $(TARG_MACH)/$(PROMTYPE)/common +PLAT_PROMDIR = $(PROMTYPE)/$(ARCH) +PLATLIBDIR = $(PROMIFDIR)/$(TARG_MACH)/$(PLAT_PROMDIR) +PLATLIB = $(PLATLIBDIR)/libplat.a + +LDPLATLIBS = -L$(PLATLIBDIR) -lplat +SALIBS = $(PLATLIB) +MAPFILE = $(SPARCDIR)/mapfile +LINTPLATLIBS = $(PLATLIBDIR)/llib-lplat.ln + +BOOTLSTOBJ = srt0.o + +ALL = bootlst + +%.o: $(SPARCDIR)/%.s + $(COMPILE.s) -o $@ $< + +%.ln: $(SPARCDIR)/%.s + @$(LHEAD) $(LINT.s) $< $(LTAIL) + + +include ../../common/Makefile.com +include $(TOPDIR)/psm/Makefile.psm.64 +CFLAGS64 += -xchip=ultra $(CCABS32) + +.KEEP_STATE: + diff --git a/usr/src/psm/stand/cpr/common/support.c b/usr/src/psm/stand/cpr/common/support.c index dfafa98865..d3cc145ea4 100644 --- a/usr/src/psm/stand/cpr/common/support.c +++ b/usr/src/psm/stand/cpr/common/support.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -37,7 +37,6 @@ extern void prom_unmap(caddr_t, uint_t); extern int cpr_debug; static int cpr_show_props = 0; - /* * Read the config file and pass back the file path, filesystem * device path. @@ -53,7 +52,8 @@ cpr_read_cprinfo(int fd, char *file_path, char *fs_path) (void) prom_strcpy(file_path, cf.cf_path); (void) prom_strcpy(fs_path, cf.cf_dev_prom); - + if (cf.cf_type == CFT_ZVOL) + volname = cf.cf_fs; return (0); } diff --git a/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.c b/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.c index 60ff9d8b96..e8037450d8 100644 --- a/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.c +++ b/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,6 +48,7 @@ #include <sys/ddi.h> #include "cprboot.h" +char *volname = NULL; /* * local defs @@ -289,9 +290,10 @@ cb_open_sf(void) /* * for block devices, seek past the disk label and bootblock */ - if (specialstate) + if (volname) + (void) cpr_fs_seek(sfile.fd, CPR_SPEC_OFFSET); + else if (specialstate) (void) prom_seek(sfile.fd, CPR_SPEC_OFFSET); - return (0); } @@ -355,8 +357,9 @@ cb_read_statefile(void) /* * read-in and check cpr dump header */ - if (cpr_read_cdump(sfile.fd, &cdump, CPR_MACHTYPE_4U)) + if (cpr_read_cdump(sfile.fd, &cdump, CPR_MACHTYPE_4U) == -1) return (ERR); + if (cpr_debug) prom_printf("\n"); cb_nbitmaps = cdump.cdd_bitmaprec; @@ -399,7 +402,9 @@ cb_read_statefile(void) cnt = 0; dtlb_index = cb_dents - 1; - if (specialstate) + if (volname) + (void) cpr_fs_seek(sfile.fd, CPR_SPEC_OFFSET); + else if (specialstate) (void) prom_seek(sfile.fd, CPR_SPEC_OFFSET); else (void) cpr_fs_seek(sfile.fd, 0); diff --git a/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.h b/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.h index 4ceb64ba83..694d89ffe3 100644 --- 
a/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.h +++ b/usr/src/psm/stand/cpr/sparcv9/sun4u/cprboot.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -182,6 +182,7 @@ extern char ent_fmt[]; extern int verbose; extern uint_t cb_dents; extern uint_t cb_msec; +extern char *volname; /* * machdep.c @@ -233,6 +234,7 @@ extern int cb_usb_setup(void); extern void cb_enter_mon(void); extern void cb_exit_to_mon(void); extern int cpr_fs_close(int); +extern int cpr_fs_volopen(char *); extern int cpr_fs_open(char *); extern int cpr_fs_read(int, char *, int); extern int cpr_fs_seek(int, offset_t); diff --git a/usr/src/psm/stand/cpr/sparcv9/sun4u/util.c b/usr/src/psm/stand/cpr/sparcv9/sun4u/util.c index e739d31d46..79b98a9e36 100644 --- a/usr/src/psm/stand/cpr/sparcv9/sun4u/util.c +++ b/usr/src/psm/stand/cpr/sparcv9/sun4u/util.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +45,6 @@ static char null_input[] = "\" /nulldev\" input"; * path of the file. Handle file pathnames with or without leading '/'. * if fs points to a null char, it indicates that we are opening a device. */ -/* ARGSUSED */ int cpr_statefile_open(char *path, char *fs_dev) { @@ -80,6 +79,10 @@ cpr_statefile_open(char *path, char *fs_dev) return (-1); } + if (volname) { + return (cpr_fs_volopen(volname)); + } + /* * Prepend '/' if it's not there already */ @@ -123,10 +126,20 @@ cb_unmountroot() return (0); } +int +cpr_fs_volopen(char *path) +{ + + CB_VENTRY(cpr_fs_volopen); + + if (cb_rih == OBP_BADNODE) + return (-1); + return (prom_volopen(cb_rih, path)); +} + /* * Ask prom to open a disk file. 
*/ -/* ARGSUSED */ int cpr_fs_open(char *path) { @@ -146,7 +159,7 @@ cpr_fs_open(char *path) int cpr_read(int fd, caddr_t buf, size_t len) { - if (!statefile_special) + if (!statefile_special || volname) return (cpr_fs_read(fd, buf, len)); else return (prom_read(fd, buf, len, 0, 0)); diff --git a/usr/src/psm/stand/lib/promif/sparcv9/ieee1275/sun4v/Makefile b/usr/src/psm/stand/lib/promif/sparcv9/ieee1275/sun4v/Makefile index e10d2c36c6..620da38783 100644 --- a/usr/src/psm/stand/lib/promif/sparcv9/ieee1275/sun4v/Makefile +++ b/usr/src/psm/stand/lib/promif/sparcv9/ieee1275/sun4v/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # psm/stand/boot/sparcv9/ieee1275/sun4v/Makefile @@ -51,6 +51,7 @@ PLAT_PFILES = \ PLAT_PSUN4FILES = \ prom_alloc.c \ prom_cpuctl.c \ + prom_fio.c \ prom_getunum.c \ prom_idprom.c \ prom_init.c \ diff --git a/usr/src/stand/lib/fs/common/promfs.c b/usr/src/stand/lib/fs/common/promfs.c new file mode 100644 index 0000000000..0ed3aa4e2e --- /dev/null +++ b/usr/src/stand/lib/fs/common/promfs.c @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootsyms.h> +#include <sys/promif.h> +#include <sys/salib.h> + +/* + * Function prototypes + */ +static int promfs_mountroot(char *str); +static int promfs_unmountroot(void); +static int promfs_open(char *filename, int flags); +static int promfs_close(int fd); +static ssize_t promfs_read(int fd, caddr_t buf, size_t size); +static off_t promfs_lseek(int fd, off_t offset, int whence); +static int promfs_fstat(int fd, struct bootstat *stp); +static void promfs_closeall(int flag); + +struct boot_fs_ops promfs_ops = { + "promfs", + promfs_mountroot, + promfs_unmountroot, + promfs_open, + promfs_close, + promfs_read, + promfs_lseek, + promfs_fstat, + promfs_closeall, + NULL +}; + +static ihandle_t fsih; + +static int +promfs_mountroot(char *str) +{ + + (void) prom_getprop(prom_chosennode(), str, (caddr_t)&fsih); + return (fsih == -1); +} + +static int +promfs_unmountroot(void) +{ + (void) prom_close(fsih); + return (0); +} + +/*ARGSUSED*/ +static int +promfs_open(char *filename, int flags) +{ + return (prom_fopen(fsih, filename)); +} + +static int +promfs_close(int fd) +{ + prom_fclose(fsih, fd); + return (0); +} + +static ssize_t +promfs_read(int fd, caddr_t buf, size_t size) +{ + return (prom_fread(fsih, fd, buf, size)); +} + +/*ARGSUSED*/ +static off_t +promfs_lseek(int fd, off_t offset, int whence) +{ + return (prom_fseek(fsih, fd, offset)); +} + +static int +promfs_fstat(int fd, struct bootstat *stp) +{ + return (prom_fsize(fsih, fd, (size_t *)&stp->st_size)); +} + 
+/*ARGSUSED*/ +static void +promfs_closeall(int flag) +{ +} diff --git a/usr/src/stand/lib/sa/Makefile b/usr/src/stand/lib/sa/Makefile index f330cc5b02..f419a548ac 100644 --- a/usr/src/stand/lib/sa/Makefile +++ b/usr/src/stand/lib/sa/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -30,7 +29,7 @@ LIBRARY = libsa.a LOCOBJS = assert.o ctype.o errno.o libintl.o malloc.o memlist.o \ standalloc.o stdio.o stdlib.o strdup.o strings.o \ time.o unistd.o -CMNOBJS = cache.o diskread.o fsswitch.o +CMNOBJS = cache.o diskread.o fsswitch.o promfs.o sparc_CMNUTILOBJS = memchr.o memmove.o CMNUTILOBJS = $($(MACH)_CMNUTILOBJS) \ diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 1df3a45c4b..57b91cf515 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -1627,6 +1627,26 @@ archive_file_exists() } # +# extract one or more files from an archive into a temporary directory +# provided by the caller. The caller is responsible for checking +# to see whether the desired file or files were extracted +# +# $1 - archive +# $2 - temporary dir +# remaining args: file(s) to be extracted. +# +archive_file_peek() { + compressed_archive=`pwd`/$1 + tdir=$2 + shift + shift + if [ !
-d $tdir ] ; then + return + fi + (cd $tdir; $ZCAT $compressed_archive | cpio -idmucB $* 2>&1 ) +} + +# # If we're no longer delivering the eeprom service, remove it from the system, # as eeprom -I is removed as well. # @@ -2230,6 +2250,27 @@ if [ $diskless = no ]; then [[ -f $root/etc/system ]] || \ fail "$root/etc/system not found; nonglobal zone target not allowed" + rootfstype=`df -n $root | awk '{print $3}'` + + if [ "$rootfstype" = "zfs" ]; then + archive_has_zfs_root_support=no + mkdir /tmp/zfschk.$$ + archive_file_peek generic.lib /tmp/zfschk.$$ \ + "lib/svc/share/fs_include.sh" + if [ -f /tmp/zfschk.$$/lib/svc/share/fs_include.sh ] ; then + if grep '^readswapdev' \ + /tmp/zfschk.$$/lib/svc/share/fs_include.sh \ + >/dev/null 2>&1 ; then + archive_has_zfs_root_support=yes + fi + fi + rm -fr /tmp/zfschk.$$ + + if [ "$archive_has_zfs_root_support" = "no" ] ; then + fail "Cannot bfu a system with zfs root to an archive with no zfs root support" + fi + fi + # Make sure we extract the sun4u-us3 libc_psr.so.1 if [ -d $root/platform/sun4u -a \ ! -d $root/platform/sun4u-us3 ] @@ -2310,7 +2351,11 @@ if [ $diskless = no ]; then fi done fi - rootslice=`df -k $root | nawk 'NR > 1 { print $1 }' | sed s/dsk/rdsk/` + if [ "$rootfstype" = "ufs" ] ; then + rootslice=`df -k $root | nawk 'NR > 1 { print $1 }' | \ + sed s/dsk/rdsk/` + fi + print "Loading $cpiodir on $root" else usrroot=$2 @@ -4705,6 +4750,10 @@ setup_pboot() cp $NEWBOOTBLK $BOOTBLK fi fi + # + # This function will never be called when upgrading a zfs root, + # so it's safe to assume a value for rootslice here. + # if [[ "$rootslice" = /dev/rdsk/* ]]; then print "Installing boot block." 
( cd $PBOOTDIR ; @@ -4836,7 +4885,19 @@ check_system_type() get_rootdev_list() { if [ -f $rootprefix/etc/lu/GRUB_slice ]; then - grep '^PHYS_SLICE' $rootprefix/etc/lu/GRUB_slice | cut -d= -f2 + dev=`grep '^PHYS_SLICE' $rootprefix/etc/lu/GRUB_slice | + cut -d= -f2` + if [ "$rootfstype" = "zfs" ]; then + fstyp -a "$dev" | grep 'path: ' | grep -v phys_path: | + cut -d"'" -f2 | sed 's+/dsk/+/rdsk/+' + else + echo "$dev" + fi + return + elif [ "$rootfstype" = "zfs" ]; then + rootpool=`df -k ${rootprefix:-/} | tail +2 | cut -d/ -f1` + rootdevlist=`zpool iostat -v "$rootpool" | tail +5 | + grep -v mirror | sed -n -e '/--/q' -e p | awk '{print $1}'` else metadev=`grep -v "^#" $rootprefix/etc/vfstab | \ grep "[ ]/[ ]" | nawk '{print $2}'` @@ -4848,11 +4909,11 @@ get_rootdev_list() grep -v "^$metavol[ ]" |\ nawk '{print $4}' | sed -e "s#/dev/rdsk/##"` fi - for rootdev in $rootdevlist - do - echo /dev/rdsk/$rootdev - done fi + for rootdev in $rootdevlist + do + echo /dev/rdsk/$rootdev + done } # @@ -5132,6 +5193,12 @@ install_failsafe() fi } +# +# setup_grub_menu is only called when upgrading from a system +# with a dca boot. This cannot happen on systems with zfs root, +# so this function need not take care of the case where the root +# file system type is zfs +# setup_grub_menu() { MENU=$rootprefix/boot/grub/menu.lst @@ -7211,19 +7278,26 @@ mondo_loop() { # End of pre-archive extraction hacks. if [ $diskless = no -a $zone = global ]; then - print "Extracting ufs modules for boot block ... \c" | \ + print "Extracting $rootfstype modules for boot block ... 
\c" | \ tee -a $EXTRACT_LOG # extract both /platform and /usr/platform bootblks # for compatibility with older bootblk delivery do_extraction $cpiodir/$karch.root$ZFIX \ - 'platform/'$karch'/lib/fs/ufs/*' | \ + 'platform/'$karch'/lib/fs/$rootfstype/*' | \ tee -a $EXTRACT_LOG do_extraction $cpiodir/$karch.usr$ZFIX \ - 'usr/platform/'$karch'/lib/fs/ufs/*' | \ + 'usr/platform/'$karch'/lib/fs/$rootfstype/*' | \ tee -a $EXTRACT_LOG case $target_isa in sparc) - if [[ "$rootslice" = /dev/rdsk/* ]]; then + if [[ "$rootfstype" = zfs ]]; then + cd $usr/platform/$karch/lib/fs/zfs + get_rootdev_list | while read physlice + do + print "Installing bootblk on $physlice." + installboot -F zfs ./bootblk $physlice + done + elif [[ "$rootslice" = /dev/rdsk/* ]]; then print "Installing boot block on $rootslice." cd $usr/platform/$karch/lib/fs/ufs installboot ./bootblk $rootslice @@ -7238,6 +7312,7 @@ mondo_loop() { fi ;; i386) + $rootprefix/boot/solaris/bin/update_grub -R $root ;; *) ;; # unknown ISA @@ -7560,7 +7635,8 @@ mondo_loop() { nsmb:* nsmb* EOF - if [ $target_isa = i386 ] && [[ $rootslice = /dev/rdsk/* || \ + if [ $target_isa = i386 ] && [[ $rootfstype = zfs || \ + $rootslice = /dev/rdsk/* || \ $rootslice = /dev/md/rdsk/* ]]; then check_boot_env fi @@ -7569,7 +7645,7 @@ mondo_loop() { # update boot archives for new boot sparc # if [ $newboot_sparc = yes ] && \ - [[ $rootslice = /dev/rdsk/* || + [[ $rootfstype = zfs || $rootslice = /dev/rdsk/* || $rootslice = /dev/md/rdsk/* ]]; then build_boot_archive install_sparc_failsafe diff --git a/usr/src/uts/common/cpr/cpr_main.c b/usr/src/uts/common/cpr/cpr_main.c index 22e1b702f7..31295e1b2c 100644 --- a/usr/src/uts/common/cpr/cpr_main.c +++ b/usr/src/uts/common/cpr/cpr_main.c @@ -70,6 +70,7 @@ extern void (*srn_signal)(int, int); extern void init_cpu_syscall(struct cpu *); extern void i_cpr_pre_resume_cpus(); extern void i_cpr_post_resume_cpus(); +extern int cpr_is_ufs(struct vfs *); extern int pm_powering_down; extern kmutex_t 
srn_clone_lock; @@ -336,8 +337,14 @@ cpr_ufs_logging(int enable) if (error = cpr_open_deffile(FREAD, &vp)) return (error); - cpr_log_status(enable, &def_status, vp); vfsp = vp->v_vfsp; + if (!cpr_is_ufs(vfsp)) { + (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); + VN_RELE(vp); + return (0); + } + + cpr_log_status(enable, &def_status, vp); (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); VN_RELE(vp); diff --git a/usr/src/uts/common/cpr/cpr_misc.c b/usr/src/uts/common/cpr/cpr_misc.c index e35789b4a9..bf5ccf9fcf 100644 --- a/usr/src/uts/common/cpr/cpr_misc.c +++ b/usr/src/uts/common/cpr/cpr_misc.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -71,6 +71,7 @@ static void cpr_save_mp_state(void); #endif int cpr_is_ufs(struct vfs *); +int cpr_is_zfs(struct vfs *); char cpr_default_path[] = CPR_DEFAULT; @@ -240,8 +241,8 @@ cpr_cprconfig_to_path(void) * mounted on the same device as when pmconfig was last run, * and the translation of that device to a node in the prom's * device tree must be the same as when pmconfig was last run. - * for CFT_SPEC, cf_path must be the path to a block special file, - * it must have no file system mounted on it, + * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block + * special file, it must have no file system mounted on it, * and the translation of that device to a node in the prom's * device tree must be the same as when pmconfig was last run. 
*/ @@ -278,6 +279,15 @@ cpr_verify_statefile_path(void) switch (cf->cf_type) { case CFT_SPEC: + error = i_devname_to_promname(cf->cf_devfs, devpath, + OBP_MAXPATHLEN); + if (error || strcmp(devpath, cf->cf_dev_prom)) { + cpr_err(CE_CONT, path_chg_fmt, + cf->cf_dev_prom, devpath, rerun); + return (error); + } + /*FALLTHROUGH*/ + case CFT_ZVOL: if (strlen(cf->cf_path) > sizeof (sfpath)) { cpr_err(CE_CONT, long_name); return (ENAMETOOLONG); @@ -304,12 +314,6 @@ cpr_verify_statefile_path(void) return (ENOTSUP); } - error = i_devname_to_promname(cf->cf_devfs, devpath, - OBP_MAXPATHLEN); - if (error || strcmp(devpath, cf->cf_dev_prom)) { - cpr_err(CE_CONT, path_chg_fmt, - cf->cf_dev_prom, devpath, rerun); - } return (error); case CFT_UFS: break; /* don't indent all the original code */ @@ -430,7 +434,8 @@ cpr_check_spec_statefile(void) if (err = cpr_get_config()) return (err); - ASSERT(cprconfig.cf_type == CFT_SPEC); + ASSERT(cprconfig.cf_type == CFT_SPEC || + cprconfig.cf_type == CFT_ZVOL); if (cprconfig.cf_devfs == NULL) return (ENXIO); @@ -1025,6 +1030,8 @@ cpr_build_statefile_path(void) return (NULL); } return (cpr_cprconfig_to_path()); + case CFT_ZVOL: + /*FALLTHROUGH*/ case CFT_SPEC: return (cf->cf_devfs); default: @@ -1049,7 +1056,7 @@ cpr_get_statefile_prom_path(void) ASSERT(cprconfig_loaded); ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC); - ASSERT(cf->cf_type == CFT_SPEC); + ASSERT(cf->cf_type == CFT_SPEC || cf->cf_type == CFT_ZVOL); return (cf->cf_dev_prom); } @@ -1067,6 +1074,15 @@ cpr_is_ufs(struct vfs *vfsp) return (strcmp(fsname, "ufs") == 0); } +int +cpr_is_zfs(struct vfs *vfsp) +{ + char *fsname; + + fsname = vfssw[vfsp->vfs_fstype].vsw_name; + return (strcmp(fsname, "zfs") == 0); +} + /* * This is a list of file systems that are allowed to be writeable when a * reusable statefile checkpoint is taken. They must not have any state that @@ -1121,7 +1137,7 @@ cpr_reusable_mount_check(void) int cpr_statefile_offset(void) { - return (cpr_statefile_is_spec() ? 
btod(CPR_SPEC_OFFSET) : 0); + return (cprconfig.cf_type != CFT_UFS ? btod(CPR_SPEC_OFFSET) : 0); } /* diff --git a/usr/src/uts/common/cpr/cpr_mod.c b/usr/src/uts/common/cpr/cpr_mod.c index 008cf5d73c..1b26cf38f1 100644 --- a/usr/src/uts/common/cpr/cpr_mod.c +++ b/usr/src/uts/common/cpr/cpr_mod.c @@ -44,6 +44,7 @@ extern int i_cpr_is_supported(int sleeptype); extern int cpr_is_ufs(struct vfs *); +extern int cpr_is_zfs(struct vfs *); extern int cpr_check_spec_statefile(void); extern int cpr_reusable_mount_check(void); extern int i_cpr_reusable_supported(void); @@ -341,7 +342,8 @@ cpr(int fcn, void *mdep) } if (!i_cpr_is_supported(cpr_sleeptype) || - (cpr_sleeptype == CPR_TODISK && !cpr_is_ufs(rootvfs))) + (cpr_sleeptype == CPR_TODISK && + !cpr_is_ufs(rootvfs)&& !cpr_is_zfs(rootvfs))) return (ENOTSUP); if (fcn == AD_CHECK_SUSPEND_TO_RAM || diff --git a/usr/src/uts/common/fs/specfs/specsubr.c b/usr/src/uts/common/fs/specfs/specsubr.c index de5bf62e44..b7158425b7 100644 --- a/usr/src/uts/common/fs/specfs/specsubr.c +++ b/usr/src/uts/common/fs/specfs/specsubr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -820,7 +820,7 @@ specinit(int fstype, char *name) * Create snode cache */ snode_cache = kmem_cache_create("snode_cache", sizeof (struct snode), - 0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0); + 0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0); /* * Associate vfs operations with spec_vfs @@ -1050,3 +1050,19 @@ spec_unfence_snode(dev_info_t *dip) return (0); } + +void +spec_size_invalidate(dev_t dev, vtype_t type) +{ + + struct snode *csp; + + mutex_enter(&stable_lock); + if ((csp = sfind(dev, type, NULL)) != NULL) { + mutex_enter(&csp->s_lock); + csp->s_flag &= ~SSIZEVALID; + VN_RELE(STOV(csp)); + mutex_exit(&csp->s_lock); + } + mutex_exit(&stable_lock); +} diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index c9d154327e..3c2008f599 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -84,6 +84,7 @@ #include <sys/console.h> #include <sys/reboot.h> #include <sys/attr.h> +#include <sys/spa.h> #include <vm/page.h> @@ -353,6 +354,13 @@ fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, return (fs_build_vector(actual, unused_ops, vfs_ops_table, template)); } +void +zfs_boot_init() { + + if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0) + spa_boot_init(); +} + int vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual) { @@ -842,6 +850,12 @@ vfs_mountroot(void) * root filesystem instead of the boot program's services. */ modrootloaded = 1; + + /* + * Special handling for a ZFS root file system. + */ + zfs_boot_init(); + /* * Set up mnttab information for root */ diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c index 3d2bc3e476..cdfb442f67 100644 --- a/usr/src/uts/common/fs/zfs/dmu_traverse.c +++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -35,6 +35,7 @@ #include <sys/spa.h> #include <sys/zio.h> #include <sys/dmu_impl.h> +#include <sys/zvol.h> #define BP_SPAN_SHIFT(level, width) ((level) * (width)) @@ -261,6 +262,16 @@ advance_block(zseg_t *zseg, dnode_phys_t *dnp, int rc, int advance) return (EAGAIN); } +/* + * The traverse_callback function will call the function specified in th_func. + * In the event of an error the callee, specified by th_func, must return + * one of the following errors: + * + * EINTR - Indicates that the callee wants the traversal to + * abort immediately. + * ERESTART - The callee has acknowledged the error and would + * like to continue. + */ static int traverse_callback(traverse_handle_t *th, zseg_t *zseg, traverse_blk_cache_t *bc) { @@ -722,6 +733,24 @@ traverse_dsl_dataset(dsl_dataset_t *ds, uint64_t txg_start, int advance, } int +traverse_zvol(objset_t *os, int advance, blkptr_cb_t func, void *arg) +{ + spa_t *spa = dmu_objset_spa(os); + traverse_handle_t *th; + int err; + + th = traverse_init(spa, func, arg, advance, ZIO_FLAG_CANFAIL); + + traverse_add_dnode(th, 0, -1ULL, dmu_objset_id(os), ZVOL_OBJ); + + while ((err = traverse_more(th)) == EAGAIN) + continue; + + traverse_fini(th); + return (err); +} + +int traverse_more(traverse_handle_t *th) { zseg_t *zseg = list_head(&th->th_seglist); diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 81c52ce9de..733817fd54 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -60,6 +60,7 @@ #include <sys/callb.h> #include <sys/systeminfo.h> #include <sys/sunddi.h> +#include <sys/spa_boot.h> #include "zfs_prop.h" #include "zfs_comutil.h" @@ -2033,8 +2034,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, * Import the given pool into the system. We set up the necessary spa_t and * then call spa_load() to do the dirty work. 
*/ -int -spa_import(const char *pool, nvlist_t *config, nvlist_t *props) +static int +spa_import_common(const char *pool, nvlist_t *config, nvlist_t *props, + boolean_t isroot) { spa_t *spa; char *altroot = NULL; @@ -2042,6 +2044,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props) nvlist_t *nvroot; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; + int mosconfig = isroot? B_FALSE : B_TRUE; /* * If a pool with this name exists, return failure. @@ -2065,19 +2068,19 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props) * Pass TRUE for mosconfig because the user-supplied config * is actually the one to trust when doing an import. */ - error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); + error = spa_load(spa, config, SPA_LOAD_IMPORT, mosconfig); spa_config_enter(spa, RW_WRITER, FTAG); /* * Toss any existing sparelist, as it doesn't have any validity anymore, * and conflicts with spa_has_spare(). */ - if (spa->spa_spares.sav_config) { + if (!isroot && spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; spa_load_spares(spa); } - if (spa->spa_l2cache.sav_config) { + if (!isroot && spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; spa_load_l2cache(spa); @@ -2139,12 +2142,12 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props) * Update the config cache to include the newly-imported pool. */ if (spa_mode & FWRITE) - spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); + spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, isroot); /* * Resilver anything that's out of date. 
*/ - if (spa_mode & FWRITE) + if (!isroot && (spa_mode & FWRITE)) VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); mutex_exit(&spa_namespace_lock); @@ -2152,6 +2155,148 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props) return (0); } +#ifdef _KERNEL +/* + * Build a "root" vdev for a top level vdev read in from a rootpool + * device label. + */ +static void +spa_build_rootpool_config(nvlist_t *config) +{ + nvlist_t *nvtop, *nvroot; + uint64_t pgid; + + /* + * Add this top-level vdev to the child array. + */ + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) + == 0); + VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) + == 0); + + /* + * Put this pool's top-level vdevs into a root vdev. + */ + VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) + == 0); + VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); + VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); + VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &nvtop, 1) == 0); + + /* + * Replace the existing vdev_tree with the new root vdev in + * this pool's configuration (remove the old, add the new). + */ + VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); + nvlist_free(nvroot); +} + +/* + * Get the root pool information from the root disk, then import the root pool + * during the system boot up time. 
+ */ +extern nvlist_t *vdev_disk_read_rootlabel(char *); + +void +spa_check_rootconf(char *devpath, char **bestdev, nvlist_t **bestconf, + uint64_t *besttxg) +{ + nvlist_t *config; + uint64_t txg; + + if ((config = vdev_disk_read_rootlabel(devpath)) == NULL) + return; + + VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); + + if (txg > *besttxg) { + *besttxg = txg; + if (*bestconf != NULL) + nvlist_free(*bestconf); + *bestconf = config; + *bestdev = devpath; + } +} + +boolean_t +spa_rootdev_validate(nvlist_t *nv) +{ + uint64_t ival; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || + nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || + nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED, &ival) == 0 || + nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Import a root pool. + * + * For x86, devpath_list will consist of the physpath name of the vdev in a single + * disk root pool or a list of physnames for the vdevs in a mirrored rootpool. + * e.g. + * "/pci@1f,0/ide@d/disk@0,0:a /pci@1f,0/ide@d/disk@2,0:a" + * + * For Sparc, devpath_list consists of the physpath name of the booting device + * whether the rootpool is a single device pool or a mirrored pool. + * e.g. + * "/pci@1f,0/ide@d/disk@0,0:a" + */ +int +spa_import_rootpool(char *devpath_list) +{ + nvlist_t *conf = NULL; + char *dev = NULL; + char *pname; + int error; + + /* + * Get the vdev pathname and configuration from the most + * recently updated vdev (highest txg). + */ + if (error = spa_get_rootconf(devpath_list, &dev, &conf)) + goto msg_out; + + /* + * Add type "root" vdev to the config.
+ */ + spa_build_rootpool_config(conf); + + VERIFY(nvlist_lookup_string(conf, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); + + error = spa_import_common(pname, conf, NULL, TRUE); + if (error == EEXIST) + error = 0; + + nvlist_free(conf); + return (error); + +msg_out: + cmn_err(CE_NOTE, "\n\n" + " *************************************************** \n" + " * This device is not bootable! * \n" + " * It is either offlined or detached or faulted. * \n" + " * Please try to boot from a different device. * \n" + " *************************************************** \n\n"); + + return (error); +} +#endif + +/* + * Import a non-root pool into the system. + */ +int +spa_import(const char *pool, nvlist_t *config, nvlist_t *props) +{ + return (spa_import_common(pool, config, props, FALSE)); +} + /* * This (illegal) pool name is used when temporarily importing a spa_t in order * to get the vdev stats associated with the imported devices. @@ -2201,6 +2346,38 @@ spa_tryimport(nvlist_t *tryconfig) spa->spa_uberblock.ub_timestamp) == 0); /* + * If the bootfs property exists on this pool then we + * copy it out so that external consumers can tell which + * pools are bootable. + */ + if (spa->spa_bootfs) { + char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * We have to play games with the name since the + * pool was opened as TRYIMPORT_NAME. + */ + if (dsl_dsobj_to_dsname(spa->spa_name, + spa->spa_bootfs, tmpname) == 0) { + char *cp; + char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + cp = strchr(tmpname, '/'); + if (cp == NULL) { + (void) strlcpy(dsname, tmpname, + MAXPATHLEN); + } else { + (void) snprintf(dsname, MAXPATHLEN, + "%s/%s", poolname, ++cp); + } + VERIFY(nvlist_add_string(config, + ZPOOL_CONFIG_BOOTFS, dsname) == 0); + kmem_free(dsname, MAXPATHLEN); + } + kmem_free(tmpname, MAXPATHLEN); + } + + /* * Add the list of hot spares and level 2 cache devices. 
*/ spa_add_spares(spa, config); diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index 17978ccc25..d83553c713 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -430,12 +430,24 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) } /* - * Update all disk labels, generate a fresh config based on the current - * in-core state, and sync the global config cache. + * For a pool that's not currently a booting rootpool, update all disk labels, + * generate a fresh config based on the current in-core state, and sync the + * global config cache. */ void spa_config_update(spa_t *spa, int what) { + spa_config_update_common(spa, what, FALSE); +} + +/* + * Update all disk labels, generate a fresh config based on the current + * in-core state, and sync the global config cache (do not sync the config + * cache if this is a booting rootpool). + */ +void +spa_config_update_common(spa_t *spa, int what, boolean_t isroot) +{ vdev_t *rvd = spa->spa_root_vdev; uint64_t txg; int c; @@ -472,8 +484,9 @@ spa_config_update(spa_t *spa, int what) /* * Update the global config cache to reflect the new mosconfig. 
*/ - spa_config_sync(); + if (!isroot) + spa_config_sync(); if (what == SPA_CONFIG_UPDATE_POOL) - spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); + spa_config_update_common(spa, SPA_CONFIG_UPDATE_VDEVS, isroot); } diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 311bec6da7..375bce75ee 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -1211,6 +1211,12 @@ spa_busy(void) } void +spa_boot_init() +{ + spa_config_load(); +} + +void spa_init(int mode) { mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h index ea9fa6c1e3..05e5ffdbff 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -100,6 +100,7 @@ struct traverse_handle { int traverse_dsl_dataset(struct dsl_dataset *ds, uint64_t txg_start, int advance, blkptr_cb_t func, void *arg); +int traverse_zvol(objset_t *os, int advance, blkptr_cb_t func, void *arg); traverse_handle_t *traverse_init(spa_t *spa, blkptr_cb_t *func, void *arg, int advance, int zio_flags); diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 713817d89c..2cf4fbb6fa 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -326,6 +326,10 @@ extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, const char *history_str); +extern void spa_check_rootconf(char *devpath, char **the_dev_p, + nvlist_t **the_conf_p, uint64_t *the_txg_p); +extern boolean_t spa_rootdev_validate(nvlist_t *nv); +extern int spa_import_rootpool(char *devpath); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); extern int spa_destroy(char *pool); @@ -390,6 +394,7 @@ extern void spa_config_set(spa_t *spa, nvlist_t *config); extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats); extern void spa_config_update(spa_t *spa, int what); +extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot); /* * Miscellaneous SPA routines in spa_misc.c @@ -430,7 +435,6 @@ extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_version(spa_t *spa); extern int spa_state(spa_t *spa); extern uint64_t spa_freeze_txg(spa_t *spa); -struct metaslab_class; extern uint64_t spa_get_alloc(spa_t *spa); extern uint64_t spa_get_space(spa_t *spa); extern uint64_t spa_get_dspace(spa_t *spa); @@ -502,6 +506,7 @@ extern void vdev_cache_stat_fini(void); /* Initialization and termination */ extern void spa_init(int flags); extern void spa_fini(void); +extern void 
spa_boot_init(); /* properties */ extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_boot.h b/usr/src/uts/common/fs/zfs/sys/spa_boot.h new file mode 100644 index 0000000000..b178ae0ac2 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/spa_boot.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_SPA_BOOT_H +#define _SYS_SPA_BOOT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/nvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern char *spa_get_bootfs(); +extern void spa_free_bootfs(char *bootfs); +extern int spa_get_rootconf(char *devpath, char **bestdev_p, + nvlist_t **bestconf_p); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SPA_BOOT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_disk.h b/usr/src/uts/common/fs/zfs/sys/vdev_disk.h index 95536a77db..b748571ea0 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_disk.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_disk.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,6 +30,8 @@ #include <sys/vdev.h> #ifdef _KERNEL +#include <sys/buf.h> +#include <sys/ddi.h> #include <sys/sunldi.h> #include <sys/sunddi.h> #endif @@ -45,6 +46,9 @@ typedef struct vdev_disk { ldi_handle_t vd_lh; } vdev_disk_t; +#ifdef _KERNEL +extern int vdev_disk_physio(ldi_handle_t, caddr_t, size_t, uint64_t, int); +#endif #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index a62551ca9c..5c14ae5d0a 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -178,10 +178,11 @@ enum zio_compress { #define ZIO_PIPELINE_STOP 0x101 /* - * We'll take the unused errno 'EBADE' (from the Convergent graveyard) - * to indicate checksum errors. + * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent + * graveyard) to indicate checksum errors and fragmentation. */ #define ECKSUM EBADE +#define EFRAGS EBADR typedef struct zio zio_t; typedef void zio_done_func_t(zio_t *zio); diff --git a/usr/src/uts/common/fs/zfs/sys/zvol.h b/usr/src/uts/common/fs/zfs/sys/zvol.h index f7a0f8fd4e..06adc667e1 100644 --- a/usr/src/uts/common/fs/zfs/sys/zvol.h +++ b/usr/src/uts/common/fs/zfs/sys/zvol.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,9 @@ extern "C" { #endif +#define ZVOL_OBJ 1ULL +#define ZVOL_ZAP_OBJ 2ULL + #ifdef _KERNEL extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize); extern int zvol_check_volblocksize(uint64_t volblocksize); @@ -46,6 +49,7 @@ extern int zvol_set_volsize(const char *, major_t, uint64_t); extern int zvol_set_volblocksize(const char *, uint64_t); extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr); +extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks); extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr); extern int zvol_strategy(buf_t *bp); extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr); diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index 933ed3e2bf..b586e23f71 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -27,6 +27,7 @@ #include <sys/zfs_context.h> #include <sys/spa.h> +#include <sys/refcount.h> #include <sys/vdev_disk.h> #include <sys/vdev_impl.h> #include <sys/fs/zfs.h> @@ -266,29 +267,45 @@ vdev_disk_close(vdev_t *vd) vd->vdev_tsd = NULL; } +int +vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size, + uint64_t offset, int flags) +{ + buf_t *bp; + int error = 0; + + if (vd_lh == NULL) + return (EINVAL); + + ASSERT(flags & B_READ || flags & B_WRITE); + + bp = getrbuf(KM_SLEEP); + bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST; + bp->b_bcount = size; + bp->b_un.b_addr = (void *)data; + bp->b_lblkno = lbtodb(offset); + bp->b_bufsize = size; + + error = ldi_strategy(vd_lh, bp); + ASSERT(error == 0); + if ((error = biowait(bp)) == 0 && bp->b_resid != 0) + error = EIO; + freerbuf(bp); + + return (error); +} + static int vdev_disk_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset, int flags) { - buf_t buf; int error = 0; vdev_disk_t *dvd = vd->vdev_tsd; if (vd == NULL || dvd == NULL || dvd->vd_lh == NULL) return (EINVAL); - ASSERT(flags & B_READ || flags & B_WRITE); - - bioinit(&buf); - buf.b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST; - buf.b_bcount = size; - buf.b_un.b_addr = (void *)data; - buf.b_lblkno = lbtodb(offset); - buf.b_bufsize = size; - - error = ldi_strategy(dvd->vd_lh, &buf); - ASSERT(error == 0); - error = biowait(&buf); + error = vdev_disk_physio(dvd->vd_lh, data, size, offset, flags); if (zio_injection_enabled && error == 0) error = zio_handle_device_injection(vd, EIO); @@ -558,3 +575,65 @@ vdev_ops_t vdev_disk_ops = { VDEV_TYPE_DISK, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; + +/* + * Given the root disk device pathname, read the label from the device, + * and construct a configuration nvlist. 
+ */ +nvlist_t * +vdev_disk_read_rootlabel(char *devpath) +{ + nvlist_t *config = NULL; + ldi_handle_t vd_lh; + vdev_label_t *label; + uint64_t s, size; + int l; + + /* + * Read the device label and build the nvlist. + */ + if (ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, zfs_li)) + return (NULL); + + if (ldi_get_size(vd_lh, &s)) + return (NULL); + + size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); + label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP); + + for (l = 0; l < VDEV_LABELS; l++) { + uint64_t offset, state, txg = 0; + + /* read vdev label */ + offset = vdev_label_offset(size, l, 0); + if (vdev_disk_physio(vd_lh, (caddr_t)label, + VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE + + VDEV_PHYS_SIZE, offset, B_READ) != 0) + continue; + + if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, + sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) { + config = NULL; + continue; + } + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state >= POOL_STATE_DESTROYED) { + nvlist_free(config); + config = NULL; + continue; + } + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0) { + nvlist_free(config); + config = NULL; + continue; + } + + break; + } + + kmem_free(label, sizeof (vdev_label_t)); + return (config); +} diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c index c5fcb89633..654178a57b 100644 --- a/usr/src/uts/common/fs/zfs/zfs_fm.c +++ b/usr/src/uts/common/fs/zfs/zfs_fm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -263,6 +263,9 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, */ if (zio->io_logical != NULL) fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, + DATA_TYPE_UINT64, + zio->io_logical->io_bookmark.zb_objset, FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, DATA_TYPE_UINT64, zio->io_logical->io_bookmark.zb_object, diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index f559dba7e0..8915ae53f0 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -984,8 +984,8 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc) { spa_t *spa; int error; - nvlist_t *config, **l2cache; - uint_t nl2cache; + nvlist_t *config, **l2cache, **spares; + uint_t nl2cache = 0, nspares = 0; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) @@ -996,13 +996,20 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc) (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache); + (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES, + &spares, &nspares); + /* * A root pool with concatenated devices is not supported. - * Thus, can not add a device to a root pool with one device. - * Allow for l2cache devices to be added. + * Thus, can not add a device to a root pool. + * + * Intent log device can not be added to a rootpool because + * during mountroot, zil is replayed, a seperated log device + * can not be accessed during the mountroot time. + * + * l2cache and spare devices are ok to be added to a rootpool. 
*/ - if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0 && - nl2cache == 0) { + if (spa->spa_bootfs != 0 && nl2cache == 0 && nspares == 0) { spa_close(spa, FTAG); return (EDOM); } @@ -1348,7 +1355,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) return (error); } -static int +int zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) { nvpair_t *elem; @@ -1919,6 +1926,7 @@ zfs_ioc_create(zfs_cmd_t *zc) default: cbfunc = NULL; + break; } if (strchr(zc->zc_name, '@') || strchr(zc->zc_name, '%')) @@ -2040,6 +2048,7 @@ zfs_ioc_create(zfs_cmd_t *zc) error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc, &zct); nvlist_free(zct.zct_zplprops); + } /* @@ -2049,7 +2058,6 @@ zfs_ioc_create(zfs_cmd_t *zc) if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0) (void) dmu_objset_destroy(zc->zc_name); } - nvlist_free(nvprops); return (error); } @@ -2934,7 +2942,7 @@ static struct cb_ops zfs_cb_ops = { zvol_close, /* close */ zvol_strategy, /* strategy */ nodev, /* print */ - nodev, /* dump */ + zvol_dump, /* dump */ zvol_read, /* read */ zvol_write, /* write */ zfsdev_ioctl, /* ioctl */ diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index 07676602bd..c524cb5eaa 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -61,6 +61,7 @@ #include <sys/sunddi.h> #include <sys/dnlc.h> #include <sys/dmu_objset.h> +#include <sys/spa_boot.h> int zfsfstype; vfsops_t *zfs_vfsops = NULL; @@ -830,7 +831,7 @@ str_to_uint64(char *str, uint64_t *objnum) * string to a dataset name: "rootpool-name/root-filesystem-name". 
*/ static int -parse_bootpath(char *bpath, char *outpath) +zfs_parse_bootfs(char *bpath, char *outpath) { char *slashp; uint64_t objnum; @@ -861,60 +862,66 @@ static int zfs_mountroot(vfs_t *vfsp, enum whymountroot why) { int error = 0; - int ret = 0; static int zfsrootdone = 0; zfsvfs_t *zfsvfs = NULL; znode_t *zp = NULL; vnode_t *vp = NULL; - char *zfs_bootpath; -#if defined(_OBP) - int proplen; -#endif + char *zfs_bootfs; ASSERT(vfsp); /* * The filesystem that we mount as root is defined in the - * "zfs-bootfs" property. + * boot property "zfs-bootfs" with a format of + * "poolname/root-dataset-objnum". */ if (why == ROOT_INIT) { if (zfsrootdone++) return (EBUSY); + /* + * the process of doing a spa_load will require the + * clock to be set before we could (for example) do + * something better by looking at the timestamp on + * an uberblock, so just set it to -1. + */ + clkset(-1); + + if ((zfs_bootfs = spa_get_bootfs()) == NULL) { + cmn_err(CE_NOTE, "\nspa_get_bootfs: can not get " + "bootfs name \n"); + return (EINVAL); + } -#if defined(_OBP) - proplen = BOP_GETPROPLEN(bootops, "zfs-bootfs"); - if (proplen == 0) - return (EIO); - zfs_bootpath = kmem_zalloc(proplen, KM_SLEEP); - if (BOP_GETPROP(bootops, "zfs-bootfs", zfs_bootpath) == -1) { - kmem_free(zfs_bootpath, proplen); - return (EIO); + if (error = spa_import_rootpool(rootfs.bo_name)) { + spa_free_bootfs(zfs_bootfs); + cmn_err(CE_NOTE, "\nspa_import_rootpool: error %d\n", + error); + return (error); } - error = parse_bootpath(zfs_bootpath, rootfs.bo_name); - kmem_free(zfs_bootpath, proplen); -#else - if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), - DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != - DDI_SUCCESS) - return (EIO); - - error = parse_bootpath(zfs_bootpath, rootfs.bo_name); - ddi_prop_free(zfs_bootpath); -#endif - - if (error) + + if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { + spa_free_bootfs(zfs_bootfs); + cmn_err(CE_NOTE, "\nzfs_parse_bootfs: error %d\n", + error); 
return (error); + } + + spa_free_bootfs(zfs_bootfs); if (error = vfs_lock(vfsp)) return (error); - if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) + if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) { + cmn_err(CE_NOTE, "\nzfs_domount: error %d\n", error); goto out; + } zfsvfs = (zfsvfs_t *)vfsp->vfs_data; ASSERT(zfsvfs); - if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) + if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { + cmn_err(CE_NOTE, "\nzfs_zget: error %d\n", error); goto out; + } vp = ZTOV(zp); mutex_enter(&vp->v_lock); @@ -928,17 +935,11 @@ zfs_mountroot(vfs_t *vfsp, enum whymountroot why) */ VN_RELE(vp); - /* - * Mount root as readonly initially, it will be remouted - * read/write by /lib/svc/method/fs-usr. - */ - readonly_changed_cb(vfsp->vfs_data, B_TRUE); vfs_add((struct vnode *)0, vfsp, (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); out: vfs_unlock(vfsp); - ret = (error) ? error : 0; - return (ret); + return (error); } else if (why == ROOT_REMOUNT) { readonly_changed_cb(vfsp->vfs_data, B_FALSE); vfsp->vfs_flag |= VFS_REMOUNT; diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 171932cc6b..5140e43966 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -53,6 +53,9 @@ #include <sys/zap.h> #include <sys/spa.h> #include <sys/zio.h> +#include <sys/dmu_traverse.h> +#include <sys/dnode.h> +#include <sys/dsl_dataset.h> #include <sys/dsl_prop.h> #include <sys/dkio.h> #include <sys/efi_partition.h> @@ -70,14 +73,17 @@ #include <sys/refcount.h> #include <sys/zfs_znode.h> #include <sys/zfs_rlock.h> +#include <sys/vdev_disk.h> +#include <sys/vdev_impl.h> +#include <sys/zvol.h> +#include <sys/dumphdr.h> #include "zfs_namecheck.h" -#define ZVOL_OBJ 1ULL -#define ZVOL_ZAP_OBJ 2ULL - static void *zvol_state; +#define ZVOL_DUMPSIZE "dumpsize" + /* * This lock protects the zvol_state structure from being modified * while it's being used, e.g. an open that comes in before a create @@ -87,6 +93,22 @@ static void *zvol_state; static kmutex_t zvol_state_lock; static uint32_t zvol_minors; +#define NUM_EXTENTS ((SPA_MAXBLOCKSIZE) / sizeof (zvol_extent_t)) + +typedef struct zvol_extent { + dva_t ze_dva; /* dva associated with this extent */ + uint64_t ze_stride; /* extent stride */ + uint64_t ze_size; /* number of blocks in extent */ +} zvol_extent_t; + +/* + * The list of extents associated with the dump device + */ +typedef struct zvol_ext_list { + zvol_extent_t zl_extents[NUM_EXTENTS]; + struct zvol_ext_list *zl_next; +} zvol_ext_list_t; + /* * The in-core state of each volume. 
*/ @@ -96,22 +118,33 @@ typedef struct zvol_state { uint64_t zv_volblocksize; /* volume block size */ minor_t zv_minor; /* minor number */ uint8_t zv_min_bs; /* minimum addressable block shift */ - uint8_t zv_readonly; /* hard readonly; like write-protect */ + uint8_t zv_flags; /* readonly; dumpified */ objset_t *zv_objset; /* objset handle */ uint32_t zv_mode; /* DS_MODE_* flags at open time */ uint32_t zv_open_count[OTYPCNT]; /* open counts */ uint32_t zv_total_opens; /* total open count */ zilog_t *zv_zilog; /* ZIL handle */ + zvol_ext_list_t *zv_list; /* List of extents for dump */ uint64_t zv_txg_assign; /* txg to assign during ZIL replay */ znode_t zv_znode; /* for range locking */ } zvol_state_t; /* + * zvol specific flags + */ +#define ZVOL_RDONLY 0x1 +#define ZVOL_DUMPIFIED 0x2 + +/* * zvol maximum transfer in one DMU tx. */ int zvol_maxphys = DMU_MAX_ACCESS/2; +extern int zfs_set_prop_nvlist(const char *, nvlist_t *); static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); +static int zvol_dumpify(zvol_state_t *zv); +static int zvol_dump_fini(zvol_state_t *zv); +static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); static void zvol_size_changed(zvol_state_t *zv, major_t maj) @@ -122,6 +155,10 @@ zvol_size_changed(zvol_state_t *zv, major_t maj) "Size", zv->zv_volsize) == DDI_SUCCESS); VERIFY(ddi_prop_update_int64(dev, zfs_dip, "Nblocks", lbtodb(zv->zv_volsize)) == DDI_SUCCESS); + + /* Notify specfs to invalidate the cached size */ + spec_size_invalidate(dev, VBLK); + spec_size_invalidate(dev, VCHR); } int @@ -156,7 +193,10 @@ zvol_readonly_changed_cb(void *arg, uint64_t newval) { zvol_state_t *zv = arg; - zv->zv_readonly = (uint8_t)newval; + if (newval) + zv->zv_flags |= ZVOL_RDONLY; + else + zv->zv_flags &= ~ZVOL_RDONLY; } int @@ -219,6 +259,131 @@ zvol_minor_lookup(const char *name) return (zv); } +void +zvol_init_extent(zvol_extent_t *ze, blkptr_t *bp) +{ + ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ + 
ze->ze_stride = 0; + ze->ze_size = 1; +} + +/* extent mapping arg */ +struct maparg { + zvol_ext_list_t *ma_list; + zvol_extent_t *ma_extent; + int ma_gang; +}; + +/*ARGSUSED*/ +static int +zvol_map_block(traverse_blk_cache_t *bc, spa_t *spa, void *arg) +{ + zbookmark_t *zb = &bc->bc_bookmark; + blkptr_t *bp = &bc->bc_blkptr; + void *data = bc->bc_data; + dnode_phys_t *dnp = bc->bc_dnode; + struct maparg *ma = (struct maparg *)arg; + uint64_t stride; + + /* If there is an error, then keep trying to make progress */ + if (bc->bc_errno) + return (ERESTART); + +#ifdef ZFS_DEBUG + if (zb->zb_level == -1) { + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); + ASSERT3U(BP_GET_LEVEL(bp), ==, 0); + } else { + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + } + + if (zb->zb_level > 0) { + uint64_t fill = 0; + blkptr_t *bpx, *bpend; + + for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); + bpx < bpend; bpx++) { + if (bpx->blk_birth != 0) { + fill += bpx->blk_fill; + } else { + ASSERT(bpx->blk_fill == 0); + } + } + ASSERT3U(fill, ==, bp->blk_fill); + } + + if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { + uint64_t fill = 0; + dnode_phys_t *dnx, *dnend; + + for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); + dnx < dnend; dnx++) { + if (dnx->dn_type != DMU_OT_NONE) + fill++; + } + ASSERT3U(fill, ==, bp->blk_fill); + } +#endif + + if (zb->zb_level || dnp->dn_type == DMU_OT_DNODE) + return (0); + + /* Abort immediately if we have encountered gang blocks */ + if (BP_IS_GANG(bp)) { + ma->ma_gang++; + return (EINTR); + } + + /* first time? 
*/ + if (ma->ma_extent->ze_size == 0) { + zvol_init_extent(ma->ma_extent, bp); + return (0); + } + + stride = (DVA_GET_OFFSET(&bp->blk_dva[0])) - + ((DVA_GET_OFFSET(&ma->ma_extent->ze_dva)) + + (ma->ma_extent->ze_size - 1) * (ma->ma_extent->ze_stride)); + if (DVA_GET_VDEV(BP_IDENTITY(bp)) == + DVA_GET_VDEV(&ma->ma_extent->ze_dva)) { + if (ma->ma_extent->ze_stride == 0) { + /* second block in this extent */ + ma->ma_extent->ze_stride = stride; + ma->ma_extent->ze_size++; + return (0); + } else if (ma->ma_extent->ze_stride == stride) { + /* + * the block we allocated has the same + * stride + */ + ma->ma_extent->ze_size++; + return (0); + } + } + + /* + * dtrace -n 'zfs-dprintf + * /stringof(arg0) == "zvol.c"/ + * { + * printf("%s: %s", stringof(arg1), stringof(arg3)) + * } ' + */ + dprintf("ma_extent 0x%lx mrstride 0x%lx stride %lx\n", + ma->ma_extent->ze_size, ma->ma_extent->ze_stride, stride); + dprintf_bp(bp, "%s", "next blkptr:"); + /* start a new extent */ + if (ma->ma_extent == &ma->ma_list->zl_extents[NUM_EXTENTS - 1]) { + ma->ma_list->zl_next = kmem_zalloc(sizeof (zvol_ext_list_t), + KM_SLEEP); + ma->ma_list = ma->ma_list->zl_next; + ma->ma_extent = &ma->ma_list->zl_extents[0]; + } else { + ma->ma_extent++; + } + zvol_init_extent(ma->ma_extent, bp); + return (0); +} + /* ARGSUSED */ void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) @@ -235,7 +400,7 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); /* - * These properites must be removed from the list so the generic + * These properties must be removed from the list so the generic * property setting step won't apply to them. */ VERIFY(nvlist_remove_all(nvprops, @@ -313,7 +478,107 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { }; /* - * Create a minor node for the specified volume. 
+ * reconstruct dva that gets us to the desired offset (offset + * is in bytes) + */ +int +zvol_get_dva(zvol_state_t *zv, uint64_t offset, dva_t *dva) +{ + zvol_ext_list_t *zl; + zvol_extent_t *ze; + int idx; + uint64_t tmp; + + if ((zl = zv->zv_list) == NULL) + return (EIO); + idx = 0; + ze = &zl->zl_extents[0]; + while (offset >= ze->ze_size * zv->zv_volblocksize) { + offset -= ze->ze_size * zv->zv_volblocksize; + + if (idx == NUM_EXTENTS - 1) { + /* we've reached the end of this array */ + ASSERT(zl->zl_next != NULL); + if (zl->zl_next == NULL) + return (-1); + zl = zl->zl_next; + ze = &zl->zl_extents[0]; + idx = 0; + } else { + ze++; + idx++; + } + } + DVA_SET_VDEV(dva, DVA_GET_VDEV(&ze->ze_dva)); + tmp = DVA_GET_OFFSET((&ze->ze_dva)); + tmp += (ze->ze_stride * (offset / zv->zv_volblocksize)); + DVA_SET_OFFSET(dva, tmp); + return (0); +} + +static void +zvol_free_extents(zvol_state_t *zv) +{ + zvol_ext_list_t *zl; + zvol_ext_list_t *tmp; + + if (zv->zv_list != NULL) { + zl = zv->zv_list; + while (zl != NULL) { + tmp = zl->zl_next; + kmem_free(zl, sizeof (zvol_ext_list_t)); + zl = tmp; + } + zv->zv_list = NULL; + } +} + +int +zvol_get_lbas(zvol_state_t *zv) +{ + struct maparg ma; + zvol_ext_list_t *zl; + zvol_extent_t *ze; + uint64_t blocks = 0; + int err; + + ma.ma_list = zl = kmem_zalloc(sizeof (zvol_ext_list_t), KM_SLEEP); + ma.ma_extent = &ma.ma_list->zl_extents[0]; + ma.ma_gang = 0; + zv->zv_list = ma.ma_list; + + err = traverse_zvol(zv->zv_objset, ADVANCE_PRE, zvol_map_block, &ma); + if (err == EINTR && ma.ma_gang) { + /* + * We currently don't support dump devices when the pool + * is so fragmented that our allocation has resulted in + * gang blocks. 
+ */ + zvol_free_extents(zv); + return (EFRAGS); + } + ASSERT3U(err, ==, 0); + + ze = &zl->zl_extents[0]; + while (ze) { + blocks += ze->ze_size; + if (ze == &zl->zl_extents[NUM_EXTENTS - 1]) { + zl = zl->zl_next; + ze = &zl->zl_extents[0]; + } else { + ze++; + } + } + if (blocks != (zv->zv_volsize / zv->zv_volblocksize)) { + zvol_free_extents(zv); + return (EIO); + } + + return (0); +} + +/* + * Create a minor node (plus a whole lot more) for the specified volume. */ int zvol_create_minor(const char *name, major_t maj) @@ -327,7 +592,7 @@ zvol_create_minor(const char *name, major_t maj) int ds_mode = DS_MODE_PRIMARY; vnode_t *vp = NULL; char *devpath; - size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + 1 + strlen(name) + 1; + size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(name) + 1; char chrbuf[30], blkbuf[30]; int error; @@ -362,7 +627,7 @@ zvol_create_minor(const char *name, major_t maj) */ devpath = kmem_alloc(devpathlen, KM_SLEEP); - (void) sprintf(devpath, "%s/%s", ZVOL_FULL_DEV_DIR, name); + (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, name); error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp); @@ -444,15 +709,12 @@ zvol_create_minor(const char *name, major_t maj) mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, sizeof (rl_t), offsetof(rl_t, r_node)); - - /* get and cache the blocksize */ error = dmu_object_info(os, ZVOL_OBJ, &doi); ASSERT(error == 0); zv->zv_volblocksize = doi.doi_data_block_size; zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector); - zvol_size_changed(zv, maj); /* XXX this should handle the possible i/o error */ @@ -512,13 +774,107 @@ zvol_remove_minor(const char *name) return (0); } +static int +zvol_truncate(zvol_state_t *zv, uint64_t offset, uint64_t size) +{ + dmu_tx_t *tx; + int error; + + tx = dmu_tx_create(zv->zv_objset); + dmu_tx_hold_free(tx, ZVOL_OBJ, offset, size); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + 
dmu_tx_abort(tx); + return (error); + } + error = dmu_free_range(zv->zv_objset, ZVOL_OBJ, offset, size, tx); + dmu_tx_commit(tx); + return (0); +} + +int +zvol_prealloc(zvol_state_t *zv) +{ + objset_t *os = zv->zv_objset; + dmu_tx_t *tx; + void *data; + uint64_t refd, avail, usedobjs, availobjs; + uint64_t resid = zv->zv_volsize; + uint64_t off = 0; + + /* Check the space usage before attempting to allocate the space */ + dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); + if (avail < zv->zv_volsize) + return (ENOSPC); + + /* Free old extents if they exist */ + zvol_free_extents(zv); + + /* allocate the blocks by writing each one */ + data = kmem_zalloc(SPA_MAXBLOCKSIZE, KM_SLEEP); + + while (resid != 0) { + int error; + uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + kmem_free(data, SPA_MAXBLOCKSIZE); + (void) zvol_truncate(zv, 0, off); + return (error); + } + dmu_write(os, ZVOL_OBJ, off, bytes, data, tx); + dmu_tx_commit(tx); + off += bytes; + resid -= bytes; + } + kmem_free(data, SPA_MAXBLOCKSIZE); + txg_wait_synced(dmu_objset_pool(os), 0); + + return (0); +} + +int +zvol_update_volsize(zvol_state_t *zv, major_t maj, uint64_t volsize) +{ + dmu_tx_t *tx; + int error; + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + + tx = dmu_tx_create(zv->zv_objset); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + dmu_tx_hold_free(tx, ZVOL_OBJ, volsize, DMU_OBJECT_END); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1, + &volsize, tx); + dmu_tx_commit(tx); + + if (error == 0) + error = zvol_truncate(zv, volsize, DMU_OBJECT_END); + + if (error == 0) { + zv->zv_volsize = volsize; + zvol_size_changed(zv, maj); + } + return (error); +} + int zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) { 
zvol_state_t *zv; - dmu_tx_t *tx; int error; dmu_object_info_t doi; + uint64_t old_volsize = 0ULL; mutex_enter(&zvol_state_lock); @@ -526,6 +882,7 @@ zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) mutex_exit(&zvol_state_lock); return (ENXIO); } + old_volsize = zv->zv_volsize; if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 || (error = zvol_check_volsize(volsize, @@ -534,33 +891,24 @@ zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) return (error); } - if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) { + if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { mutex_exit(&zvol_state_lock); return (EROFS); } - tx = dmu_tx_create(zv->zv_objset); - dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); - dmu_tx_hold_free(tx, ZVOL_OBJ, volsize, DMU_OBJECT_END); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - mutex_exit(&zvol_state_lock); - return (error); - } - - error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1, - &volsize, tx); - if (error == 0) { - error = dmu_free_range(zv->zv_objset, ZVOL_OBJ, volsize, - DMU_OBJECT_END, tx); - } + error = zvol_update_volsize(zv, maj, volsize); - dmu_tx_commit(tx); - - if (error == 0) { - zv->zv_volsize = volsize; - zvol_size_changed(zv, maj); + /* + * Reinitialize the dump area to the new size. If we + * failed to resize the dump area then restore the it back to + * it's original size. 
+ */ + if (error == 0 && zv->zv_flags & ZVOL_DUMPIFIED) { + if ((error = zvol_dumpify(zv)) != 0 || + (error = dumpvp_resize()) != 0) { + (void) zvol_update_volsize(zv, maj, old_volsize); + error = zvol_dumpify(zv); + } } mutex_exit(&zvol_state_lock); @@ -581,8 +929,7 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize) mutex_exit(&zvol_state_lock); return (ENXIO); } - - if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) { + if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { mutex_exit(&zvol_state_lock); return (EROFS); } @@ -626,7 +973,7 @@ zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) ASSERT(zv->zv_objset != NULL); if ((flag & FWRITE) && - (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY))) { + (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))) { mutex_exit(&zvol_state_lock); return (EROFS); } @@ -732,7 +1079,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) /* * Lock the range of the block to ensure that when the data is - * written out and it's checksum is being calculated that no other + * written out and its checksum is being calculated that no other * thread can change the block. */ boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); @@ -794,6 +1141,76 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len) } int +zvol_dumpio(vdev_t *vd, uint64_t size, uint64_t offset, void *addr, + int bflags, int isdump) +{ + vdev_disk_t *dvd; + int direction; + int c; + int numerrors = 0; + + for (c = 0; c < vd->vdev_children; c++) { + if (zvol_dumpio(vd->vdev_child[c], size, offset, + addr, bflags, isdump) != 0) { + numerrors++; + } else if (bflags & B_READ) { + break; + } + } + + if (!vd->vdev_ops->vdev_op_leaf) + return (numerrors < vd->vdev_children ? 
0 : EIO); + + if (!vdev_writeable(vd)) + return (EIO); + + dvd = vd->vdev_tsd; + ASSERT3P(dvd, !=, NULL); + direction = bflags & (B_WRITE | B_READ); + ASSERT(ISP2(direction)); + offset += VDEV_LABEL_START_SIZE; + + if (ddi_in_panic() || isdump) { + if (direction & B_READ) + return (EIO); + return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), + lbtodb(size))); + } else { + return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, + direction)); + } +} + +int +zvol_physio(zvol_state_t *zv, int bflags, uint64_t off, + uint64_t size, void *addr, int isdump) +{ + dva_t dva; + vdev_t *vd; + int error; + spa_t *spa = dmu_objset_spa(zv->zv_objset); + + ASSERT(size <= zv->zv_volblocksize); + + /* restrict requests to multiples of the system block size */ + if (P2PHASE(off, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE)) + return (EINVAL); + + if (zvol_get_dva(zv, off, &dva) != 0) + return (EIO); + + spa_config_enter(spa, RW_READER, FTAG); + vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva)); + + error = zvol_dumpio(vd, size, + DVA_GET_OFFSET(&dva) + (off % zv->zv_volblocksize), + addr, bflags & (B_READ | B_WRITE | B_PHYS), isdump); + + spa_config_exit(spa, FTAG); + return (error); +} + +int zvol_strategy(buf_t *bp) { zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); @@ -803,7 +1220,7 @@ zvol_strategy(buf_t *bp) objset_t *os; rl_t *rl; int error = 0; - boolean_t reading; + boolean_t reading, is_dump = zv->zv_flags & ZVOL_DUMPIFIED; if (zv == NULL) { bioerror(bp, ENXIO); @@ -817,8 +1234,9 @@ zvol_strategy(buf_t *bp) return (0); } - if ((zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) && - !(bp->b_flags & B_READ)) { + if (!(bp->b_flags & B_READ) && + (zv->zv_flags & ZVOL_RDONLY || + zv->zv_mode & DS_MODE_READONLY)) { bioerror(bp, EROFS); biodone(bp); return (0); @@ -842,14 +1260,18 @@ zvol_strategy(buf_t *bp) rl = zfs_range_lock(&zv->zv_znode, off, resid, reading ? 
RL_READER : RL_WRITER); - while (resid != 0 && off < volsize) { - - size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */ + if (resid > volsize - off) /* don't write past the end */ + resid = volsize - off; - if (size > volsize - off) /* don't write past the end */ - size = volsize - off; + while (resid != 0 && off < volsize) { - if (reading) { + size = MIN(resid, zvol_maxphys); + if (is_dump) { + /* can't straddle a block boundary */ + size = MIN(size, P2END(off, zv->zv_volblocksize) - off); + error = zvol_physio(zv, bp->b_flags, off, size, + addr, 0); + } else if (reading) { error = dmu_read(os, ZVOL_OBJ, off, size, addr); } else { dmu_tx_t *tx = dmu_tx_create(os); @@ -874,9 +1296,8 @@ zvol_strategy(buf_t *bp) if ((bp->b_resid = resid) == bp->b_bcount) bioerror(bp, off > volsize ? EINVAL : error); - if (!(bp->b_flags & B_ASYNC) && !reading && !zil_disable) + if (!(bp->b_flags & B_ASYNC) && !reading && !zil_disable && !is_dump) zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); - biodone(bp); return (0); @@ -897,6 +1318,45 @@ zvol_minphys(struct buf *bp) bp->b_bcount = zvol_maxphys; } +int +zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) +{ + minor_t minor = getminor(dev); + zvol_state_t *zv; + int error = 0; + uint64_t size; + uint64_t boff; + uint64_t resid; + + if (minor == 0) /* This is the control device */ + return (ENXIO); + + zv = ddi_get_soft_state(zvol_state, minor); + if (zv == NULL) + return (ENXIO); + + boff = ldbtob(blkno); + resid = ldbtob(nblocks); + if (boff + resid > zv->zv_volsize) { + /* dump should know better than to write here */ + ASSERT(blkno + resid <= zv->zv_volsize); + return (EIO); + } + while (resid) { + /* can't straddle a block boundary */ + size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); + + error = zvol_physio(zv, B_WRITE, boff, size, addr, 1); + if (error) + break; + boff += size; + addr += size; + resid -= size; + } + + return (error); +} + /*ARGSUSED*/ int zvol_read(dev_t dev, uio_t *uio, cred_t 
*cr) @@ -942,6 +1402,12 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr) if (zv == NULL) return (ENXIO); + if (zv->zv_flags & ZVOL_DUMPIFIED) { + error = physio(zvol_strategy, NULL, dev, B_WRITE, + zvol_minphys, uio); + return (error); + } + rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, RL_WRITER); while (uio->uio_resid > 0) { @@ -982,6 +1448,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) struct uuid uuid = EFI_RESERVED; uint32_t crc; int error = 0; + rl_t *rl; mutex_enter(&zvol_state_lock); @@ -1027,7 +1494,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) * zvol. Currently this interface will return ENOTTY to * such requests. These requests could be supported by * adding a check for lba == 0 and consing up an appropriate - * RMBR. + * PMBR. */ if (efi.dki_lba == 1) { efi_gpt_t gpt; @@ -1099,10 +1566,27 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) case DKIOCGGEOM: case DKIOCGVTOC: - /* commands using these (like prtvtoc) expect ENOTSUP */ + /* + * commands using these (like prtvtoc) expect ENOTSUP + * since we're emulating an EFI label + */ error = ENOTSUP; break; + case DKIOCDUMPINIT: + rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, + RL_WRITER); + error = zvol_dumpify(zv); + zfs_range_unlock(rl); + break; + + case DKIOCDUMPFINI: + rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, + RL_WRITER); + error = zvol_dump_fini(zv); + zfs_range_unlock(rl); + break; + default: error = ENOTTY; break; @@ -1131,3 +1615,216 @@ zvol_fini(void) mutex_destroy(&zvol_state_lock); ddi_soft_state_fini(&zvol_state); } + +static boolean_t +zvol_is_swap(zvol_state_t *zv) +{ + vnode_t *vp; + boolean_t ret = B_FALSE; + char *devpath; + size_t devpathlen; + int error; + + devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(zv->zv_name) + 1; + devpath = kmem_alloc(devpathlen, KM_SLEEP); + (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name); + 
error = lookupname(devpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); + kmem_free(devpath, devpathlen); + + ret = !error && IS_SWAPVP(common_specvp(vp)); + + if (vp != NULL) + VN_RELE(vp); + + return (ret); +} + +static int +zvol_dump_init(zvol_state_t *zv, boolean_t resize) +{ + dmu_tx_t *tx; + int error = 0; + objset_t *os = zv->zv_objset; + nvlist_t *nv = NULL; + uint64_t checksum, compress, refresrv; + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + + tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, ZVOL_OBJ, 0, DMU_OBJECT_END); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + /* + * If we are resizing the dump device then we only need to + * update the refreservation to match the newly updated + * zvolsize. Otherwise, we save off the original state of the + * zvol so that we can restore them if the zvol is ever undumpified. + */ + if (resize) { + error = zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, + &zv->zv_volsize, tx); + } else { + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); + error = error ? error : dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); + error = error ? error : dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); + + error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, + &compress, tx); + error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); + error = error ? 
error : zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, + &refresrv, tx); + } + dmu_tx_commit(tx); + + /* Truncate the file */ + if (!error) + error = zvol_truncate(zv, 0, DMU_OBJECT_END); + + if (error) + return (error); + + /* + * We only need to update the zvol's properties if we are initializing + * the dump area for the first time. + */ + if (!resize) { + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), + ZIO_COMPRESS_OFF) == 0); + VERIFY(nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), + ZIO_CHECKSUM_OFF) == 0); + + error = zfs_set_prop_nvlist(zv->zv_name, nv); + nvlist_free(nv); + + if (error) + return (error); + } + + /* Allocate the space for the dump */ + error = zvol_prealloc(zv); + return (error); +} + +static int +zvol_dumpify(zvol_state_t *zv) +{ + int error = 0; + uint64_t dumpsize = 0; + dmu_tx_t *tx; + objset_t *os = zv->zv_objset; + + if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) + return (EROFS); + + /* + * We do not support swap devices acting as dump devices. + */ + if (zvol_is_swap(zv)) + return (ENOTSUP); + + if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, + 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { + boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; + + if ((error = zvol_dump_init(zv, resize)) != 0) { + (void) zvol_dump_fini(zv); + return (error); + } + } + + /* + * Build up our lba mapping. 
+ */ + error = zvol_get_lbas(zv); + if (error) { + (void) zvol_dump_fini(zv); + return (error); + } + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + (void) zvol_dump_fini(zv); + return (error); + } + + zv->zv_flags |= ZVOL_DUMPIFIED; + error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, + &zv->zv_volsize, tx); + dmu_tx_commit(tx); + + if (error) { + (void) zvol_dump_fini(zv); + return (error); + } + + txg_wait_synced(dmu_objset_pool(os), 0); + return (0); +} + +static int +zvol_dump_fini(zvol_state_t *zv) +{ + dmu_tx_t *tx; + objset_t *os = zv->zv_objset; + nvlist_t *nv; + int error = 0; + uint64_t checksum, compress, refresrv; + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + /* + * Attempt to restore the zvol back to its pre-dumpified state. + * This is a best-effort attempt as it's possible that not all + * of these properties were initialized during the dumpify process + * (i.e. error during zvol_dump_init). 
+ */ + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); + (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); + + (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); + zvol_free_extents(zv); + zv->zv_flags &= ~ZVOL_DUMPIFIED; + dmu_tx_commit(tx); + + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); + (void) nvlist_add_uint64(nv, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); + (void) zfs_set_prop_nvlist(zv->zv_name, nv); + nvlist_free(nv); + + return (0); +} diff --git a/usr/src/uts/common/io/dump.c b/usr/src/uts/common/io/dump.c index f2cfd9dfda..6498463087 100644 --- a/usr/src/uts/common/io/dump.c +++ b/usr/src/uts/common/io/dump.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -173,7 +173,10 @@ dump_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rvalp) FOLLOW, NULLVPP, &vp)) != 0) break; mutex_enter(&dump_lock); - error = dumpinit(vp, pathbuf, cmd == DIOCTRYDEV); + if (vp->v_type == VBLK) + error = dumpinit(vp, pathbuf, cmd == DIOCTRYDEV); + else + error = ENOTBLK; mutex_exit(&dump_lock); VN_RELE(vp); break; diff --git a/usr/src/uts/common/os/dumpsubr.c b/usr/src/uts/common/os/dumpsubr.c index b8dba61893..b1c90e0eac 100644 --- a/usr/src/uts/common/os/dumpsubr.c +++ b/usr/src/uts/common/os/dumpsubr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -62,6 +62,7 @@ #include <sys/vtoc.h> #include <sys/errorq.h> #include <sys/fm/util.h> +#include <sys/fs/zfs.h> #include <vm/hat.h> #include <vm/as.h> @@ -269,6 +270,17 @@ dumpinit(vnode_t *vp, char *name, int justchecking) dump_iosize = dki.dki_maxtransfer * blk_size; dumpbuf_resize(); } + /* + * If we are working with a zvol then call into + * it to dumpify itself. + */ + if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) { + if ((error = VOP_IOCTL(cdev_vp, + DKIOCDUMPINIT, NULL, FKIOCTL, kcred, + NULL, NULL)) != 0) { + dumpfini(); + } + } (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, kcred, NULL); @@ -279,16 +291,43 @@ dumpinit(vnode_t *vp, char *name, int justchecking) cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20); - return (0); + return (error); } void dumpfini(void) { + vattr_t vattr; + boolean_t is_zfs = B_FALSE; + vnode_t *cdev_vp; ASSERT(MUTEX_HELD(&dump_lock)); kmem_free(dumppath, strlen(dumppath) + 1); + /* + * Determine if we are using zvols for our dump device + */ + vattr.va_mask = AT_RDEV; + if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) { + is_zfs = (getmajor(vattr.va_rdev) == + ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE; + } + + /* + * If we have a zvol dump device then we call into zfs so + * that it may have a chance to cleanup. + */ + if (is_zfs && + (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) { + if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { + (void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL, + kcred, NULL, NULL); + (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, + kcred, NULL); + } + VN_RELE(cdev_vp); + } + (void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL); VN_RELE(dumpvp); @@ -798,3 +837,30 @@ dump_resize() dumpbuf_resize(); mutex_exit(&dump_lock); } + +/* + * This function allows for dynamic resizing of a dump area. It assumes that + * the underlying device has updated its appropriate size(9P). 
+ */ +int +dumpvp_resize() +{ + int error; + vattr_t vattr; + + mutex_enter(&dump_lock); + vattr.va_mask = AT_SIZE; + if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) { + mutex_exit(&dump_lock); + return (error); + } + + if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) { + mutex_exit(&dump_lock); + return (ENOSPC); + } + + dumpvp_size = vattr.va_size & -DUMP_OFFSET; + mutex_exit(&dump_lock); + return (0); +} diff --git a/usr/src/uts/common/sys/cpr.h b/usr/src/uts/common/sys/cpr.h index 6fd5438cc0..13230543f2 100644 --- a/usr/src/uts/common/sys/cpr.h +++ b/usr/src/uts/common/sys/cpr.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -113,6 +113,15 @@ typedef struct cpr_default_info cdef_t; * cf_dev_prom (prom device path of the above special file) * "/sbus/espdma/dma/sd@1:h" * + * If the statefile is on a zvol, the fields would have these values: + * + * cf_type CFT_ZVOL + * cf_path ignored + * cf_fs (the zvol name e.g. "dump" portion of rootpool/dump) + * cf_devfs (devfs path) "/dev/zvol/dsk/<pool>/<zvol>" + * cf_dev_prom (prom device path of the above special file) + * e.g. "/sbus/espdma/dma/sd@1:h" + * * The rest of the fields are autoshutdown and autopm configuration related. * They are updated by pmconfig and consumed by both powerd and dtpower. */ @@ -163,6 +172,7 @@ struct cprconfig { */ #define CFT_UFS 1 /* statefile is ufs file */ #define CFT_SPEC 2 /* statefile is special file */ +#define CFT_ZVOL 3 /* statefile is a zvol */ /* diff --git a/usr/src/uts/common/sys/dkio.h b/usr/src/uts/common/sys/dkio.h index 043c9d58d8..13edae7368 100644 --- a/usr/src/uts/common/sys/dkio.h +++ b/usr/src/uts/common/sys/dkio.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ @@ -358,6 +358,9 @@ struct dk_minfo { #define DKIOCSETVOLCAP (DKIOC | 26) /* Set volume capabilities */ #define DKIOCDMR (DKIOC | 27) /* Issue a directed read */ +#define DKIOCDUMPINIT (DKIOC | 28) /* Dumpify a zvol */ +#define DKIOCDUMPFINI (DKIOC | 29) /* Un-Dumpify a zvol */ + typedef uint_t volcapinfo_t; typedef uint_t volcapset_t; diff --git a/usr/src/uts/common/sys/dumphdr.h b/usr/src/uts/common/sys/dumphdr.h index 81e2c7ccb8..72c6e41c71 100644 --- a/usr/src/uts/common/sys/dumphdr.h +++ b/usr/src/uts/common/sys/dumphdr.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -134,6 +134,7 @@ extern void dumpsys(void); extern void dump_messages(void); extern void dump_ereports(void); extern void dumpvp_write(const void *, size_t); +extern int dumpvp_resize(void); extern int dump_plat_addr(void); extern void dump_plat_pfn(void); extern int dump_plat_data(void *); diff --git a/usr/src/uts/common/sys/fs/snode.h b/usr/src/uts/common/sys/fs/snode.h index d0176af293..7f109f5682 100644 --- a/usr/src/uts/common/sys/fs/snode.h +++ b/usr/src/uts/common/sys/fs/snode.h @@ -23,7 +23,7 @@ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -177,6 +177,7 @@ int spec_is_clone(struct vnode *); int spec_is_selfclone(struct vnode *); int spec_fence_snode(dev_info_t *dip, struct vnode *vp); int spec_unfence_snode(dev_info_t *dip); +void spec_size_invalidate(dev_t, vtype_t); /* diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 98a2b5c546..9933839820 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -228,7 +228,7 @@ typedef enum zfs_share_op { #define SPA_VERSION_10 10ULL /* - * When bumping up SPA_VERSION, make sure GRUB ZFS understand the on-disk + * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. */ @@ -316,11 +316,12 @@ typedef enum zfs_share_op { #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" -#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_UNSPARE "unspare" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_L2CACHE "l2cache" +#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ +#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such @@ -460,6 +461,7 @@ typedef struct vdev_stat { uint64_t vs_scrub_end; /* UTC scrub end time */ } vdev_stat_t; +#define ZVOL_DRIVER "zvol" #define ZFS_DRIVER "zfs" #define ZFS_DEV "/dev/zfs" @@ -475,7 +477,7 @@ typedef struct vdev_stat { * And here are the things we need with /dev, etc. in front of them. 
*/ #define ZVOL_PSEUDO_DEV "/devices/pseudo/zvol@0:" -#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR +#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR "/" #define ZVOL_PROP_NAME "name" diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index 824e43d36c..9a756bd90d 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -108,6 +108,12 @@ LX_AUTOFS_OBJS += \ lx_autofs.o # +# ZFS file system module +# +ZFS_OBJS += \ + spa_boot.o + +# # Decompression code # CORE_OBJS += decompress.o diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules index 8d88d008fd..ea70b357fb 100644 --- a/usr/src/uts/intel/Makefile.rules +++ b/usr/src/uts/intel/Makefile.rules @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -235,6 +235,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/kdi/%.s $(OBJS_DIR)/%.o: $(UTSBASE)/intel/kdi/$(SUBARCH_DIR)/%.s $(COMPILE.s) -o $@ $< +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/zfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + # # krtld compiled into unix # @@ -286,6 +290,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/dtrace/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/dtrace/%.s @($(LHEAD) $(LINT.s) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/zfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/fs/proc/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/genunix/Makefile b/usr/src/uts/intel/genunix/Makefile index 10802679c5..f6ac2cf2c5 100644 --- a/usr/src/uts/intel/genunix/Makefile +++ b/usr/src/uts/intel/genunix/Makefile @@ -20,7 +20,7 @@ # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -84,6 +84,7 @@ IPCTF_TARGET = $(IPCTF) $(PATCH_BUILD)IPCTF_TARGET = CPPFLAGS += -I$(SRC)/common +CPPFLAGS += -I$(SRC)/uts/common/fs/zfs # # For now, disable these lint checks; maintainers should endeavor diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 0f99792ed6..fd7a606594 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -739,6 +739,15 @@ fcnname/**/_info: \ #endif /* + * Stubs for zfs + */ +#ifndef ZFS_MODULE + MODULE(zfs,fs); + STUB(zfs, spa_boot_init, nomod_minus_one); + END_MODULE(zfs); +#endif + +/* * Stubs for dcfs */ #ifndef DCFS_MODULE diff --git a/usr/src/uts/intel/specfs/Makefile b/usr/src/uts/intel/specfs/Makefile index b5d9435319..23a13db05e 100644 --- a/usr/src/uts/intel/specfs/Makefile +++ b/usr/src/uts/intel/specfs/Makefile @@ -21,7 +21,7 @@ # # uts/intel/specfs/Makefile # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -63,6 +63,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) MODSTUBS_DIR = $(OBJS_DIR) $(MODSTUBS_O) := AS_CPPFLAGS += -DSPEC_MODULE CLEANFILES += $(MODSTUBS_O) +LDFLAGS += -dy -Nfs/fifofs # # For now, disable these lint checks; maintainers should endeavor diff --git a/usr/src/uts/intel/zfs/Makefile b/usr/src/uts/intel/zfs/Makefile index c9596a4eef..05ab541d5d 100644 --- a/usr/src/uts/intel/zfs/Makefile +++ b/usr/src/uts/intel/zfs/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" @@ -60,7 +60,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # Overrides and depends_on # MODSTUBS_DIR = $(OBJS_DIR) -LDFLAGS += -dy -Nfs/specfs -Ndrv/random -Nmisc/idmap +LDFLAGS += -dy -Nfs/specfs -Ncrypto/swrand -Nmisc/idmap INC_PATH += -I$(UTSBASE)/common/fs/zfs INC_PATH += -I$(SRC)/common diff --git a/usr/src/uts/intel/zfs/spa_boot.c b/usr/src/uts/intel/zfs/spa_boot.c new file mode 100644 index 0000000000..9407f52353 --- /dev/null +++ b/usr/src/uts/intel/zfs/spa_boot.c @@ -0,0 +1,198 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/spa.h> +#include <sys/sunddi.h> + +char * +spa_get_bootfs() +{ + char *zfs_bp; + + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), + DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bp) != + DDI_SUCCESS) + return (NULL); + return (zfs_bp); +} + +void +spa_free_bootfs(char *bootfs) +{ + ddi_prop_free(bootfs); +} + +/* + * Calculate how many device pathnames are in devpath_list. 
+ * The devpath_list could look like this: + * + * "/pci@1f,0/ide@d/disk@0,0:a /pci@1f,0/ide@d/disk@2,0:a" + */ +static int +spa_count_devpath(char *devpath_list) +{ + int numpath; + char *tmp_path, *blank; + + numpath = 0; + tmp_path = devpath_list; + + /* skip leading blanks */ + while (*tmp_path == ' ') + tmp_path++; + + while ((blank = strchr(tmp_path, ' ')) != NULL) { + + numpath++; + /* skip contiguous blanks */ + while (*blank == ' ') + blank++; + tmp_path = blank; + } + + if (strlen(tmp_path) > 0) + numpath++; + + return (numpath); +} + +/* + * Only allow booting the device if it has the same vdev information as + * the most recently updated vdev (highest txg) and is in a valid state. + * + * GRUB passes online/active device path names, e.g. + * "/pci@1f,0/ide@d/disk@0,0:a /pci@1f,0/ide@d/disk@2,0:a" + * to the kernel. The best vdev should have the same matching online/active + * list as what GRUB passes in. + */ +static int +spa_check_devstate(char *devpath_list, char *dev, nvlist_t *conf) +{ + nvlist_t *nvtop, **child; + uint_t label_path, grub_path, c, children; + char *type; + + VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE, + &nvtop) == 0); + VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) + return (spa_rootdev_validate(nvtop)? 0 : EINVAL); + + ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0); + + VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + + /* + * Check if the devpath_list is the same as the path list in conf. + * If these two lists are different, then the booting device is not an + * up-to-date device that can be booted. 
+ */ + label_path = 0; + for (c = 0; c < children; c++) { + char *physpath; + + if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH, + &physpath) != 0) + return (EINVAL); + + if (spa_rootdev_validate(child[c])) { + if (strstr(devpath_list, physpath) == NULL) + return (EINVAL); + label_path++; + } else { + char *blank; + + if (blank = strchr(dev, ' ')) + *blank = '\0'; + if (strcmp(physpath, dev) == 0) + return (EINVAL); + if (blank) + *blank = ' '; + } + } + + grub_path = spa_count_devpath(devpath_list); + + if (label_path != grub_path) + return (EINVAL); + + return (0); +} + +/* + * Given a list of vdev physpath names, pick the vdev with the most recent txg, + * and return a pointer to the device's physpath in the list and the device's + * label configuration. The content of the label would be the most recent + * updated information. + */ +int +spa_get_rootconf(char *devpath_list, char **bestdev, nvlist_t **bestconf) +{ + nvlist_t *conf = NULL; + char *dev = NULL; + uint64_t txg = 0; + char *devpath, *blank; + + devpath = devpath_list; + dev = devpath; + + while (devpath[0] == ' ') + devpath++; + + while ((blank = strchr(devpath, ' ')) != NULL) { + *blank = '\0'; + spa_check_rootconf(devpath, &dev, &conf, &txg); + *blank = ' '; + + while (*blank == ' ') + blank++; + devpath = blank; + } + + /* for the only or the last devpath in the devpath_list */ + if (strlen(devpath) > 0) + spa_check_rootconf(devpath, &dev, &conf, &txg); + + if (conf == NULL) + return (EINVAL); + + /* + * dev/conf is the vdev with the most recent txg. + * Check if the device is in a bootable state. + * dev may have a trailing blank since it points to a string + * in the devpath_list. 
+ */ + if (spa_check_devstate(devpath_list, dev, conf) != 0) + return (EINVAL); + + *bestdev = dev; + *bestconf = conf; + return (0); +} diff --git a/usr/src/uts/sparc/Makefile.files b/usr/src/uts/sparc/Makefile.files index fccd7341e3..cfca9e03ba 100644 --- a/usr/src/uts/sparc/Makefile.files +++ b/usr/src/uts/sparc/Makefile.files @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -76,6 +76,10 @@ FCODE_OBJS += fcode.o #PROC_OBJS += CORE_OBJS += prmachdep.o +# ZFS file system module +ZFS_OBJS += \ + spa_boot.o + # # misc modules # diff --git a/usr/src/uts/sparc/Makefile.rules b/usr/src/uts/sparc/Makefile.rules index b7978200ad..a9ab9e0dba 100644 --- a/usr/src/uts/sparc/Makefile.rules +++ b/usr/src/uts/sparc/Makefile.rules @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -62,6 +62,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/sparc/krtld/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/sparc/zfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + # # _RELSEG indicates that the dynamic syms are put in a separate ELF # section so they can be freed later. 
@@ -99,6 +103,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/sparc/io/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/sparc/fpu/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/sparc/zfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/sparc/fs/proc/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s index 22be0e8a44..8e4e06a008 100644 --- a/usr/src/uts/sparc/ml/modstubs.s +++ b/usr/src/uts/sparc/ml/modstubs.s @@ -627,6 +627,15 @@ stubs_base: #endif /* + * Stubs for zfs + */ +#ifndef ZFS_MODULE + MODULE(zfs,fs); + STUB(zfs, spa_boot_init, nomod_minus_one); + END_MODULE(zfs); +#endif + +/* * Stubs for dcfs */ #ifndef DCFS_MODULE diff --git a/usr/src/uts/sparc/specfs/Makefile b/usr/src/uts/sparc/specfs/Makefile index 6c5344719b..d19a549d3f 100644 --- a/usr/src/uts/sparc/specfs/Makefile +++ b/usr/src/uts/sparc/specfs/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ # # # uts/sparc/specfs/Makefile -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -64,6 +63,7 @@ MODSTUBS_DIR = $(OBJS_DIR) $(MODSTUBS_O) := AS_CPPFLAGS += -DSPEC_MODULE CLEANFILES += $(MODSTUBS_O) CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nfs/fifofs # # Default build targets. 
diff --git a/usr/src/uts/sparc/zfs/Makefile b/usr/src/uts/sparc/zfs/Makefile index c9596a4eef..05ab541d5d 100644 --- a/usr/src/uts/sparc/zfs/Makefile +++ b/usr/src/uts/sparc/zfs/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -60,7 +60,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # Overrides and depends_on # MODSTUBS_DIR = $(OBJS_DIR) -LDFLAGS += -dy -Nfs/specfs -Ndrv/random -Nmisc/idmap +LDFLAGS += -dy -Nfs/specfs -Ncrypto/swrand -Nmisc/idmap INC_PATH += -I$(UTSBASE)/common/fs/zfs INC_PATH += -I$(SRC)/common diff --git a/usr/src/uts/sparc/zfs/spa_boot.c b/usr/src/uts/sparc/zfs/spa_boot.c new file mode 100644 index 0000000000..6a20081bd6 --- /dev/null +++ b/usr/src/uts/sparc/zfs/spa_boot.c @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/spa.h> +#include <sys/bootconf.h> + +char * +spa_get_bootfs() +{ + int proplen; + char *zfs_bp; + + proplen = BOP_GETPROPLEN(bootops, "zfs-bootfs"); + if (proplen == 0) + return (NULL); + + zfs_bp = kmem_zalloc(proplen, KM_SLEEP); + if (BOP_GETPROP(bootops, "zfs-bootfs", zfs_bp) == -1) { + kmem_free(zfs_bp, proplen); + return (NULL); + } + + return (zfs_bp); +} + +void +spa_free_bootfs(char *bootfs) +{ + kmem_free(bootfs, strlen(bootfs) + 1); +} + +/* + * Given the boot device physpath, check if the device is in a valid state. + * If so, return the configuration from the vdev label. + */ +int +spa_get_rootconf(char *devpath, char **bestdev, nvlist_t **bestconf) +{ + nvlist_t *conf = NULL; + char *dev = NULL; + uint64_t txg = 0; + nvlist_t *nvtop, **child; + char *type; + uint_t children, c; + + spa_check_rootconf(devpath, &dev, &conf, &txg); + if (txg == 0 || conf == NULL) + return (EINVAL); + + VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE, + &nvtop) == 0); + VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + if (spa_rootdev_validate(nvtop)) + goto out; + else + return (EINVAL); + } + + ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0); + + VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + + /* + * Go thru vdevs in the mirror to see if the given device (devpath) + * is in a healthy state. Also check if the given device has the most + * recent txg. Only the device with the most recent txg has valid + * information and can be booted. 
+ */ + for (c = 0; c < children; c++) { + char *physpath; + + if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH, + &physpath) != 0) + return (EINVAL); + + if (strcmp(devpath, physpath) == 0) { + if (!spa_rootdev_validate(child[c])) + return (EINVAL); + } else { + /* get dev with the highest txg */ + if (spa_rootdev_validate(child[c])) { + spa_check_rootconf(physpath, &dev, + &conf, &txg); + } + } + } + + /* Does the given device have the most recent txg? */ + if (strcmp(devpath, dev) != 0) + return (EINVAL); +out: + *bestdev = dev; + *bestconf = conf; + return (0); +} diff --git a/usr/src/uts/sun/sys/promif.h b/usr/src/uts/sun/sys/promif.h index bd761e6fa0..cfb51d55f6 100644 --- a/usr/src/uts/sun/sys/promif.h +++ b/usr/src/uts/sun/sys/promif.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -316,6 +316,7 @@ extern pnode_t prom_findnode_bydevtype(pnode_t id, char *devtype); * file IO */ extern int prom_fopen(ihandle_t, char *); +extern int prom_volopen(ihandle_t, char *); extern int prom_fseek(ihandle_t, int, unsigned long long); extern int prom_fread(ihandle_t, int, caddr_t, size_t); extern int prom_fsize(ihandle_t, int, size_t *); diff --git a/usr/src/uts/sun4u/genunix/Makefile b/usr/src/uts/sun4u/genunix/Makefile index d3968bb247..9d86cdb173 100644 --- a/usr/src/uts/sun4u/genunix/Makefile +++ b/usr/src/uts/sun4u/genunix/Makefile @@ -96,6 +96,7 @@ $(PATCH_BUILD)IPCTF_TARGET = # CFLAGS += $(CCVERBOSE) CPPFLAGS += -I$(SRC)/common +CPPFLAGS += -I$(SRC)/uts/common/fs/zfs # # For now, disable these lint checks; maintainers should endeavor diff --git a/usr/src/uts/sun4v/genunix/Makefile b/usr/src/uts/sun4v/genunix/Makefile index dccdd6dcbb..c2111f62b2 100644 --- a/usr/src/uts/sun4v/genunix/Makefile +++ b/usr/src/uts/sun4v/genunix/Makefile @@ -96,6 +96,7 @@ $(PATCH_BUILD)IPCTF_TARGET = # CFLAGS += $(CCVERBOSE) CPPFLAGS += 
-I$(SRC)/common +CPPFLAGS += -I$(SRC)/uts/common/fs/zfs # # For now, disable these lint checks; maintainers should endeavor |