diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c | 11 | ||||
-rw-r--r-- | usr/src/cmd/zpool/zpool_main.c | 91 | ||||
-rw-r--r-- | usr/src/cmd/zpool/zpool_util.h | 3 | ||||
-rw-r--r-- | usr/src/cmd/zpool/zpool_vdev.c | 77 | ||||
-rw-r--r-- | usr/src/common/zfs/zpool_prop.c | 4 | ||||
-rw-r--r-- | usr/src/lib/libzfs/common/libzfs.h | 16 | ||||
-rw-r--r-- | usr/src/lib/libzfs/common/libzfs_pool.c | 170 | ||||
-rw-r--r-- | usr/src/man/man1m/zpool.1m | 20 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/metaslab.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/spa_impl.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fs/zfs.h | 1 |
13 files changed, 346 insertions, 62 deletions
diff --git a/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c b/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c index e98ee0c9b7..4697128c90 100644 --- a/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c +++ b/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c @@ -155,6 +155,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk) uint64_t offline = 0ULL; char *physpath = NULL; char rawpath[PATH_MAX], fullpath[PATH_MAX]; + zpool_boot_label_t boot_type; + uint64_t boot_size; size_t len; if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0) @@ -220,7 +222,14 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk) len = strlen(rawpath); rawpath[len - 2] = '\0'; - if (zpool_label_disk(g_zfshdl, zhp, rawpath) != 0) { + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); + if (zpool_label_disk(g_zfshdl, zhp, rawpath, + boot_type, boot_size, NULL) != 0) { (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, &newstate); return; diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 82b9672a44..b61a52185d 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -208,7 +208,8 @@ get_usage(zpool_help_t idx) case HELP_CLEAR: return (gettext("\tclear [-nF] <pool> [device]\n")); case HELP_CREATE: - return (gettext("\tcreate [-fnd] [-o property=value] ... \n" + return (gettext("\tcreate [-fnd] [-B] " + "[-o property=value] ... \n" "\t [-O file-system-property=value] ... 
\n" "\t [-m mountpoint] [-R root] <pool> <vdev> ...\n")); case HELP_DESTROY: @@ -496,6 +497,8 @@ zpool_do_add(int argc, char **argv) int c; nvlist_t *nvroot; char *poolname; + zpool_boot_label_t boot_type; + uint64_t boot_size; int ret; zpool_handle_t *zhp; nvlist_t *config; @@ -544,9 +547,15 @@ zpool_do_add(int argc, char **argv) return (1); } + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + /* pass off to get_vdev_spec for processing */ + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun, - argc, argv); + boot_type, boot_size, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); @@ -767,10 +776,11 @@ errout: } /* - * zpool create [-fnd] [-o property=value] ... + * zpool create [-fnd] [-B] [-o property=value] ... * [-O file-system-property=value] ... * [-R root] [-m mountpoint] <pool> <dev> ... * + * -B Create boot partition. * -f Force creation, even if devices appear in use * -n Do not create the pool, but display the resulting layout if it * were to be created. @@ -787,12 +797,16 @@ errout: * we get the nvlist back from get_vdev_spec(), we either print out the contents * (if '-n' was specified), or pass it to libzfs to do the creation. 
*/ + +#define SYSTEM256 (256 * 1024 * 1024) int zpool_do_create(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; boolean_t enable_all_pool_feat = B_TRUE; + zpool_boot_label_t boot_type = ZPOOL_NO_BOOT_LABEL; + uint64_t boot_size = 0; int c; nvlist_t *nvroot = NULL; char *poolname; @@ -804,7 +818,7 @@ zpool_do_create(int argc, char **argv) char *propval; /* check options */ - while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) { + while ((c = getopt(argc, argv, ":fndBR:m:o:O:")) != -1) { switch (c) { case 'f': force = B_TRUE; @@ -815,6 +829,15 @@ zpool_do_create(int argc, char **argv) case 'd': enable_all_pool_feat = B_FALSE; break; + case 'B': + /* + * We should create the system partition. + * Also make sure the size is set. + */ + boot_type = ZPOOL_CREATE_BOOT_LABEL; + if (boot_size == 0) + boot_size = SYSTEM256; + break; case 'R': altroot = optarg; if (add_prop_list(zpool_prop_to_name( @@ -845,6 +868,20 @@ zpool_do_create(int argc, char **argv) goto errout; /* + * Get bootsize value for make_root_vdev(). + */ + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_BOOTSIZE) { + if (zfs_nicestrtonum(g_zfs, propval, + &boot_size) < 0 || boot_size == 0) { + (void) fprintf(stderr, + gettext("bad boot partition size " + "'%s': %s\n"), propval, + libzfs_error_description(g_zfs)); + goto errout; + } + } + + /* * If the user is creating a pool that doesn't support * feature flags, don't enable any features. */ @@ -921,9 +958,43 @@ zpool_do_create(int argc, char **argv) goto errout; } + /* + * Make sure the bootsize is set when ZPOOL_CREATE_BOOT_LABEL is used, + * and not set otherwise. 
+ */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + const char *propname; + char *strptr, *buf = NULL; + int rv; + + propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); + if (nvlist_lookup_string(props, propname, &strptr) != 0) { + (void) asprintf(&buf, "%" PRIu64, boot_size); + if (buf == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + goto errout; + } + rv = add_prop_list(propname, buf, &props, B_TRUE); + free(buf); + if (rv != 0) + goto errout; + } + } else { + const char *propname; + char *strptr; + + propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); + if (nvlist_lookup_string(props, propname, &strptr) == 0) { + (void) fprintf(stderr, gettext("error: setting boot " + "partition size requires option '-B'\n")); + goto errout; + } + } + /* pass off to get_vdev_spec for bulk processing */ nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun, - argc - 1, argv + 1); + boot_type, boot_size, argc - 1, argv + 1); if (nvroot == NULL) goto errout; @@ -3183,6 +3254,8 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) nvlist_t *nvroot; char *poolname, *old_disk, *new_disk; zpool_handle_t *zhp; + zpool_boot_label_t boot_type; + uint64_t boot_size; int ret; /* check options */ @@ -3247,8 +3320,14 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) return (1); } + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE, - argc, argv); + boot_type, boot_size, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h index 134c730fcf..8777edc9de 100644 --- a/usr/src/cmd/zpool/zpool_util.h +++ b/usr/src/cmd/zpool/zpool_util.h @@ -44,7 +44,8 @@ uint_t num_logs(nvlist_t *nv); */ nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int 
check_rep, - boolean_t replacing, boolean_t dryrun, int argc, char **argv); + boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, + uint64_t boot_size, int argc, char **argv); nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv); diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c index e60d7c0751..369b150390 100644 --- a/usr/src/cmd/zpool/zpool_vdev.c +++ b/usr/src/cmd/zpool/zpool_vdev.c @@ -876,14 +876,15 @@ check_replication(nvlist_t *config, nvlist_t *newroot) * Go through and find any whole disks in the vdev specification, labelling them * as appropriate. When constructing the vdev spec, we were unable to open this * device in order to provide a devid. Now that we have labelled the disk and - * know that slice 0 is valid, we can construct the devid now. + * know the pool slice is valid, we can construct the devid now. * * If the disk was already labeled with an EFI label, we will have gotten the * devid already (because we were able to open the whole disk). Otherwise, we * need to get the devid after we label the disk. */ static int -make_disks(zpool_handle_t *zhp, nvlist_t *nv) +make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type, + uint64_t boot_size) { nvlist_t **child; uint_t c, children; @@ -892,6 +893,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) uint64_t wholedisk; int fd; int ret; + int slice; ddi_devid_t devid; char *minor = NULL, *devid_str = NULL; @@ -909,20 +911,36 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) * slice and stat()ing the device. 
*/ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) != 0 || !wholedisk) - return (0); diskname = strrchr(path, '/'); assert(diskname != NULL); diskname++; - if (zpool_label_disk(g_zfs, zhp, diskname) == -1) - return (-1); + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) != 0 || !wholedisk) { + /* + * This is not whole disk, return error if + * boot partition creation was requested + */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + (void) fprintf(stderr, + gettext("creating boot partition is only " + "supported on whole disk vdevs: %s\n"), + diskname); + return (-1); + } + return (0); + } + + ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type, + boot_size, &slice); + if (ret == -1) + return (ret); /* * Fill in the devid, now that we've labeled the disk. */ - (void) snprintf(buf, sizeof (buf), "%ss0", path); + (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice); if ((fd = open(buf, O_RDONLY)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), @@ -945,7 +963,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) } /* - * Update the path to refer to the 's0' slice. The presence of + * Update the path to refer to the pool slice. The presence of * the 'whole_disk' field indicates to the CLI that we should * chop off the slice number when displaying the device in * future output. @@ -957,21 +975,36 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) return (0); } - for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + /* illumos kernel does not support booting from multi-vdev pools. 
*/ + if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) { + if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) { + (void) fprintf(stderr, gettext("boot pool " + "can not have more than one vdev\n")); + return (-1); + } + } + + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) return (ret); + } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) - for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) return (ret); + } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) - for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) return (ret); + } return (0); } @@ -1367,6 +1400,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, { nvlist_t *newroot = NULL, **child; uint_t c, children; + zpool_boot_label_t boot_type; if (argc > 0) { if ((newroot = construct_spec(argc, argv)) == NULL) { @@ -1375,7 +1409,13 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, return (NULL); } - if (!flags.dryrun && make_disks(zhp, newroot) != 0) { + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + if (!flags.dryrun && + make_disks(zhp, newroot, boot_type, 0) != 0) { nvlist_free(newroot); return (NULL); } @@ -1419,7 +1459,8 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, */ nvlist_t * make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, - boolean_t replacing, boolean_t dryrun, int argc, char **argv) + boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, + uint64_t boot_size, int argc, char **argv) { nvlist_t *newroot; nvlist_t *poolconfig = NULL; @@ -1460,7 +1501,7 @@ 
make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, /* * Run through the vdev specification and label any whole disks found. */ - if (!dryrun && make_disks(zhp, newroot) != 0) { + if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) { nvlist_free(newroot); return (NULL); } diff --git a/usr/src/common/zfs/zpool_prop.c b/usr/src/common/zfs/zpool_prop.c index 9c717442ed..2a4f55d5b9 100644 --- a/usr/src/common/zfs/zpool_prop.c +++ b/usr/src/common/zfs/zpool_prop.c @@ -100,6 +100,10 @@ zpool_prop_init(void) PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>", "DEDUP"); + /* system partition size */ + zprop_register_number(ZPOOL_PROP_BOOTSIZE, "bootsize", 0, PROP_ONETIME, + ZFS_TYPE_POOL, "<size>", "BOOTSIZE"); + /* default number properties */ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION"); diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 657ab3f2a2..5f81aa2048 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -132,6 +132,18 @@ typedef enum zfs_error { } zfs_error_t; /* + * UEFI boot support parameters. When creating a whole-disk boot pool, + * zpool create should be able to create an EFI System partition for the + * UEFI boot program. With BIOS firmware, the EFI System partition is not + * used even if it exists. + */ +typedef enum zpool_boot_label { + ZPOOL_NO_BOOT_LABEL = 0, + ZPOOL_CREATE_BOOT_LABEL, + ZPOOL_COPY_BOOT_LABEL +} zpool_boot_label_t; + +/* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. 
@@ -262,7 +274,8 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); -extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *); +extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *, + zpool_boot_label_t, uint64_t, int *); /* * Functions to manage pool properties @@ -344,6 +357,7 @@ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); +extern boolean_t zpool_is_bootable(zpool_handle_t *); /* * Import and export functions diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index f9a05aeb39..9786f2b299 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -48,7 +48,7 @@ #include "zfs_comutil.h" #include "zfeature_common.h" -static int read_efi_label(nvlist_t *config, diskaddr_t *sb); +static int read_efi_label(nvlist_t *, diskaddr_t *, boolean_t *); #define BACKUP_SLICE "s2" @@ -313,6 +313,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, (void) zfs_nicenum(intval, buf, len); } break; + case ZPOOL_PROP_BOOTSIZE: case ZPOOL_PROP_EXPANDSZ: if (intval == 0) { (void) strlcpy(buf, "-", len); @@ -514,6 +515,16 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, } break; + case ZPOOL_PROP_BOOTSIZE: + if (!flags.create) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' can only be set during pool " + "creation"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + case ZPOOL_PROP_BOOTFS: if (flags.create || flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -1964,8 +1975,9 @@ 
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* * Search for the requested value. Special cases: * - * - ZPOOL_CONFIG_PATH for whole disk entries. These end in - * "s0" or "s0/old". The "s0" part is hidden from the user, + * - ZPOOL_CONFIG_PATH for whole disk entries. To support + * UEFI boot, these end in "s0" or "s0/old" or "s1" or + * "s1/old". The "s0" or "s1" part is hidden from the user, * but included in the string, so this matches around it. * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). * @@ -1995,14 +2007,16 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* * strings identical except trailing "s0" */ - if (strcmp(&val[vlen - 2], "s0") == 0 && + if ((strcmp(&val[vlen - 2], "s0") == 0 || + strcmp(&val[vlen - 2], "s1") == 0) && strncmp(srchval, val, slen) == 0) return (nv); /* * strings identical except trailing "s0/old" */ - if (strcmp(&val[vlen - 6], "s0/old") == 0 && + if ((strcmp(&val[vlen - 6], "s0/old") == 0 || + strcmp(&val[vlen - 6], "s1/old") == 0) && strcmp(&srchval[slen - 4], "/old") == 0 && strncmp(srchval, val, slen - 4) == 0) return (nv); @@ -3406,15 +3420,17 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, char *tmp = zfs_strdup(hdl, path); /* - * If it starts with c#, and ends with "s0", chop - * the "s0" off, or if it ends with "s0/old", remove - * the "s0" from the middle. + * If it starts with c#, and ends with "s0" or "s1", + * chop the slice off, or if it ends with "s0/old" or + * "s1/old", remove the slice from the middle. 
*/ if (CTD_CHECK(tmp)) { - if (strcmp(&tmp[pathlen - 2], "s0") == 0) { + if (strcmp(&tmp[pathlen - 2], "s0") == 0 || + strcmp(&tmp[pathlen - 2], "s1") == 0) { tmp[pathlen - 2] = '\0'; } else if (pathlen > 6 && - strcmp(&tmp[pathlen - 6], "s0/old") == 0) { + (strcmp(&tmp[pathlen - 6], "s0/old") == 0 || + strcmp(&tmp[pathlen - 6], "s1/old") == 0)) { (void) strcpy(&tmp[pathlen - 6], "/old"); } @@ -3807,15 +3823,18 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, * Read the EFI label from the config, if a label does not exist then * pass back the error to the caller. If the caller has passed a non-NULL * diskaddr argument then we set it to the starting address of the EFI - * partition. + * partition. If the caller has passed a non-NULL boolean argument, then + * we set it to indicate if the disk does have efi system partition. */ static int -read_efi_label(nvlist_t *config, diskaddr_t *sb) +read_efi_label(nvlist_t *config, diskaddr_t *sb, boolean_t *system) { char *path; int fd; char diskname[MAXPATHLEN]; + boolean_t boot = B_FALSE; int err = -1; + int slice; if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) return (err); @@ -3826,8 +3845,16 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb) struct dk_gpt *vtoc; if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { - if (sb != NULL) - *sb = vtoc->efi_parts[0].p_start; + for (slice = 0; slice < vtoc->efi_nparts; slice++) { + if (vtoc->efi_parts[slice].p_tag == V_SYSTEM) + boot = B_TRUE; + if (vtoc->efi_parts[slice].p_tag == V_USR) + break; + } + if (sb != NULL && vtoc->efi_parts[slice].p_tag == V_USR) + *sb = vtoc->efi_parts[slice].p_start; + if (system != NULL) + *system = boot; efi_free(vtoc); } (void) close(fd); @@ -3854,7 +3881,7 @@ find_start_block(nvlist_t *config) &wholedisk) != 0 || !wholedisk) { return (MAXOFFSET_T); } - if (read_efi_label(config, &sb) < 0) + if (read_efi_label(config, &sb, NULL) < 0) sb = MAXOFFSET_T; return (sb); } @@ -3873,7 +3900,8 @@ 
find_start_block(nvlist_t *config) * stripped of any leading /dev path. */ int -zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) +zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name, + zpool_boot_label_t boot_type, uint64_t boot_size, int *slice) { char path[MAXPATHLEN]; struct dk_gpt *vtoc; @@ -3931,15 +3959,6 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) return (zfs_error(hdl, EZFS_NOCAP, errbuf)); } - slice_size = vtoc->efi_last_u_lba + 1; - slice_size -= EFI_MIN_RESV_SIZE; - if (start_block == MAXOFFSET_T) - start_block = NEW_START_BLOCK; - slice_size -= start_block; - - vtoc->efi_parts[0].p_start = start_block; - vtoc->efi_parts[0].p_size = slice_size; - /* * Why we use V_USR: V_BACKUP confuses users, and is considered * disposable by some EFI utilities (since EFI doesn't have a backup @@ -3948,12 +3967,103 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) * etc. were all pretty specific. V_USR is as close to reality as we * can get, in the absence of V_OTHER. */ - vtoc->efi_parts[0].p_tag = V_USR; - (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); + /* first fix the partition start block */ + if (start_block == MAXOFFSET_T) + start_block = NEW_START_BLOCK; - vtoc->efi_parts[8].p_start = slice_size + start_block; - vtoc->efi_parts[8].p_size = resv; - vtoc->efi_parts[8].p_tag = V_RESERVED; + /* + * EFI System partition is using slice 0. + * ZFS is on slice 1 and slice 8 is reserved. + * We assume the GPT partition table without system + * partition has zfs p_start == NEW_START_BLOCK. + * If start_block != NEW_START_BLOCK, it means we have + * system partition. Correct solution would be to query/cache vtoc + * from existing vdev member. 
+ */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + if (boot_size % vtoc->efi_lbasize != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "boot partition size must be a multiple of %d"), + vtoc->efi_lbasize); + (void) close(fd); + efi_free(vtoc); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + /* + * System partition size checks. + * Note the 1MB is quite arbitrary value, since we + * are creating dedicated pool, it should be enough + * to hold fat + efi bootloader. May need to be + * adjusted if the bootloader size will grow. + */ + if (boot_size < 1024 * 1024) { + char buf[64]; + zfs_nicenum(boot_size, buf, sizeof (buf)); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Specified size %s for EFI System partition is too " + "small, the minimum size is 1MB."), buf); + (void) close(fd); + efi_free(vtoc); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + /* 33MB is tested with mkfs -F pcfs */ + if (hdl->libzfs_printerr && + ((vtoc->efi_lbasize == 512 && + boot_size < 33 * 1024 * 1024) || + (vtoc->efi_lbasize == 4096 && + boot_size < 256 * 1024 * 1024))) { + char buf[64]; + zfs_nicenum(boot_size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "Warning: EFI System partition size %s is " + "not allowing to create FAT32 file\nsystem, which " + "may result in unbootable system.\n"), buf); + } + /* Adjust zfs partition start by size of system partition. */ + start_block += boot_size / vtoc->efi_lbasize; + } + + if (start_block == NEW_START_BLOCK) { + /* + * Use default layout. + * ZFS is on slice 0 and slice 8 is reserved. 
+ */ + slice_size = vtoc->efi_last_u_lba + 1; + slice_size -= EFI_MIN_RESV_SIZE; + slice_size -= start_block; + if (slice != NULL) + *slice = 0; + + vtoc->efi_parts[0].p_start = start_block; + vtoc->efi_parts[0].p_size = slice_size; + + vtoc->efi_parts[0].p_tag = V_USR; + (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); + + vtoc->efi_parts[8].p_start = slice_size + start_block; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + } else { + slice_size = start_block - NEW_START_BLOCK; + vtoc->efi_parts[0].p_start = NEW_START_BLOCK; + vtoc->efi_parts[0].p_size = slice_size; + vtoc->efi_parts[0].p_tag = V_SYSTEM; + (void) strcpy(vtoc->efi_parts[0].p_name, "loader"); + if (slice != NULL) + *slice = 1; + /* prepare slice 1 */ + slice_size = vtoc->efi_last_u_lba + 1 - slice_size; + slice_size -= resv; + slice_size -= NEW_START_BLOCK; + vtoc->efi_parts[1].p_start = start_block; + vtoc->efi_parts[1].p_size = slice_size; + vtoc->efi_parts[1].p_tag = V_USR; + (void) strcpy(vtoc->efi_parts[1].p_name, "zfs"); + + vtoc->efi_parts[8].p_start = slice_size + start_block; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + } if (efi_write(fd, vtoc) != 0) { /* diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m index 04cac897e9..da923aa174 100644 --- a/usr/src/man/man1m/zpool.1m +++ b/usr/src/man/man1m/zpool.1m @@ -23,7 +23,7 @@ .\" Copyright (c) 2013 by Delphix. All rights reserved. .\" Copyright 2016 Nexenta Systems, Inc. .\" -.Dd March 30, 2016 +.Dd Oct 2, 2016 .Dt ZPOOL 1M .Os .Sh NAME @@ -47,6 +47,7 @@ .Nm .Cm create .Op Fl dfn +.Op Fl B .Op Fl m Ar mountpoint .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns ... @@ -430,6 +431,12 @@ The following are read-only properties: Amount of storage available within the pool. This property can also be referred to by its shortened column name, .Sy avail . +.It Sy bootsize +The size of the system boot partition. 
This property can only be set at pool +creation time and is read-only once the pool is created. Setting this property +requires the +.Fl B +option. .It Sy capacity Percentage of pool space used. This property can also be referred to by its shortened column name, @@ -708,6 +715,7 @@ those errors associated with the specified device or devices are cleared. .Nm .Cm create .Op Fl dfn +.Op Fl B .Op Fl m Ar mountpoint .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns ... @@ -766,6 +774,16 @@ By default all supported features are enabled on the new pool unless the .Fl d option is specified. .Bl -tag -width Ds +.It Fl B +Create a whole-disk pool with an EFI System partition to support booting a system +with UEFI firmware. The default size is 256MB. To create a boot partition with a +custom size, set the +.Sy bootsize +property with the +.Fl o +option. See the +.Sx Properties +section for details. .It Fl d Do not enable any features on the new pool. Individual features can be enabled by setting their corresponding properties to diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c index 5eb43fa7c4..a68dd0daa8 100644 --- a/usr/src/uts/common/fs/zfs/metaslab.c +++ b/usr/src/uts/common/fs/zfs/metaslab.c @@ -394,6 +394,7 @@ metaslab_class_expandable_space(metaslab_class_t *mc) spa_config_enter(mc->mc_spa, SCL_VDEV, FTAG, RW_READER); for (int c = 0; c < rvd->vdev_children; c++) { + uint64_t tspace; vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; @@ -406,9 +407,13 @@ metaslab_class_expandable_space(metaslab_class_t *mc) * Calculate if we have enough space to add additional * metaslabs. We report the expandable space in terms * of the metaslab size since that's the unit of expansion. + * Adjust by efi system partition size. 
*/ - space += P2ALIGN(tvd->vdev_max_asize - tvd->vdev_asize, - 1ULL << tvd->vdev_ms_shift); + tspace = tvd->vdev_max_asize - tvd->vdev_asize; + if (tspace > mc->mc_spa->spa_bootsize) { + tspace -= mc->mc_spa->spa_bootsize; + } + space += P2ALIGN(tspace, 1ULL << tvd->vdev_ms_shift); } spa_config_exit(mc->mc_spa, SCL_VDEV, FTAG); return (space); diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 24cdfcb93b..b16195da07 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -2738,6 +2738,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_BOOTSIZE, &spa->spa_bootsize); spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, &spa->spa_dedup_ditto); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 8413a843cd..73d2df0168 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -237,6 +237,7 @@ struct spa { int spa_mode; /* FREAD | FWRITE */ spa_log_state_t spa_log_state; /* log state */ uint64_t spa_autoexpand; /* lun expansion on/off */ + uint64_t spa_bootsize; /* efi system partition size */ ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */ uint64_t spa_ddt_stat_object; /* DDT statistics */ uint64_t spa_dedup_ditto; /* dedup ditto threshold */ diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index f31f41fddc..c0905abb46 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -2765,8 +2765,8 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) * since that determines how much space the pool can expand. 
*/ if (vd->vdev_aux == NULL && tvd != NULL) { - vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize, - 1ULL << tvd->vdev_ms_shift); + vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize - + spa->spa_bootsize, 1ULL << tvd->vdev_ms_shift); } if (vd->vdev_aux == NULL && vd == vd->vdev_top && !vd->vdev_ishole) { vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation; diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 55f73868d6..92034ee30f 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -204,6 +204,7 @@ typedef enum { ZPOOL_PROP_FRAGMENTATION, ZPOOL_PROP_LEAKED, ZPOOL_PROP_MAXBLOCKSIZE, + ZPOOL_PROP_BOOTSIZE, ZPOOL_NUM_PROPS } zpool_prop_t; |