diff options
| | | |
|---|---|---|
| author | ahrens <none@none> | 2005-10-31 11:33:35 -0800 |
| committer | ahrens <none@none> | 2005-10-31 11:33:35 -0800 |
| commit | fa9e4066f08beec538e775443c5be79dd423fcab (patch) | |
| tree | 576d99665e57bb7cb70584431adb08c14d47e3ce /usr/src/cmd | |
| parent | f1b64740276f67fc6914c1d855f2af601efe99ac (diff) | |
| download | illumos-joyent-fa9e4066f08beec538e775443c5be79dd423fcab.tar.gz | |
PSARC 2002/240 ZFS
6338653 Integrate ZFS
PSARC 2004/652 - DKIOCFLUSH
5096886 Write caching disks need mechanism to flush cache to physical media
Diffstat (limited to 'usr/src/cmd')
115 files changed, 18852 insertions, 841 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 14d2a8a8b2..c3f7131069 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -49,6 +49,7 @@ FIRST_SUBDIRS= \ COMMON_SUBDIRS= \ agents \ + availdevs \ lp \ perl \ man \ @@ -420,13 +421,17 @@ COMMON_SUBDIRS= \ yes \ ypcmd \ yppasswd \ + zdb \ zdump \ + zfs \ zic \ zlogin \ zoneadm \ zoneadmd \ zonecfg \ - zonename + zonename \ + zpool \ + ztest i386_SUBDIRS= \ addbadsec \ @@ -664,12 +669,14 @@ MSGSUBDIRS= \ xargs \ yppasswd \ zdump \ + zfs \ zic \ zlogin \ zoneadm \ zoneadmd \ zonecfg \ - zonename + zonename \ + zpool sparc_MSGSUBDIRS= \ fruadm \ diff --git a/usr/src/cmd/allocate/Makefile b/usr/src/cmd/allocate/Makefile index 36a28e3dd0..6b3055bce1 100644 --- a/usr/src/cmd/allocate/Makefile +++ b/usr/src/cmd/allocate/Makefile @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 1989, 1998-2002 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -84,7 +84,7 @@ $(ROOTSECDEV)/% := GROUP = bin $(ROOTSECLIB)/% := FILEMODE = 0751 -allocate := LDLIBS += -lbsm -lsecdb +allocate := LDLIBS += -lbsm -lsec -lsecdb .KEEP_STATE: diff --git a/usr/src/cmd/allocate/allocate3.c b/usr/src/cmd/allocate/allocate3.c index 3488421c1e..0a5e0d0d9d 100644 --- a/usr/src/cmd/allocate/allocate3.c +++ b/usr/src/cmd/allocate/allocate3.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 1999-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -255,24 +255,7 @@ list_devices(int optflg, uid_t uid, char *device) static int newdac(char *file, uid_t owner, gid_t group, o_mode_t mode) { - int err = 0; - aclent_t min_acl[MIN_ACL_ENTRIES]; - - min_acl[0].a_type = USER_OBJ; - min_acl[0].a_id = owner; - min_acl[0].a_perm = ((mode & 0700) >> 6); - - min_acl[1].a_type = GROUP_OBJ; - min_acl[1].a_id = group; - min_acl[1].a_perm = ((mode & 0070) >> 3); - - min_acl[2].a_type = CLASS_OBJ; - min_acl[2].a_id = (uid_t)-1; - min_acl[2].a_perm = ((mode & 0070) >> 3); - - min_acl[3].a_type = OTHER_OBJ; - min_acl[3].a_id = (uid_t)-1; - min_acl[3].a_perm = (mode & 0007); + int err = 0; do { if (chown(file, owner, group) == -1) { @@ -281,7 +264,9 @@ newdac(char *file, uid_t owner, gid_t group, o_mode_t mode) } } while (fdetach(file) == 0); - if (acl(file, SETACL, MIN_ACL_ENTRIES, min_acl) < 0) { + err = acl_strip(file, owner, group, (mode_t)mode); + + if (err != 0) { dperror("newdac, unable to setacl"); err = SETACL_PERR; } diff --git a/usr/src/cmd/availdevs/Makefile b/usr/src/cmd/availdevs/Makefile new file mode 100644 index 0000000000..99015803ba --- /dev/null +++ b/usr/src/cmd/availdevs/Makefile @@ -0,0 +1,65 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG= availdevs +OBJS_COMMON= availdevs.o +OBJS= $(OBJS_COMMON) +SRCS= $(OBJS_COMMON:%.o=%.c) + +include ../Makefile.cmd + +ROOTCMDDIR= $(ROOTLIB)/zfs + +INCS += -I../../lib/libzfs_jni/common \ + -I/usr/include/libxml2 + +LDLIBS += -lzfs_jni -lxml2 +CPPFLAGS += $(INCS) -D_LARGEFILE64_SOURCE=1 -D_REENTRANT + +.KEEP_STATE: + +.PARALLEL: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +%.o: %.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +install: all $(ROOTCMD) + +clean: + $(RM) $(OBJS) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/cmd/availdevs/availdevs.c b/usr/src/cmd/availdevs/availdevs.c new file mode 100644 index 0000000000..703897f394 --- /dev/null +++ b/usr/src/cmd/availdevs/availdevs.c @@ -0,0 +1,158 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "availdevs.h" +#include <libzfs_jni_diskmgt.h> +#include <libxml/parser.h> + +/* + * Function prototypes + */ + +static void handle_error(const char *, va_list); +static int add_disk_to_xml(dmgt_disk_t *, void *); +static xmlDocPtr create_doc(); +int main(); + +/* + * Static functions + */ + +static void +handle_error(const char *fmt, va_list ap) +{ + (void) vfprintf(stderr, fmt, ap); + (void) fprintf(stderr, "\n"); +} + +static int +add_disk_to_xml(dmgt_disk_t *dp, void *data) +{ + int i, n; + char tmp[64]; + xmlNodePtr available = *((xmlNodePtr *)data); + + xmlNodePtr disk = xmlNewChild( + available, NULL, (xmlChar *)ELEMENT_DISK, NULL); + xmlSetProp(disk, + (xmlChar *)ATTR_DISK_NAME, (xmlChar *)dp->name); + n = snprintf(tmp, sizeof (tmp) - 1, "%llu", dp->size); + tmp[n] = '\0'; + xmlSetProp(disk, (xmlChar *)ATTR_DISK_SIZE, (xmlChar *)tmp); + + if (dp->aliases != NULL) { + for (i = 0; dp->aliases[i] != NULL; i++) { + xmlNodePtr alias = xmlNewChild( + disk, NULL, (xmlChar *)ELEMENT_ALIAS, NULL); + xmlSetProp(alias, + (xmlChar *)ATTR_ALIAS_NAME, + (xmlChar *)dp->aliases[i]); + } + } + + if (dp->slices != NULL) { + for (i = 0; dp->slices[i] != NULL; i++) { + dmgt_slice_t *sp = dp->slices[i]; + xmlNodePtr slice = xmlNewChild( + disk, NULL, (xmlChar *)ELEMENT_SLICE, NULL); + xmlSetProp(slice, + (xmlChar *)ATTR_SLICE_NAME, (xmlChar *)sp->name); + + n = snprintf(tmp, sizeof (tmp) - 1, "%llu", sp->size); + tmp[n] = '\0'; + xmlSetProp(slice, (xmlChar *)ATTR_SLICE_SIZE, + (xmlChar *)tmp); + + n = snprintf(tmp, sizeof (tmp) - 1, "%llu", sp->start); + tmp[n] = '\0'; + 
xmlSetProp(slice, (xmlChar *)ATTR_SLICE_START, + (xmlChar *)tmp); + + if (sp->used_name != NULL) { + xmlSetProp(slice, + (xmlChar *)ATTR_SLICE_USED_NAME, + (xmlChar *)sp->used_name); + } + + if (sp->used_by != NULL) { + xmlSetProp(slice, (xmlChar *)ATTR_SLICE_USED_BY, + (xmlChar *)sp->used_by); + } + } + } + + return (0); +} + +static xmlDocPtr +create_doc(void) +{ + /* Create the XML document */ + xmlDocPtr doc = xmlNewDoc((xmlChar *)"1.0"); + + /* Create the root node */ + xmlNodePtr root = xmlNewDocNode( + doc, NULL, (xmlChar *)ELEMENT_ROOT, NULL); + xmlAddChild((xmlNodePtr) doc, (xmlNodePtr)root); + + /* Create the available node */ + xmlNewChild(root, NULL, (xmlChar *)ELEMENT_AVAILABLE, NULL); + + return (doc); +} + +/* + * Main entry to availdisks. + * + * @return 0 on successful exit, non-zero otherwise + */ +int +main(void) +{ + int error; + xmlDocPtr doc; + xmlNodePtr root; + xmlNodePtr available; + + /* diskmgt.o error handler */ + dmgt_set_error_handler(handle_error); + + doc = create_doc(); + root = xmlDocGetRootElement(doc); + available = xmlGetLastChild(root); + + error = dmgt_avail_disk_iter(add_disk_to_xml, &available); + if (!error) { + /* Print out XML */ + xmlDocFormatDump(stdout, doc, 1); + } + + xmlFreeDoc(doc); + + return (error != 0); +} diff --git a/usr/src/cmd/availdevs/availdevs.h b/usr/src/cmd/availdevs/availdevs.h new file mode 100644 index 0000000000..3868b237aa --- /dev/null +++ b/usr/src/cmd/availdevs/availdevs.h @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _AVAILDEVS_H +#define _AVAILDEVS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Constants + */ + +/* Must match the values in XMLDataModel.java */ +#define ELEMENT_ROOT "zfsconfig" +#define ELEMENT_AVAILABLE "available" +#define ELEMENT_DISK "disk" +#define ELEMENT_ALIAS "alias" +#define ELEMENT_SLICE "slice" +#define ATTR_DISK_NAME "name" +#define ATTR_DISK_SIZE "size" +#define ATTR_DISK_INUSE "inuse" +#define ATTR_ALIAS_NAME "name" +#define ATTR_SLICE_NAME "name" +#define ATTR_SLICE_SIZE "size" +#define ATTR_SLICE_START "start" +#define ATTR_SLICE_USED_NAME "used-name" +#define ATTR_SLICE_USED_BY "used-by" +#define VAL_ATTR_TRUE "true" +#define VAL_ATTR_FALSE "false" + +#ifdef __cplusplus +} +#endif + +#endif /* _AVAILDEVS_H */ diff --git a/usr/src/cmd/bart/create.c b/usr/src/cmd/bart/create.c index 17770c35f4..a676cd480b 100644 --- a/usr/src/cmd/bart/create.c +++ b/usr/src/cmd/bart/create.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" @@ -31,6 +31,7 @@ #include <sys/statvfs.h> #include <sys/wait.h> #include "bart.h" +#include <aclutils.h> static int sanitize_reloc_root(char *root, size_t bufsize); static int create_manifest_filelist(char **argv, char *reloc_root); @@ -623,46 +624,28 @@ sanitized_fname(const char *fname, boolean_t canon_path) static char * get_acl_string(const char *fname, const struct stat64 *statb, int *err_code) { - aclent_t *aclbuf; - int num_acls, ret; - char *acl_info; + acl_t *aclp; + char *acltext; + int error; if (S_ISLNK(statb->st_mode)) { return (safe_strdup("-")); } - /* First, figure out how many ACL entries this file has */ - num_acls = acl(fname, GETACLCNT, 0, NULL); - if (num_acls < 0) { - *err_code = WARNING_EXIT; - perror(fname); - return (safe_strdup("-")); - } - /* - * Next, create a buffer which is big enough for all the ACL entries. - * Then go get the raw data. + * Include trivial acl's */ - aclbuf = (aclent_t *)safe_calloc(sizeof (aclent_t) * num_acls); - ret = acl(fname, GETACL, num_acls, aclbuf); - if (ret < 0) { - *err_code = WARNING_EXIT; - perror(fname); - return (safe_strdup("-")); - } - - /* Convert the raw entries to text */ - acl_info = acltotext(aclbuf, num_acls); - - /* Free up the buffer which held the raw ACL entries */ - free(aclbuf); + error = acl_get(fname, 0, &aclp); - if (acl_info == NULL) { + if (error != 0) { *err_code = WARNING_EXIT; - perror(fname); + (void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error)); return (safe_strdup("-")); - } else - return (acl_info); + } else { + acltext = acl_totext(aclp); + acl_free(aclp); + return (acltext); + } } diff --git a/usr/src/cmd/chmod/Makefile b/usr/src/cmd/chmod/Makefile index e7f71f4c8d..fad270c1b0 100644 --- a/usr/src/cmd/chmod/Makefile +++ b/usr/src/cmd/chmod/Makefile @@ -22,7 +22,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
# Use is subject to license terms. # # cmd/chmod/Makefile @@ -42,6 +42,8 @@ CPPFLAGS += -D_FILE_OFFSET_BITS=64 LINTFLAGS += -erroff=E_NAME_DEF_NOT_USED2 +LDLIBS += -lsec + POFILE= chmod_cmd.po XGETFLAGS= -a -x chmod.xcl diff --git a/usr/src/cmd/chmod/chmod.c b/usr/src/cmd/chmod/chmod.c index 37f07e0b2e..71dc1de1df 100644 --- a/usr/src/cmd/chmod/chmod.c +++ b/usr/src/cmd/chmod/chmod.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ * chmod option mode files * where * mode is [ugoa][+-=][rwxXlstugo] or an octal number + * mode is [<+|->A[# <number] ]<aclspec> * option is -R and -f */ @@ -63,8 +64,10 @@ #include <string.h> /* strerror() */ #include <stdarg.h> #include <limits.h> +#include <ctype.h> #include <errno.h> #include <sys/acl.h> +#include <aclutils.h> static int rflag; static int fflag; @@ -77,25 +80,38 @@ static char **mav; /* Alternate to argv (for parseargs) */ static char *ms; /* Points to the mode argument */ +#define ACL_ADD 1 +#define ACL_DELETE 2 +#define ACL_SLOT_DELETE 3 +#define ACL_REPLACE 4 +#define ACL_STRIP 5 + +typedef struct acl_args { + acl_t *acl_aclp; + int acl_slot; + int acl_action; +} acl_args_t; + extern mode_t newmode_common(char *ms, mode_t new_mode, mode_t umsk, char *file, char *path, o_mode_t *group_clear_bits, o_mode_t *group_set_bits); static int -dochmod(char *name, char *path, mode_t umsk), -chmodr(char *dir, char *path, mode_t mode, mode_t umsk); +dochmod(char *name, char *path, mode_t umsk, acl_args_t *aclp), +chmodr(char *dir, char *path, mode_t mode, mode_t umsk, acl_args_t *aclp); +static int doacl(char *file, struct stat *st, acl_args_t *aclp); static void handle_acl(char *name, o_mode_t group_clear_bits, - o_mode_t group_set_bits); + o_mode_t group_set_bits); -static void -usage(void); +static void usage(void); -void -errmsg(int severity, int 
code, char *format, ...); +void errmsg(int severity, int code, char *format, ...); -static void -parseargs(int ac, char *av[]); +static void parseargs(int ac, char *av[]); + +int +parse_acl_args(char *arg, acl_args_t **acl_args); int main(int argc, char *argv[]) @@ -103,6 +119,7 @@ main(int argc, char *argv[]) int i, c; int status = 0; mode_t umsk; + acl_args_t *acl_args = NULL; (void) setlocale(LC_ALL, ""); #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ @@ -134,9 +151,16 @@ main(int argc, char *argv[]) mac -= optind; mav += optind; - if (mac < 2) { - usage(); - exit(2); + if (mac >= 2 && (mav[0][0] == 'A')) { + if (parse_acl_args(*mav, &acl_args)) { + usage(); + exit(2); + } + } else { + if (mac < 2) { + usage(); + exit(2); + } } ms = mav[0]; @@ -144,14 +168,15 @@ main(int argc, char *argv[]) umsk = umask(0); (void) umask(umsk); - for (i = 1; i < mac; i++) - status += dochmod(mav[i], mav[i], umsk); + for (i = 1; i < mac; i++) { + status += dochmod(mav[i], mav[i], umsk, acl_args); + } return (fflag ? 
0 : status); } static int -dochmod(char *name, char *path, mode_t umsk) +dochmod(char *name, char *path, mode_t umsk, acl_args_t *aclp) { static struct stat st; int linkflg = 0; @@ -172,9 +197,11 @@ dochmod(char *name, char *path, mode_t umsk) /* Do not recurse if directory is object of symbolic link */ if (rflag && ((st.st_mode & S_IFMT) == S_IFDIR) && !linkflg) - return (chmodr(name, path, st.st_mode, umsk)); + return (chmodr(name, path, st.st_mode, umsk, aclp)); - if (chmod(name, newmode_common(ms, st.st_mode, umsk, name, path, + if (aclp) { + return (doacl(name, &st, aclp)); + } else if (chmod(name, newmode_common(ms, st.st_mode, umsk, name, path, &group_clear_bits, &group_set_bits)) == -1) { errmsg(2, 0, gettext("can't change %s\n"), path); return (1); @@ -195,7 +222,7 @@ dochmod(char *name, char *path, mode_t umsk) static int -chmodr(char *dir, char *path, mode_t mode, mode_t umsk) +chmodr(char *dir, char *path, mode_t mode, mode_t umsk, acl_args_t *aclp) { DIR *dirp; @@ -204,6 +231,7 @@ chmodr(char *dir, char *path, mode_t mode, mode_t umsk) char currdir[PATH_MAX+1]; /* current dir name + '/' */ char parentdir[PATH_MAX+1]; /* parent dir name + '/' */ int ecode; + struct stat st; o_mode_t group_clear_bits, group_set_bits; if (getcwd(savedir, PATH_MAX) == 0) @@ -213,7 +241,14 @@ chmodr(char *dir, char *path, mode_t mode, mode_t umsk) /* * Change what we are given before doing it's contents */ - if (chmod(dir, newmode_common(ms, mode, umsk, dir, path, + if (aclp) { + if (lstat(dir, &st) < 0) { + errmsg(2, 0, gettext("can't access %s\n"), path); + return (1); + } + if (doacl(dir, &st, aclp) != 0) + return (1); + } else if (chmod(dir, newmode_common(ms, mode, umsk, dir, path, &group_clear_bits, &group_set_bits)) < 0) { errmsg(2, 0, gettext("can't change %s\n"), path); return (1); @@ -226,8 +261,11 @@ chmodr(char *dir, char *path, mode_t mode, mode_t umsk) * permissions changes to both the acl mask and the * general group permissions. 
*/ - if (group_clear_bits || group_set_bits) - handle_acl(dir, group_clear_bits, group_set_bits); + + if (aclp == NULL) { /* only necessary when not setting ACL */ + if (group_clear_bits || group_set_bits) + handle_acl(dir, group_clear_bits, group_set_bits); + } if (chdir(dir) < 0) { errmsg(2, 0, "%s/%s: %s\n", savedir, dir, strerror(errno)); @@ -255,7 +293,7 @@ chmodr(char *dir, char *path, mode_t mode, mode_t umsk) for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) { (void) strcpy(currdir, parentdir); (void) strcat(currdir, dp->d_name); - ecode += dochmod(dp->d_name, currdir, umsk); + ecode += dochmod(dp->d_name, currdir, umsk, aclp); } (void) closedir(dirp); if (chdir(savedir) < 0) { @@ -301,13 +339,26 @@ usage(void) "usage:\tchmod [-fR] <absolute-mode> file ...\n")); (void) fprintf(stderr, gettext( + "\tchmod [-fR] <ACL-operation> file ...\n")); + + (void) fprintf(stderr, gettext( "\tchmod [-fR] <symbolic-mode-list> file ...\n")); + (void) fprintf(stderr, gettext( "where \t<symbolic-mode-list> is a comma-separated list of\n")); (void) fprintf(stderr, gettext( "\t[ugoa]{+|-|=}[rwxXlstugo]\n")); + + (void) fprintf(stderr, gettext( + "where \t<ACL-operation> is one of the following\n")); + (void) fprintf(stderr, gettext("\tA-<acl_specification>\n")); + (void) fprintf(stderr, gettext("\tA[number]-\n")); + (void) fprintf(stderr, gettext( + "\tA[number]{+|=}<acl_specification>\n")); + (void) fprintf(stderr, gettext( + "where \t<acl-specification> is a comma-separated list of ACEs\n")); } /* @@ -373,6 +424,74 @@ parseargs(int ac, char *av[]) mav[mac] = (char *)NULL; } +int +parse_acl_args(char *arg, acl_args_t **acl_args) +{ + acl_t *new_acl = NULL; + int slot; + int error; + int len; + int action; + acl_args_t *new_acl_args; + char *acl_spec = NULL; + char *end; + + if (arg[0] != 'A') + return (1); + + slot = strtol(&arg[1], &end, 10); + + len = strlen(arg); + switch (*end) { + case '+': + action = ACL_ADD; + acl_spec = ++end; + break; + case '-': + if (len == 
2 && arg[0] == 'A' && arg[1] == '-') + action = ACL_STRIP; + else + action = ACL_DELETE; + if (action != ACL_STRIP) { + acl_spec = ++end; + if (acl_spec[0] == '\0') { + action = ACL_SLOT_DELETE; + acl_spec = NULL; + } else if (arg[1] != '-') + return (1); + } + break; + case '=': + action = ACL_REPLACE; + acl_spec = ++end; + break; + default: + return (1); + } + + if ((action == ACL_REPLACE || action == ACL_ADD) && acl_spec[0] == '\0') + return (1); + + if (acl_spec) { + if (error = acl_fromtext(acl_spec, &new_acl)) { + errmsg(1, 1, "%s\n", acl_strerror(error)); + return (1); + } + } + + new_acl_args = malloc(sizeof (acl_args_t)); + if (new_acl_args == NULL) + return (1); + + new_acl_args->acl_aclp = new_acl; + new_acl_args->acl_slot = slot; + new_acl_args->acl_action = action; + + *acl_args = new_acl_args; + + return (0); +} + /* * This function is called whenever the group permissions of a file * is being modified. According to the chmod(1) manpage, any @@ -388,6 +507,14 @@ handle_acl(char *name, o_mode_t group_clear_bits, o_mode_t group_set_bits) aclent_t *aclp, *tp; o_mode_t newperm; + /* + * if this file system support ace_t acl's + * then simply return since we don't have an + * acl mask to deal with + */ + if (pathconf(name, _PC_ACL_ENABLED) == _ACL_ACE_ENABLED) + return; + if ((aclcnt = acl(name, GETACLCNT, 0, NULL)) <= MIN_ACL_ENTRIES) return; /* it's just a trivial acl; no need to change it */ @@ -424,3 +551,134 @@ handle_acl(char *name, o_mode_t group_clear_bits, o_mode_t group_set_bits) } free(aclp); } + +static int +doacl(char *file, struct stat *st, acl_args_t *acl_args) +{ + acl_t *aclp; + acl_t *set_aclp; + int error = 0; + void *to, *from; + int len; + int isdir; + + isdir = S_ISDIR(st->st_mode); + + error = acl_get(file, 0, &aclp); + + if (error != 0) { + errmsg(1, 1, "%s\n", acl_strerror(error)); + return (1); + } + + switch (acl_args->acl_action) { + case ACL_ADD: + if ((error = acl_addentries(aclp, + acl_args->acl_aclp, acl_args->acl_slot)) != 
0) { + errmsg(1, 1, "%s\n", acl_strerror(error)); + acl_free(aclp); + return (1); + } + set_aclp = aclp; + break; + case ACL_SLOT_DELETE: + + if (acl_args->acl_slot + 1 > aclp->acl_cnt) { + errmsg(1, 1, + gettext("Invalid slot specified for removal\n")); + acl_free(aclp); + return (1); + } + + if (acl_args->acl_slot == 0 && aclp->acl_cnt == 1) { + errmsg(1, 1, + gettext("Can't remove all ACL " + "entries from a file\n")); + acl_free(aclp); + return (1); + } + + /* + * remove a single entry + * + * if last entry just adjust acl_cnt + */ + + if ((acl_args->acl_slot + 1) == aclp->acl_cnt) + aclp->acl_cnt--; + else { + to = (char *)aclp->acl_aclp + + (acl_args->acl_slot * aclp->acl_entry_size); + from = (char *)to + aclp->acl_entry_size; + len = (aclp->acl_cnt - acl_args->acl_slot - 1) * + aclp->acl_entry_size; + (void) memmove(to, from, len); + aclp->acl_cnt--; + } + set_aclp = aclp; + break; + + case ACL_DELETE: + if ((error = acl_removeentries(aclp, acl_args->acl_aclp, + acl_args->acl_slot, ACL_REMOVE_ALL)) != 0) { + errmsg(1, 1, "%s\n", acl_strerror(error)); + acl_free(aclp); + return (1); + } + + if (aclp->acl_cnt == 0) { + errmsg(1, 1, + gettext("Can't remove all ACL " + "entries from a file\n")); + acl_free(aclp); + return (1); + } + + set_aclp = aclp; + break; + case ACL_REPLACE: + if (acl_args->acl_slot >= 0) { + error = acl_modifyentries(aclp, acl_args->acl_aclp, + acl_args->acl_slot); + if (error) { + errmsg(1, 1, "%s\n", acl_strerror(error)); + acl_free(aclp); + return (1); + } + set_aclp = aclp; + } else { + set_aclp = acl_args->acl_aclp; + } + break; + case ACL_STRIP: + error = acl_strip(file, st->st_uid, st->st_gid, st->st_mode); + if (error) { + errmsg(1, 1, "%s\n", acl_strerror(error)); + return (1); + } + acl_free(aclp); + return (0); + /*NOTREACHED*/ + default: + errmsg(1, 0, gettext("Unknown ACL action requested\n")); + return (1); + break; + } + + error = acl_check(set_aclp, isdir); + + if (error) { + errmsg(1, 0, "%s\n%s", acl_strerror(error), + 
gettext("See chmod(1) for more information on " + "valid ACL syntax\n")); + return (1); + } + if ((error = acl_set(file, set_aclp)) != 0) { + errmsg(1, 0, gettext("Failed to set ACL: %s\n"), + acl_strerror(error)); + acl_free(aclp); + return (1); + } + acl_free(aclp); + return (0); +} diff --git a/usr/src/cmd/cmd-inet/usr.bin/rcp.c b/usr/src/cmd/cmd-inet/usr.bin/rcp.c index b21f426e69..7b76b8967a 100644 --- a/usr/src/cmd/cmd-inet/usr.bin/rcp.c +++ b/usr/src/cmd/cmd-inet/usr.bin/rcp.c @@ -1,5 +1,5 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -52,6 +52,7 @@ #include <sys/sendfile.h> #include <sys/sysmacros.h> #include <sys/wait.h> +#include <aclutils.h> /* * It seems like Berkeley got these from pathnames.h? @@ -83,6 +84,7 @@ static int iamremote; static int iamrecursive; static int targetshouldbedirectory; static int aclflag; +static int acl_aclflag; static int retval = 0; static int portnumber = 0; @@ -200,14 +202,14 @@ main(int argc, char *argv[]) } fflag = tflag = 0; - while ((ch = getopt(argc, argv, "axdfprtz:D:k:P:")) != EOF) { + while ((ch = getopt(argc, argv, "axdfprtz:D:k:P:Z")) != EOF) { switch (ch) { case 'd': targetshouldbedirectory = 1; break; case 'f': /* "from" */ fflag = 1; - if (aclflag) + if (aclflag | acl_aclflag) /* ok response */ (void) desrcpwrite(rem, "", 1); break; @@ -220,6 +222,9 @@ main(int argc, char *argv[]) case 't': /* "to" */ tflag = 1; break; + case 'Z': + acl_aclflag++; + break; case 'x': if (!krb5_privacy_allowed()) { (void) fprintf(stderr, gettext("rcp: " @@ -652,9 +657,7 @@ toremote(char *targ, int argc, char *argv[]) if (response() < 0) exit(1); - } - else - { + } else { /* * ACL support: try to find out if the remote @@ -663,8 +666,13 @@ toremote(char *targ, int argc, char *argv[]) * purpose. 
*/ aclflag = 1; + acl_aclflag = 1; - (void) snprintf(bp, buffersize, "%s -t %s", + /* + * First see if the remote side will support + * both aclent_t and ace_t acl's? + */ + (void) snprintf(bp, buffersize, "%s -tZ %s", cmd_sunw, targ); rem = rcmd_af(&host, portnumber, pwd->pw_name, tuser ? tuser : pwd->pw_name, @@ -681,32 +689,49 @@ toremote(char *targ, int argc, char *argv[]) != sizeof (resp)) lostconn(); if (resp != 0) { - /* - * Not OK: - * The other side is running - * non-acl rcp. Try again with - * normal stuff - */ - aclflag = 0; + acl_aclflag = 0; (void) snprintf(bp, buffersize, - "%s -t %s", cmd, targ); + "%s -t %s", cmd_sunw, targ); + (void) close(rem); host = thost; rem = rcmd_af(&host, portnumber, - pwd->pw_name, - tuser ? tuser : - pwd->pw_name, bp, 0, - AF_INET6); + pwd->pw_name, + tuser ? tuser : pwd->pw_name, + bp, 0, AF_INET6); if (rem < 0) exit(1); - if (response() < 0) - exit(1); + + if (read(rem, &resp, sizeof (resp)) + != sizeof (resp)) + lostconn(); + if (resp != 0) { + /* + * Not OK: + * The other side is running + * non-acl rcp. Try again with + * normal stuff + */ + aclflag = 0; + (void) snprintf(bp, buffersize, + "%s -t %s", cmd, targ); + (void) close(rem); + host = thost; + rem = rcmd_af(&host, portnumber, + pwd->pw_name, + tuser ? tuser : + pwd->pw_name, bp, 0, + AF_INET6); + if (rem < 0) + exit(1); + if (response() < 0) + exit(1); + } } /* everything should be fine now */ (void) setuid(userid); } - } source(1, argv + i); } @@ -843,8 +868,9 @@ tolocal(int argc, char *argv[]) * running acl cognizant version of rcp. 
*/ aclflag = 1; + acl_aclflag = 1; - (void) snprintf(bp, buffersize, "%s -f %s", cmd_sunw, src); + (void) snprintf(bp, buffersize, "%s -Zf %s", cmd_sunw, src); rem = rcmd_af(&host, portnumber, pwd->pw_name, suser, bp, 0, AF_INET6); @@ -862,6 +888,24 @@ tolocal(int argc, char *argv[]) if (read(rem, &resp, sizeof (resp)) != sizeof (resp)) lostconn(); if (resp != 0) { + + /* + * Try again without ace_acl support + */ + acl_aclflag = 0; + (void) snprintf(bp, buffersize, "%s -f %s", + cmd_sunw, src); + rem = rcmd_af(&host, portnumber, pwd->pw_name, suser, + bp, 0, AF_INET6); + + if (rem < 0) { + ++errs; + continue; + } + + if (read(rem, &resp, sizeof (resp)) != sizeof (resp)) + lostconn(); + /* * NOT ok: * The other side is running non-acl rcp. @@ -1118,7 +1162,7 @@ notreg: } /* ACL support: send */ - if (aclflag) { + if (aclflag | acl_aclflag) { /* get acl from f and send it over */ if (sendacl(f) == ACL_FAIL) { (void) close(f); @@ -1414,7 +1458,7 @@ sink(int argc, char *argv[]) if (buf[0] == 'D') { if (exists) { if ((stb.st_mode&S_IFMT) != S_IFDIR) { - if (aclflag) { + if (aclflag | acl_aclflag) { /* * consume acl in the pipe * fd = -1 to indicate the @@ -1439,7 +1483,7 @@ sink(int argc, char *argv[]) } /* acl support for directories */ - if (aclflag) { + if (aclflag | acl_aclflag) { int dfd; if ((dfd = open(np, O_RDONLY)) == -1) @@ -1486,7 +1530,7 @@ bad: /* * ACL support: receiving */ - if (aclflag) { + if (aclflag | acl_aclflag) { /* get acl and set it to ofd */ if (recvacl(ofd, exists, pflag) == ACL_FAIL) { (void) close(ofd); @@ -1733,49 +1777,78 @@ static int sendacl(int f) { int aclcnt; - aclent_t *aclbufp; - int aclsize; char *acltext; char buf[BUFSIZ]; + acl_t *aclp; + char acltype; + int aclerror; + int trivial; - if ((aclcnt = facl(f, GETACLCNT, 0, NULL)) < 0) { - error("can't get acl count \n"); + + aclerror = facl_get(f, ACL_NO_TRIVIAL, &aclp); + if (aclerror != 0) { + error("can't retrieve ACL: %s \n", acl_strerror(aclerror)); return (ACL_FAIL); } + /* + 
* if acl type is not ACLENT_T and were operating in acl_aclflag == 0 + * then don't do the malloc and facl(fd, getcntcmd,...); + * since the remote side doesn't support alternate style ACL's. + */ + + if (aclp && (acl_type(aclp) != ACLENT_T) && (acl_aclflag == 0)) { + aclcnt = MIN_ACL_ENTRIES; + acltype = 'A'; + trivial = ACL_IS_TRIVIAL; + } else { + + aclcnt = (aclp != NULL) ? acl_cnt(aclp) : 0; + + if (aclp) { + acltype = (acl_type(aclp) != ACLENT_T) ? 'Z' : 'A'; + aclcnt = acl_cnt(aclp); + trivial = (acl_flags(aclp) & ACL_IS_TRIVIAL); + } else { + acltype = 'A'; + aclcnt = MIN_ACL_ENTRIES; + trivial = ACL_IS_TRIVIAL; + } + + } + /* send the acl count over */ - (void) snprintf(buf, sizeof (buf), "A%d\n", aclcnt); + (void) snprintf(buf, sizeof (buf), "%c%d\n", acltype, aclcnt); (void) desrcpwrite(rem, buf, strlen(buf)); - /* only send acl when it is non-trivial */ - if (aclcnt > MIN_ACL_ENTRIES) { - aclsize = aclcnt * sizeof (aclent_t); - if ((aclbufp = (aclent_t *)malloc(aclsize)) == NULL) { - error("rcp: cant allocate memory: aclcnt %d\n", - aclcnt); - exit(1); - } - if (facl(f, GETACL, aclcnt, aclbufp) < 0) { - error("rcp: failed to get acl\n"); - return (ACL_FAIL); - } - acltext = acltotext(aclbufp, aclcnt); + /* + * only send acl when we have an aclp, which would + * imply its not trivial. 
+ */ + if (aclp && (trivial != ACL_IS_TRIVIAL)) { + acltext = acl_totext(aclp); if (acltext == NULL) { error("rcp: failed to convert to text\n"); + acl_free(aclp); return (ACL_FAIL); } /* send ACLs over: send the length first */ - (void) snprintf(buf, sizeof (buf), "A%d\n", strlen(acltext)); + (void) snprintf(buf, sizeof (buf), "%c%d\n", + acltype, strlen(acltext)); (void) desrcpwrite(rem, buf, strlen(buf)); (void) desrcpwrite(rem, acltext, strlen(acltext)); free(acltext); - free(aclbufp); - if (response() < 0) + if (response() < 0) { + acl_free(aclp); return (ACL_FAIL); + } } + + if (aclp) + acl_free(aclp); return (ACL_OK); } @@ -1783,7 +1856,7 @@ sendacl(int f) * Use this routine to get acl entry count and acl text size (in bytes) */ static int -getaclinfo(int *cnt) +getaclinfo(int *cnt, int *acltype) { char buf[BUFSIZ]; char *cp; @@ -1793,7 +1866,15 @@ getaclinfo(int *cnt) cp = buf; if (desrcpread(rem, cp, 1) <= 0) return (ACL_FAIL); - if (*cp++ != 'A') { + + switch (*cp++) { + case 'A': + *acltype = 0; + break; + case 'Z': + *acltype = 1; + break; + default: error("rcp: expect an ACL record, but got %c\n", *cp); return (ACL_FAIL); } @@ -1829,15 +1910,24 @@ recvacl(int f, int exists, int preserve) int j; char *tp; char *acltext; /* external format */ - aclent_t *aclbufp; /* internal format */ + acl_t *aclp; + int acltype; + int min_entries; + int aclerror; /* get acl count */ - if (getaclinfo(&aclcnt) != ACL_OK) + if (getaclinfo(&aclcnt, &acltype) != ACL_OK) return (ACL_FAIL); - if (aclcnt > MIN_ACL_ENTRIES) { + if (acltype == 0) { + min_entries = MIN_ACL_ENTRIES; + } else { + min_entries = 1; + } + + if (aclcnt > min_entries) { /* get acl text size */ - if (getaclinfo(&aclsize) != ACL_OK) + if (getaclinfo(&aclsize, &acltype) != ACL_OK) return (ACL_FAIL); if ((acltext = malloc(aclsize + 1)) == NULL) { error("rcp: cant allocate memory: %d\n", aclsize); @@ -1858,19 +1948,21 @@ recvacl(int f, int exists, int preserve) *tp = '\0'; if (preserve || !exists) { - 
aclbufp = aclfromtext(acltext, &aclcnt); - if (aclbufp == NULL) { - error("rcp: failed to parse acl\n"); + aclerror = acl_fromtext(acltext, &aclp); + if (aclerror != 0) { + error("rcp: failed to parse acl : %s\n", + acl_strerror(aclerror)); return (ACL_FAIL); } + if (f != -1) { - if (facl(f, SETACL, aclcnt, aclbufp) < 0) { + if (facl_set(f, aclp) < 0) { error("rcp: failed to set acl\n"); return (ACL_FAIL); } } /* -1 means that just consume the data in the pipe */ - free(aclbufp); + acl_free(aclp); } free(acltext); (void) desrcpwrite(rem, "", 1); diff --git a/usr/src/cmd/compress/Makefile b/usr/src/cmd/compress/Makefile index abd852409b..135dcad541 100644 --- a/usr/src/cmd/compress/Makefile +++ b/usr/src/cmd/compress/Makefile @@ -22,7 +22,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -34,6 +34,7 @@ include ../Makefile.cmd CFLAGS += $(CCVERBOSE) CPPFLAGS += -D_FILE_OFFSET_BITS=64 +LDLIBS += -lsec all: $(PROG) diff --git a/usr/src/cmd/compress/compress.c b/usr/src/cmd/compress/compress.c index 53f505d84d..dba163196b 100644 --- a/usr/src/cmd/compress/compress.c +++ b/usr/src/cmd/compress/compress.c @@ -136,6 +136,7 @@ static char rcs_ident[] = #include <strings.h> #include <fcntl.h> #include <dirent.h> +#include <aclutils.h> /* * Multi-byte handling for 'y' or 'n' @@ -1602,8 +1603,8 @@ copystat(char *ifname, struct stat *ifstat, char *ofname) { mode_t mode; struct utimbuf timep; - int aclcnt; - aclent_t *aclp; + acl_t *aclp; + int error; if (fclose(outp)) { perror(ofname); @@ -1654,41 +1655,20 @@ copystat(char *ifname, struct stat *ifstat, char *ofname) if (chmod(ofname, mode)) /* Copy modes */ perror(ofname); - /* Copy ACL info */ - if ((aclcnt = acl(ifname, GETACLCNT, 0, NULL)) < 0) { + error = acl_get(ifname, ACL_NO_TRIVIAL, &aclp); + if (error != 0) { (void) fprintf(stderr, gettext( - "%s: failed to get acl 
count\n"), - ifname); + "%s: failed to retrieve acl : %s\n"), + ifname, acl_strerror(error)); perm_stat = 1; } - /* - * Get ACL info: don't bother allocating space if - * there are only standard permissions, i.e., - * ACL count < 4. - */ - if (aclcnt > MIN_ACL_ENTRIES) { - if ((aclp = (aclent_t *)malloc( - sizeof (aclent_t) * aclcnt)) == NULL) { - (void) fprintf(stderr, gettext( - "Insufficient memory\n")); - exit(1); - } - if (acl(ifname, GETACL, aclcnt, aclp) < 0) { - (void) fprintf(stderr, gettext( - "%s: failed to get acl entries\n"), - ifname); - perm_stat = 1; - } else { - if (acl(ofname, SETACL, - aclcnt, aclp) < 0) { - (void) fprintf(stderr, gettext( - "%s: failed to set acl " - "entries\n"), ofname); - perm_stat = 1; - } - } - free(aclp); + if (aclp && (acl_set(ofname, aclp) < 0)) { + (void) fprintf(stderr, gettext("%s: failed to set acl " + "entries\n"), ofname); + perm_stat = 1; } + if (aclp) + acl_free(aclp); /* Copy ownership */ (void) chown(ofname, ifstat->st_uid, ifstat->st_gid); diff --git a/usr/src/cmd/cpio/cpio.c b/usr/src/cmd/cpio/cpio.c index cd7e242e27..4a8e442dee 100644 --- a/usr/src/cmd/cpio/cpio.c +++ b/usr/src/cmd/cpio/cpio.c @@ -66,6 +66,7 @@ #include <libintl.h> #include <dirent.h> #include <limits.h> +#include <aclutils.h> /* * Special kludge for off_t being a signed quantity. 
@@ -170,7 +171,6 @@ static int g_read(int, int, char *, unsigned); static int g_write(int, int, char *, unsigned); static int is_floppy(int); static int is_tape(int); -static int append_secattr(char **, int *, int, char *, char); static void write_ancillary(char *secinfo, int len); static int remove_dir(char *); static int save_cwd(void); @@ -477,9 +477,9 @@ static struct sec_attr { } *attr; static int Pflag = 0; /* flag indicates that acl is preserved */ -static int aclcnt = 0; /* acl entry count */ -static aclent_t *aclp = NULL; /* pointer to ACL */ -static int acl_set = 0; /* True if an acl was set on the file */ +static int acl_is_set = 0; /* True if an acl was set on the file */ + +acl_t *aclp; /* * @@ -595,7 +595,7 @@ static struct xattr_buf *xattrp; static struct xattr_buf *xattr_linkp; static int xattrbadhead; /* is extended attribute header bad? */ -static int append_secattr(char **, int *, int, char *, char); +static int append_secattr(char **, int *, acl_t *); static void write_ancillary(char *, int); /* @@ -676,11 +676,10 @@ main(int argc, char **argv) * the next file. 
*/ if (aclp != NULL) { - free(aclp); - aclcnt = 0; + acl_free(aclp); aclp = NULL; } - acl_set = 0; + acl_is_set = 0; } (void) memset(&Gen, 0, sizeof (Gen)); } @@ -710,10 +709,9 @@ main(int argc, char **argv) Hiddendir = 0; } if (aclp != NULL) { - free(aclp); - aclcnt = 0; + acl_free(aclp); aclp = NULL; - acl_set = 0; + acl_is_set = 0; } } write_trail(); @@ -736,10 +734,9 @@ main(int argc, char **argv) } passret = file_pass(); if (aclp != NULL) { - free(aclp); - aclcnt = 0; + acl_free(aclp); aclp = NULL; - acl_set = 0; + acl_is_set = 0; } if (Gen.g_passdirfd != -1) (void) close(Gen.g_passdirfd); @@ -1577,9 +1574,9 @@ creat_lnk(int dirfd, char *name1_p, char *name2_p) errno = 0; if (!link(name1_p, name2_p)) { if (aclp != NULL) { - free(aclp); + acl_free(aclp); aclp = NULL; - acl_set = 0; + acl_is_set = 0; } cnt = 0; break; @@ -1709,16 +1706,16 @@ creat_spec(int dirfd) "file %s\"", G_p->g_attrfnam_p); } - acl_set = 0; + acl_is_set = 0; if (Pflag && aclp != NULL) { - if (facl(dirfd, SETACL, aclcnt, aclp) < 0) { + if (facl_set(dirfd, aclp) < 0) { msg(ERRN, "failed to set acl on attribute" " directory of %s ", G_p->g_attrfnam_p); } else { - acl_set = 1; + acl_is_set = 1; } - free(aclp); + acl_free(aclp); aclp = NULL; } @@ -1754,18 +1751,18 @@ creat_spec(int dirfd) /* A file by the same name exists. */ /* Take care of ACLs */ - acl_set = 0; + acl_is_set = 0; if (Pflag && aclp != NULL) { - if (acl(nam_p, SETACL, aclcnt, aclp) < 0) { + if (acl_set(nam_p, aclp) < 0) { msg(ERRN, "\"%s\": failed to set acl", nam_p); } else { - acl_set = 1; + acl_is_set = 1; } - free(aclp); + acl_free(aclp); aclp = NULL; } if (Args & OCd) { @@ -1829,17 +1826,17 @@ creat_spec(int dirfd) * The file creation succeeded. Take care of the ACLs. 
*/ - acl_set = 0; + acl_is_set = 0; if (Pflag && aclp != NULL) { - if (acl(nam_p, SETACL, aclcnt, aclp) < 0) { + if (acl_set(nam_p, aclp) < 0) { msg(ERRN, "\"%s\": failed to set acl", nam_p); } else { - acl_set = 1; + acl_is_set = 1; } - free(aclp); + acl_free(aclp); aclp = NULL; } @@ -2206,8 +2203,7 @@ data_out(void) int len = 0; /* append security attributes */ - if (append_secattr(&secinfo, &len, aclcnt, - (char *)aclp, UFSD_ACL) == -1) { + if (append_secattr(&secinfo, &len, aclp) == -1) { msg(ERR, "can create security information"); } @@ -2328,8 +2324,7 @@ data_out(void) int len = 0; /* append security attributes */ - if ((append_secattr(&secinfo, &len, aclcnt, (char *)aclp, - UFSD_ACL)) == -1) + if ((append_secattr(&secinfo, &len, aclp)) == -1) msg(ERR, "can create security information"); /* call append_secattr() if more than one */ @@ -2926,8 +2921,7 @@ file_out(void) int len = 0; /* append security attributes */ - if ((append_secattr(&secinfo, &len, aclcnt, - (char *)aclp, UFSD_ACL)) == -1) + if ((append_secattr(&secinfo, &len, aclp)) == -1) msg(ERR, "can create security information"); /* call append_secattr() if more than one */ @@ -3248,6 +3242,8 @@ gethdr(void) char *preptr; int k = 0; int j; + int error; + int aclcnt; Gen.g_nam_p = Nam_p; do { /* hit == NONE && (Args & OCk) && Buffr.b_cnt > 0 */ @@ -3629,20 +3625,29 @@ gethdr(void) attr = (struct sec_attr *)tp; switch (attr->attr_type) { case UFSD_ACL: + case ACE_ACL: (void) sscanf(attr->attr_len, "%7lo", (ulong_t *)&aclcnt); - /* header is 8 */ + /* header is 8 */ attrsize = 8 + strlen(&attr->attr_info[0]) + 1; - aclp = aclfromtext(&attr->attr_info[0], - &cnt); - if (aclp == NULL) { - msg(ERR, "aclfromtext failed"); + + error = + acl_fromtext(&attr->attr_info[0], + &aclp); + + if (error != 0) { + msg(ERR, + "aclfromtext failed: %s", + acl_strerror(error)); + bytes -= attrsize; break; } - if (aclcnt != cnt) { + + if (aclcnt != acl_cnt(aclp)) { msg(ERR, "acl count error"); + bytes -= attrsize; break; } 
bytes -= attrsize; @@ -3909,21 +3914,10 @@ getname(void) * standard permissions, i.e. ACL count < 4 */ if ((SrcSt.st_mode & Ftype) != S_IFLNK && Pflag) { - if ((aclcnt = acl(Gen.g_nam_p, GETACLCNT, 0, NULL)) < 0) + if (acl_get(Gen.g_nam_p, ACL_NO_TRIVIAL, &aclp) != 0) msg(ERRN, "Error with acl() of \"%s\"", Gen.g_nam_p); - if (aclcnt > MIN_ACL_ENTRIES) { - aclp = e_zalloc(E_EXIT, sizeof (aclent_t) * aclcnt); - - if (acl(Gen.g_nam_p, GETACL, aclcnt, aclp) < 0) { - msg(ERRN, - "Error with getacl() of \"%s\"", - Gen.g_nam_p); - free(aclp); - aclp = NULL; - } - } - /* else: only traditional permissions, so proceed as usual */ } + /* else: only traditional permissions, so proceed as usual */ if (creat_hdr()) return (1); else return (2); @@ -4332,17 +4326,16 @@ openout(int dirfd) if ((result = openat(dirfd, get_component(nam_p), O_CREAT|O_RDWR|O_TRUNC, (int)G_p->g_mode)) >= 0) { /* acl support */ - acl_set = 0; + acl_is_set = 0; if (Pflag && aclp != NULL) { - if (facl(result, SETACL, aclcnt, aclp) - < 0) { + if (facl_set(result, aclp) < 0) { msg(ERRN, "\"%s\": failed to set acl", nam_p); } else { - acl_set = 1; + acl_is_set = 1; } - free(aclp); + acl_free(aclp); aclp = NULL; } cnt = 0; @@ -4879,7 +4872,7 @@ rstfiles(int over, int dirfd) mode_t orig_mask, new_mask; struct stat sbuf; - if (!(Pflag && acl_set)) { + if (!(Pflag && acl_is_set)) { /* Acl was not set, so we must chmod */ if (LSTAT(dirfd, G_p->g_nam_p, &sbuf) == 0) { if ((sbuf.st_mode & Ftype) != S_IFLNK) { @@ -4927,7 +4920,7 @@ rstfiles(int over, int dirfd) set_tym(dirfd, get_component(onam_p), G_p->g_mtime, G_p->g_mtime); } - if (!acl_set) { + if (!acl_is_set) { if (G_p->g_attrnam_p != (char *)NULL) { error = fchmod(Ofile, (int)G_p->g_mode); } else { @@ -6636,11 +6629,9 @@ is_floppy(int fd) */ static int append_secattr( - char **secinfo, /* existing security info */ - int *secinfo_len, /* length of existing security info */ - int size, /* new attribute size: unit depends on type */ - char *attrp, /* new 
attribute data pointer */ - char attr_type) /* new attribute type */ + char **secinfo, /* existing security info */ + int *secinfo_len, /* length of existing security info */ + acl_t *aclp) /* new attribute data pointer */ { char *new_secinfo; char *attrtext; @@ -6648,14 +6639,15 @@ append_secattr( int oldsize; /* no need to add */ - if (attrp == (char *)NULL) { + if (aclp == NULL) { return (0); } - switch (attr_type) { - case UFSD_ACL: + switch (acl_type(aclp)) { + case ACLENT_T: + case ACE_T: /* LINTED alignment */ - attrtext = acltotext((aclent_t *)attrp, size); + attrtext = acl_totext(aclp); if (attrtext == NULL) { (void) fprintf(stderr, "acltotext failed\n"); return (-1); @@ -6667,9 +6659,10 @@ append_secattr( (void) fprintf(stderr, "can't allocate memory\n"); return (-1); } - attr->attr_type = '1'; /* UFSD_ACL */ + attr->attr_type = (acl_type(aclp) == ACLENT_T) ? + UFSD_ACL : ACE_ACL; /* acl entry count */ - (void) sprintf(attr->attr_len, "%06o", size); + (void) sprintf(attr->attr_len, "%06o", acl_cnt(aclp)); (void) strcpy((char *)&attr->attr_info[0], attrtext); free(attrtext); break; @@ -6853,11 +6846,10 @@ xattrs_out(int (*func)()) * If aclp still exists then free it since it is was set when base * file was extracted. 
*/ - if (aclp != (aclent_t *)NULL) { - free(aclp); - aclcnt = 0; + if (aclp != NULL) { + acl_free(aclp); aclp = NULL; - acl_set = 0; + acl_is_set = 0; } Gen.g_dirfd = attropen(G_p->g_nam_p, ".", O_RDONLY); @@ -6943,24 +6935,11 @@ xattrs_out(int (*func)()) free(namep); continue; } - if ((aclcnt = facl(filefd, GETACLCNT, - 0, NULL)) < 0) { + if (facl_get(filefd, ACL_NO_TRIVIAL, &aclp) != 0) { msg(ERRN, "Error with acl() on %s", Gen.g_nam_p); } - if (aclcnt > MIN_ACL_ENTRIES) { - aclp = e_zalloc(E_EXIT, - sizeof (aclent_t) * aclcnt); - - if (facl(filefd, GETACL, aclcnt, aclp) < 0) { - msg(ERRN, - "Error with getacl() on %s", - Gen.g_nam_p); - free(aclp); - aclp = NULL; - } - } (void) close(filefd); } (void) creat_hdr(); @@ -6973,11 +6952,10 @@ xattrs_out(int (*func)()) Gen.g_attrfnam_p = (char *)NULL; Gen.g_linktoattrfnam_p = (char *)NULL; Gen.g_linktoattrnam_p = (char *)NULL; - if (aclp != (aclent_t *)NULL) { - free(aclp); - aclcnt = 0; + if (aclp != NULL) { + acl_free(aclp); aclp = NULL; - acl_set = 0; + acl_is_set = 0; } free(namep); } diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com index 4d7268b042..41acb0192e 100644 --- a/usr/src/cmd/devfsadm/Makefile.com +++ b/usr/src/cmd/devfsadm/Makefile.com @@ -62,6 +62,7 @@ LINK_SRCS = \ $(COMMON)/sgen_link.c \ $(COMMON)/md_link.c \ $(COMMON)/dtrace_link.c \ + $(COMMON)/zfs_link.c \ $(MISC_LINK_ISA).c LINT_MODULES = $(LINK_SRCS:.c=.ln) @@ -82,6 +83,7 @@ LINK_OBJS = \ sgen_link.o \ md_link.o \ dtrace_link.o \ + zfs_link.o \ $(MISC_LINK_ISA).o LINK_MODS = \ @@ -100,6 +102,7 @@ LINK_MODS = \ SUNW_sgen_link.so \ SUNW_md_link.so \ SUNW_dtrace_link.so \ + SUNW_zfs_link.so \ SUNW_$(MISC_LINK_ISA).so DEVLINKTAB = devlink.tab diff --git a/usr/src/cmd/devfsadm/zfs_link.c b/usr/src/cmd/devfsadm/zfs_link.c new file mode 100644 index 0000000000..50148f3ae6 --- /dev/null +++ b/usr/src/cmd/devfsadm/zfs_link.c @@ -0,0 +1,115 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the 
terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <regex.h> +#include <devfsadm.h> +#include <stdio.h> +#include <strings.h> +#include <stdlib.h> +#include <limits.h> +#include <sys/mkdev.h> +#include <sys/fs/zfs.h> + +/* zfs and zvol name info */ + +#define ZVOL_LINK_RE_DEVICES "zvol/r?dsk/.*/.*$" + +static int zfs(di_minor_t minor, di_node_t node); + +/* + * devfs create callback register + */ +static devfsadm_create_t zfs_create_cbt[] = { + { "pseudo", "ddi_pseudo", ZFS_DRIVER, + TYPE_EXACT | DRV_EXACT, ILEVEL_0, zfs, + }, +}; +DEVFSADM_CREATE_INIT_V0(zfs_create_cbt); + +/* + * devfs cleanup register + */ +static devfsadm_remove_t zfs_remove_cbt[] = { + { "pseudo", ZVOL_LINK_RE_DEVICES, RM_HOT | RM_POST, + ILEVEL_0, devfsadm_rm_all }, +}; +DEVFSADM_REMOVE_INIT_V0(zfs_remove_cbt); + +/* + * For the zfs control node: + * /dev/zfs -> /devices/pseudo/zfs@0:zfs + * For zvols: + * /dev/zvol/dsk/<pool>/<dataset> -> /devices/pseudo/zfs@0:1 + * /dev/zvol/rdsk/<pool>/<dataset> -> /devices/pseudo/zfs@0:1,raw + */ +static int +zfs(di_minor_t minor, di_node_t node) +{ + dev_t dev; + int err; + char 
mn[MAXNAMELEN + 1]; + char blkname[MAXNAMELEN + 1]; + char rawname[MAXNAMELEN + 1]; + char path[PATH_MAX + 1]; + char *name; + + (void) strcpy(mn, di_minor_name(minor)); + + if (strcmp(mn, ZFS_DRIVER) == 0) { + (void) devfsadm_mklink(ZFS_DRIVER, node, minor, 0); + } else { + dev = di_minor_devt(minor); + err = di_prop_lookup_strings(dev, node, ZVOL_PROP_NAME, &name); + if (err < 0) { + /* property not defined so can't do anything */ + return (DEVFSADM_CONTINUE); + } + (void) snprintf(blkname, sizeof (blkname), "%dc", + (int)minor(dev)); + (void) snprintf(rawname, sizeof (rawname), "%dc,raw", + (int)minor(dev)); + + /* + * This is where the actual public name gets constructed. + * Change the snprintf format to change the public + * path that gets constructed. + */ + if (strcmp(mn, blkname) == 0) { + (void) snprintf(path, sizeof (path), "%s/%s", + ZVOL_DEV_DIR, name); + } else if (strcmp(mn, rawname) == 0) { + (void) snprintf(path, sizeof (path), "%s/%s", + ZVOL_RDEV_DIR, name); + } else { + return (DEVFSADM_CONTINUE); + } + + (void) devfsadm_mklink(path, node, minor, 0); + } + return (DEVFSADM_CONTINUE); +} diff --git a/usr/src/cmd/dfs.cmds/shareall/shareall.sh b/usr/src/cmd/dfs.cmds/shareall/shareall.sh index 0e6534e41b..8764583778 100644 --- a/usr/src/cmd/dfs.cmds/shareall/shareall.sh +++ b/usr/src/cmd/dfs.cmds/shareall/shareall.sh @@ -88,3 +88,10 @@ then else # for every file system ... cat $infile|/sbin/sh fi + +# If the user has ZFS installed, invoke 'zfs share -a' to share all ZFS +# filesystems +if [ -x /usr/sbin/zfs ] +then + /usr/sbin/zfs share -a +fi diff --git a/usr/src/cmd/find/Makefile b/usr/src/cmd/find/Makefile index 78d41d0b49..d43075a5a6 100644 --- a/usr/src/cmd/find/Makefile +++ b/usr/src/cmd/find/Makefile @@ -22,8 +22,8 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright (c) 1989,1996 by Sun Microsystems, Inc. -# All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
# PROG= find @@ -34,6 +34,8 @@ include ../Makefile.cmd $(XPG4) := CFLAGS += -DXPG4 CPPFLAGS += -D_FILE_OFFSET_BITS=64 +LDLIBS += -lsec + .KEEP_STATE: all: $(PROG) $(XPG4) diff --git a/usr/src/cmd/find/find.c b/usr/src/cmd/find/find.c index dd8fab87ad..e6944d0031 100644 --- a/usr/src/cmd/find/find.c +++ b/usr/src/cmd/find/find.c @@ -60,7 +60,6 @@ #include <langinfo.h> #include <ftw.h> - #define A_DAY (long)(60*60*24) /* a day full of seconds */ #define A_MIN (long)(60) #define BLKSIZ 512 @@ -964,8 +963,7 @@ struct FTW *state; * nftw()) of the file */ filename = gettail(name); - val = (acl(filename, GETACLCNT, 0, NULL) > - MIN_ACL_ENTRIES); + val = acl_trivial(name); break; } /* @@ -1352,7 +1350,7 @@ list(file, stp) struct stat *stp; { char pmode[32], uname[32], gname[32], fsize[32], ftime[32]; - + int trivial; /* * Each line below contains the relevant permission (column 1) and character @@ -1456,7 +1454,11 @@ list(file, stp) tailname = gettail(file); - if (acl(tailname, GETACLCNT, 0, NULL) > MIN_ACL_ENTRIES) + trivial = acl_trivial(tailname); + if (trivial == -1) + trivial = 0; + + if (trivial == 1) pmode[permoffset(who) + 1] = '+'; else pmode[permoffset(who) + 1] = ' '; diff --git a/usr/src/cmd/fm/dicts/Makefile b/usr/src/cmd/fm/dicts/Makefile index 4cd171fd1c..d2ac4d192b 100644 --- a/usr/src/cmd/fm/dicts/Makefile +++ b/usr/src/cmd/fm/dicts/Makefile @@ -31,7 +31,8 @@ common_DCNAMES = \ FMD \ SMF \ SUNOS \ - PCI + PCI \ + ZFS sparc_DCNAMES = \ SUN4U \ diff --git a/usr/src/cmd/fm/dicts/ZFS.dict b/usr/src/cmd/fm/dicts/ZFS.dict new file mode 100644 index 0000000000..0166183535 --- /dev/null +++ b/usr/src/cmd/fm/dicts/ZFS.dict @@ -0,0 +1,41 @@ +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). 
You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +#ident "%Z%%M% %I% %E% SMI" +# +# DO NOT EDIT -- this file is generated by the Event Registry. +# + +FMDICT: name=ZFS version=1 maxkey=1 + +ereport.fs.zfs.pool.corrupt_cache=1 +ereport.fs.zfs.device.missing_r=2 +ereport.fs.zfs.device.missing_nr=3 +ereport.fs.zfs.device.corrupt_label_r=4 +ereport.fs.zfs.device.corrupt_label_nr=5 +ereport.fs.zfs.pool.bad_guid_sum=6 +ereport.fs.zfs.pool.corrupt_pool=7 +ereport.fs.zfs.object.corrupt_data=8 +ereport.fs.zfs.device.failing=9 +ereport.fs.zfs.device.version_mismatch=10 diff --git a/usr/src/cmd/fm/dicts/ZFS.po b/usr/src/cmd/fm/dicts/ZFS.po new file mode 100644 index 0000000000..ea5a9c6195 --- /dev/null +++ b/usr/src/cmd/fm/dicts/ZFS.po @@ -0,0 +1,188 @@ +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +#ident "%Z%%M% %I% %E% SMI" +# +# DO NOT EDIT -- this file is generated by the Event Registry. +# +# +# code: ZFS-8000-14 +# keys: ereport.fs.zfs.pool.corrupt_cache +# +msgid "ZFS-8000-14.type" +msgstr "Error" +msgid "ZFS-8000-14.severity" +msgstr "Critical" +msgid "ZFS-8000-14.description" +msgstr "The ZFS cache file is corrupted. Refer to %s for more information." +msgid "ZFS-8000-14.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-14.impact" +msgstr "ZFS filesystems are not available" +msgid "ZFS-8000-14.action" +msgstr "\nTo determine which pools are available for import, run the 'zpool import'\ncommand:\n\n\n# zpool import\n pool: test\n id: 12743384782310107047\n state: ONLINE\naction: The pool can be imported using its name or numeric identifier.\nconfig:\n\n test ONLINE\n c0t0d0 ONLINE\n#\n\n\nThis will automatically scan /dev/dsk for any devices\npart of a pool. If you previously had storage pools with devices in a\ndifferent directory, use the '-d' option to 'zpool import' to scan alternate\nlocations.\n\nOnce you have determined which pools are available for import, you can\nimport the pool explicitly by specifying the name or numeric identifier:\n\n\n# zpool import test\n#\n\n\nAlternately, you can import all available pools by specifying the '-a'\noption.
Once a pool has been imported, the ZFS cache will be repaired so\nthat the pool will appear normally in the future.\n " +# +# code: ZFS-8000-2Q +# keys: ereport.fs.zfs.device.missing_r +# +msgid "ZFS-8000-2Q.type" +msgstr "Error" +msgid "ZFS-8000-2Q.severity" +msgstr "Major" +msgid "ZFS-8000-2Q.description" +msgstr "A device in a replicated configuration could not be\n opened. Refer to %s for more information." +msgid "ZFS-8000-2Q.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-2Q.impact" +msgstr "The pool is no longer providing the configured level of\n replication." +msgid "ZFS-8000-2Q.action" +msgstr "\nIf this error was encountered while running 'zpool import', please see the\nsection below. Otherwise, run 'zpool status -x' to determine which pool has\nexperienced a failure:\n\n\n# zpool status -x\n pool: test\n state: DEGRADED\nstatus: One or more devices could not be opened. Sufficient replicas exist for\n the pool to continue functioning in a degraded state.\naction: Attach the missing device and online it using 'zpool online'.\n see: http://www.sun.com/msg/ZFS-8000-2Q\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test DEGRADED 0 0 0\n mirror DEGRADED 0 0 0\n c0t0d0 ONLINE 0 0 0\n c0t0d1 FAULTED 0 0 0 cannot open\n#\n\n\nDetermine which device failed to open by looking for a FAULTED device with\nan additional \"cannot open\" message. If this device has been inadvertently\nremoved from the system, attach the device and bring it online with 'zpool\nonline':\n\n\n# zpool online test c0t0d1\nBringing device 'c0t0d1' online\n#\n\n\nIf the device is no longer available, the device can be replaced using the\n'zpool replace' command:\n\n\n# zpool replace test c0t0d1 c0t0d2\n#\n\n\nExisting data will be resilvered to the new device. 
Once the resilvering\ncompletes, the device will be removed from the pool.\n\n\nIf this error is encountered during a 'zpool import', it means that one of\nthe devices is not attached to the system:\n\n\n# zpool import\n pool: test\n id: 10121266328238932306\n state: DEGRADED\nstatus: One or more devices are missing from the system.\naction: The pool can be imported despite missing or damaged devices. The\n fault tolerance of the pool may be compromised if imported.\n see: http://www.sun.com/msg/ZFS-8000-2Q\nconfig:\n\n test DEGRADED\n mirror DEGRADED\n c0t0d0 ONLINE\n c0t0d1 FAULTED cannot open\n\n\nUnlike when the pool is active on the system, the device cannot be replaced\nwhile the pool is exported. If the device can be attached to the system,\nattach the device and run 'zpool import' again.\n\nAlternatively, the pool can be imported as-is, though it will be placed in\nthe DEGRADED state due to a missing device. Once the pool has been\nimported, the missing device can be replaced as described above.\n " +# +# code: ZFS-8000-3C +# keys: ereport.fs.zfs.device.missing_nr +# +msgid "ZFS-8000-3C.type" +msgstr "Error" +msgid "ZFS-8000-3C.severity" +msgstr "Critical" +msgid "ZFS-8000-3C.description" +msgstr "A device could not be opened and no replicas are available. Refer to %s for more information." +msgid "ZFS-8000-3C.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-3C.impact" +msgstr "The pool is no longer available" +msgid "ZFS-8000-3C.action" +msgstr "\nIf this error was encountered while running 'zpool import', please see the\nsection below. Otherwise, run 'zpool status -x' to determine which pool\nhas experienced a failure:\n\n\n# zpool status -x\n pool: test\n state: FAULTED\nstatus: One or more devices could not be opened. 
There are insufficient\n replicas for the pool to continue functioning.\naction: Attach the missing device and online it using 'zpool online'.\n see: http://www.sun.com/msg/ZFS-8000-3C\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test FAULTED 0 0 0 insufficient replicas\n c0t0d0 ONLINE 0 0 0\n c0t0d1 FAULTED 0 0 0 cannot open\n#\n\n\nAttach the device to the system and run 'zpool status' again. The pool\nshould automatically detect the newly attached device and resume\nfunctioning. You may have to mount the filesystems in the pool explicitly\nusing 'zfs mount'.\n\nIf the device is no longer available and cannot be reattached to the system,\nthen the pool must be destroyed and re-created from a backup source.\n\n\nIf this error is encountered during a 'zpool import', it means that one of\nthe devices is not attached to the system:\n\n\n# zpool import\n pool: test\n id: 10121266328238932306\n state: FAULTED\nstatus: One or more devices are missing from the system.\naction: The pool cannot be imported. Attach the missing devices and\n try again.\n see: http://www.sun.com/msg/ZFS-8000-3C\nconfig:\n\n test FAULTED insufficient replicas\n c0t0d0 ONLINE\n c0t0d1 FAULTED cannot open\n\n\nThe pool cannot be imported until the missing device is attached to the\nsystem. If the device has been made available in an alternate location, you\ncan use the '-d' option to 'zpool import' to search for devices in a\ndifferent directory.\n " +# +# code: ZFS-8000-4J +# keys: ereport.fs.zfs.device.corrupt_label_r +# +msgid "ZFS-8000-4J.type" +msgstr "Error" +msgid "ZFS-8000-4J.severity" +msgstr "Major" +msgid "ZFS-8000-4J.description" +msgstr "A device could not be opened due to a missing or invalid\n device label. Refer to %s for more information." +msgid "ZFS-8000-4J.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-4J.impact" +msgstr "The pool is no longer providing the configured level of\n replication." 
+msgid "ZFS-8000-4J.action" +msgstr "\nIf this error is encountered while running 'zpool import', see the section\nbelow. Otherwise, run 'zpool status -x' to determine which pool has the\ndamaged device:\n\n\n# zpool status -x\n pool: test\n state: DEGRADED\nstatus: One or more devices could not be used because the label is missing or\n invalid. Sufficient replicas exist for the pool to continue\n functioning in a degraded state.\naction: Replace the device using 'zpool replace'.\n see: http://www.sun.com/msg/ZFS-8000-4J\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test DEGRADED 0 0 0\n mirror DEGRADED 0 0 0\n c0t0d0 ONLINE 0 0 0\n c0t0d1 FAULTED 0 0 0 corrupted data\n\n\nDetermine which device is damaged by locating the FAULTED device showing\n\"corrupted data\". This indicates that the device label was corrupt. Because\nZFS could not identify the device as the one expected, no automatic resilvering\nwill take place.\n\nThe device can be resilvered by issuing 'zpool replace':\n\n\n# zpool replace test c0t0d1\n\n\nThis will replace the device in situ. To replace the device with another,\ndifferent, device, run 'zpool replace' with an additional argument specifying\nthe new device:\n\n\n# zpool replace test c0t0d1 c0t0d2\n\n\nZFS will begin migrating data to the new device as soon as the replace is\nissued. Once the resilvering completes, the original device (if different from\nthe replacement) will be removed, and the pool will be restored to the ONLINE\nstate.\n\n\nIf this error is encountered while running 'zpool import', the pool can still\nbe imported despite the failure:\n\n\n# zpool import\n pool: test\n id: 5187963178597328409\n state: DEGRADED\nstatus: One or more devices contains corrupted data.
The fault tolerance of\n the pool may be compromised if imported.\naction: The pool can be imported using its name or numeric identifier.\n see: http://www.sun.com/msg/ZFS-8000-4J\nconfig:\n\n test DEGRADED\n mirror DEGRADED\n /disk/a ONLINE\n /disk/b FAULTED corrupted data\n\n\nTo import the pool, run 'zpool import':\n\n\n# zpool import test\n\n\nOnce the pool has been imported, the damaged device can be replaced according to\nthe above procedure.\n " +# +# code: ZFS-8000-5E +# keys: ereport.fs.zfs.device.corrupt_label_nr +# +msgid "ZFS-8000-5E.type" +msgstr "Error" +msgid "ZFS-8000-5E.severity" +msgstr "Critical" +msgid "ZFS-8000-5E.description" +msgstr "A device could not be opened due to a missing or invalid\n device label and no replicas are available. Refer to %s for more information." +msgid "ZFS-8000-5E.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-5E.impact" +msgstr "The pool is no longer available" +msgid "ZFS-8000-5E.action" +msgstr "\nIf this error is encountered during 'zpool import', see the section below.\nOtherwise, run 'zpool status -x' to determine which pool is faulted:\n\n\n# zpool status -x\n pool: test\n state: FAULTED\nstatus: One or more devices could not be used because the the label is missing \n or invalid. There are insufficient replicas for the pool to continue\n functioning.\naction: Destroy and re-create the pool from a backup source.\n see: http://www.sun.com/msg/ZFS-8000-5E\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test FAULTED 0 0 0 insufficient replicas\n c0t0d0 FAULTED 0 0 0 corrupted data\n c0t0d1 ONLINE 0 0 0\n\n\nThe device listed as FAULTED with \"corrupted data\" cannot be opened due to a\ncorrupt label. ZFS will be unable to use the pool, and all data within the pool\nis irrevocably lost. The pool must be destroyed and recreated from an\nappropriate backup source. 
Using replicated configurations will prevent this\nfrom happening in the future.\n\n\nIf this error is encountered during 'zpool import', the action is the same. The\npool cannot be imported - all data is lost and must be restored from an\nappropriate backup source.\n " +# +# code: ZFS-8000-6X +# keys: ereport.fs.zfs.pool.bad_guid_sum +# +msgid "ZFS-8000-6X.type" +msgstr "Error" +msgid "ZFS-8000-6X.severity" +msgstr "Critical" +msgid "ZFS-8000-6X.description" +msgstr "One or more top level devices are missing. Refer to %s for more information." +msgid "ZFS-8000-6X.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-6X.impact" +msgstr "The pool cannot be imported" +msgid "ZFS-8000-6X.action" +msgstr "\nRun 'zpool import' to list which pool cannot be imported:\n\n\n# zpool import\n pool: test\n id: 13783646421373024673\n state: FAULTED\nstatus: One or more devices are missing from the system.\naction: The pool cannot be imported. Attach the missing\n devices and try again.\n see: http://www.sun.com/msg/ZFS-8000-6X\nconfig:\n\n test FAULTED missing device\n c0t0d0 ONLINE\n\n Additional devices are known to be part of this pool, though their\n exact configuration cannot be determined.\n\n\nZFS attempts to store enough configuration data on the devices such that the\nconfiguration is recoverable from any subset of devices. In some cases,\nparticularly when an entire toplevel virtual device is not attached to the\nsystem, ZFS will be unable to determine the complete configuration. 
It will\nalways detect that these devices are missing, even if it cannot identify all of\nthe devices.\n\nThe unknown missing devices must be attached to the system, at which point\n'zpool import' can be used to import the pool.\n " +# +# code: ZFS-8000-72 +# keys: ereport.fs.zfs.pool.corrupt_pool +# +msgid "ZFS-8000-72.type" +msgstr "Error" +msgid "ZFS-8000-72.severity" +msgstr "Critical" +msgid "ZFS-8000-72.description" +msgstr "The metadata required to open the pool is corrupt. Refer to %s for more information." +msgid "ZFS-8000-72.response" +msgstr "No automated response will be taken." +msgid "ZFS-8000-72.impact" +msgstr "The pool is no longer available" +msgid "ZFS-8000-72.action" +msgstr "\nIf this error is encountered during 'zpool import', see the section below.\nOtherwise, run 'zpool status -x' to determine which pool is faulted:\n\n\n# zpool status -x\n# zpool import\n pool: test\n id: 13783646421373024673\n state: FAULTED\nstatus: The pool metadata is corrupted and cannot be opened.\naction: Destroy the pool and restore from backup.\n see: http://www.sun.com/msg/ZFS-8000-72\nconfig:\n\n test FAULTED corrupted data\n c0t0d0 ONLINE\n c0t0d1 ONLINE\n\n\nEven though all the devices are available, the on-disk data has been corrupted\nsuch that the pool cannot be opened. All data within the pool is lost, and the\npool must be destroyed and restored from an appropriate backup source.\n\n\nIf this error is encountered during 'zpool import', the pool is unrecoverable\nand cannot be imported. The pool must be restored from an appropriate backup\nsource.\n " +# +# code: ZFS-8000-8A +# keys: ereport.fs.zfs.object.corrupt_data +# +msgid "ZFS-8000-8A.type" +msgstr "Error" +msgid "ZFS-8000-8A.severity" +msgstr "Critical" +msgid "ZFS-8000-8A.description" +msgstr "A file or directory could not be read due to corrupt data. Refer to %s for more information." +msgid "ZFS-8000-8A.response" +msgstr "No automated response will be taken." 
+msgid "ZFS-8000-8A.impact" +msgstr "The file or directory is unavailable." +msgid "ZFS-8000-8A.action" +msgstr "\nRun 'zpool status -x' to determine which pool is damaged:\n\n\n# zpool status -x\n pool: test\n state: ONLINE\nstatus: One or more devices has experienced an error and no valid replicas\n are available. Some filesystem data is corrupt, and applications\n may have been affected.\naction: Destroy the pool and restore from backup.\n see: http://www.sun.com/msg/ZFS-8000-8A\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test ONLINE 0 0 2\n c0t0d0 ONLINE 0 0 2\n c0t0d1 ONLINE 0 0 0\n\n\nUnfortunately, the data cannot be repaired, and the only choice to repair the\ndata is to restore the pool from backup. Applications attempting to access the\ncorrupted data will get an error (EIO), and data may be permanently lost.\n " +# +# code: ZFS-8000-9P +# keys: ereport.fs.zfs.device.failing +# +msgid "ZFS-8000-9P.type" +msgstr "Error" +msgid "ZFS-8000-9P.severity" +msgstr "Minor" +msgid "ZFS-8000-9P.description" +msgstr "A device has experienced uncorrectable errors in a\n replicated configuration. Refer to %s for more information." +msgid "ZFS-8000-9P.response" +msgstr "ZFS has attempted to repair the affected data." +msgid "ZFS-8000-9P.impact" +msgstr "The system is unaffected. The detected errors may\n indicate future failure." +msgid "ZFS-8000-9P.action" +msgstr "\nRun 'zpool status -x' to determine which pool has experienced errors:\n\n\n# zpool status\n pool: test\n state: ONLINE\nstatus: One or more devices has experienced an unrecoverable error. An\n attempt was made to correct the error. 
Applications are unaffected.\naction: Determine if the device needs to be replaced, and clear the errors\n using 'zpool online' or replace the device with 'zpool replace'.\n see: http://www.sun.com/msg/ZFS-8000-9P\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test ONLINE 0 0 0\n mirror ONLINE 0 0 0\n c0t0d0 ONLINE 0 0 2\n c0t0d1 ONLINE 0 0 0\n\n\nFind the device with a non-zero error count for READ, WRITE, or CKSUM. This\nindicates that the device has experienced a read I/O error, write I/O error, or\nchecksum validation error. Because the device is part of a mirror or RAID-Z\ndevice, ZFS was able to recover from the error and subsequently repair the\ndamaged data.\n\nThese error counts may or may not indicate that the device needs replacement.\nIt depends on how the errors were caused, which the administrator needs to\ndetermine. For example, the following cases will all produce errors that do not\nindicate potential device failure:\n\n\nA network attached device lost connectivity but has now\nrecovered\nA device suffered from a bit flip, an expected event over long\nperiods of time\nAn administrator accidentally wrote over a portion of the disk using\nanother program\n\n\nIn these cases, the presence of errors does not indicate that the device is\nlikely to fail in the future, and therefore does not need to be replaced. If\nthis is the case, then the device errors should be cleared using 'zpool online':\n\n\n# zpool online test c0t0d0\n\n\nOn the other hand, errors may very well indicate that the device has failed or\nis about to fail. If there are continual I/O errors to a device that is\notherwise attached and functioning on the system, it most likely needs to be\nreplaced. The administrator should check the system log for any driver\nmessages that may indicate hardware failure. 
If it is determined that the\ndevice needs to be replaced, then the 'zpool replace' command should be used:\n\n\n# zpool replace test c0t0d0 c0t0d2\n\n\nThis will attach the new device to the pool and begin resilvering data to it.\nOnce the resilvering process is complete, the old device will automatically be\nremoved from the pool, at which point it can safely be removed from the system.\nIf the device needs to be replaced in-place (because there are no available\nspare devices), the original device can be removed and replaced with a new\ndevice, at which point a different form of 'zpool replace' can be used:\n\n\n# zpool replace test c0t0d0\n\n\nThis assumes that the original device at 'c0t0d0' has been replaced with a new\ndevice under the same path, and will be replaced appropriately.\n\nYou can monitor the progress of the resilvering operation by using the 'zpool\nstatus -x' command:\n\n\n# zpool status -x\n pool: test\n state: DEGRADED\nstatus: One or more devices is currently being replaced. The pool may not be\n providing the necessary level of replication.\naction: Wait for the resilvering operation to complete\n scrub: resilver in progress, 0.14% done, 0h0m to go\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test ONLINE 0 0 0\n mirror ONLINE 0 0 0\n replacing ONLINE 0 0 0\n c0t0d0 ONLINE 0 0 3\n c0t0d2 ONLINE 0 0 0 58.5K resilvered\n c0t0d1 ONLINE 0 0 0\n\n " +# +# code: ZFS-8000-A5 +# keys: ereport.fs.zfs.device.version_mismatch +# +msgid "ZFS-8000-A5.type" +msgstr "Error" +msgid "ZFS-8000-A5.severity" +msgstr "Major" +msgid "ZFS-8000-A5.description" +msgstr "The on-disk version is not compatible with the running\n system. Refer to %s for more information." +msgid "ZFS-8000-A5.response" +msgstr "No automated response will occur." +msgid "ZFS-8000-A5.impact" +msgstr "The pool is unavailable" +msgid "ZFS-8000-A5.action" +msgstr "\nIf this error is seen during 'zpool import', see the section below. 
Otherwise,\nrun 'zpool status -x' to determine which pool is faulted:\n\n\n# zpool status -x\n pool: test\n state: FAULTED\nstatus: The ZFS version for the pool is incompatible with the software running\n on this system.\naction: Destroy and re-create the pool.\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test FAULTED 0 0 0 incompatible version\n mirror ONLINE 0 0 0\n c0t0d0 ONLINE 0 0 0\n c0t0d1 ONLINE 0 0 0\n\n\nThe pool cannot be used on this system. Either move the disks to the system\nwhere they were originally created, or destroy the pool and re-create it from\nbackup.\n\n\nIf this error is seen during import, the pool cannot be imported on the current\nsystem. The disks must be attached to the system which originally created the\npool, and imported there.\n " diff --git a/usr/src/cmd/fs.d/Makefile b/usr/src/cmd/fs.d/Makefile index 38039de5ab..b0653b5138 100644 --- a/usr/src/cmd/fs.d/Makefile +++ b/usr/src/cmd/fs.d/Makefile @@ -46,7 +46,8 @@ DEFAULTFILES= fs.dfl include ../Makefile.cmd SUBDIR1= lofs -SUBDIR2= fd pcfs nfs hsfs proc ctfs udfs ufs tmpfs cachefs autofs mntfs objfs +SUBDIR2= zfs fd pcfs nfs hsfs proc ctfs udfs ufs tmpfs cachefs autofs mntfs \ + objfs i386_SUBDIRS= xmemfs i386_I18NDIRS= xmemfs SUBDIRS= $(SUBDIR1) $(SUBDIR2) $($(MACH)_SUBDIRS) diff --git a/usr/src/cmd/fs.d/df.c b/usr/src/cmd/fs.d/df.c index 0d55abfcf5..6852626a68 100644 --- a/usr/src/cmd/fs.d/df.c +++ b/usr/src/cmd/fs.d/df.c @@ -24,13 +24,14 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" +#include <dlfcn.h> #include <stdio.h> #include <stdarg.h> #include <string.h> @@ -51,6 +52,7 @@ #include <sys/mkdev.h> #include <sys/int_limits.h> #include <sys/zone.h> +#include <libzfs.h> #include "fslib.h" @@ -171,6 +173,7 @@ struct df_request { #define DFR_MOUNT_POINT(dfrp) (dfrp)->dfr_mte->mte_mount->mnt_mountp #define DFR_SPECIAL(dfrp) (dfrp)->dfr_mte->mte_mount->mnt_special +#define DFR_FSTYPE(dfrp) (dfrp)->dfr_mte->mte_mount->mnt_fstype #define DFR_ISMOUNTEDFS(dfrp) ((dfrp)->dfr_mte != NULL) #define DFRP(p) ((struct df_request *)(p)) @@ -236,9 +239,23 @@ static void parse_options(int, char **); static char *basename(char *); +/* ARGSUSED */ +static void +dummy_error_handler(const char *fmt, va_list ap) +{ + /* Do nothing */ +} + +static zfs_handle_t *(*_zfs_open)(const char *, int); +static void (*_zfs_close)(zfs_handle_t *); +static uint64_t (*_zfs_prop_get_int)(zfs_handle_t *, zfs_prop_t); +static void (*_zfs_set_error_handler)(void (*)(const char *, va_list)); + void main(int argc, char *argv[]) { + void *hdl; + (void) setlocale(LC_ALL, ""); #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ @@ -252,6 +269,32 @@ main(int argc, char *argv[]) sysv3_set = getenv("SYSV3"); #endif /* _iBCS2 */ + /* + * Dynamically check for libzfs, in case the user hasn't installed the + * SUNWzfs packages. A basic utility such as df shouldn't depend on + * optional filesystems. + */ + if ((hdl = dlopen("libzfs.so", RTLD_LAZY)) != NULL) { + _zfs_set_error_handler = (void (*)()) + dlsym(hdl, "zfs_set_error_handler"); + _zfs_open = (zfs_handle_t *(*)())dlsym(hdl, "zfs_open"); + _zfs_close = (void (*)())dlsym(hdl, "zfs_close"); + _zfs_prop_get_int = (uint64_t (*)()) + dlsym(hdl, "zfs_prop_get_int"); + + if (_zfs_set_error_handler != NULL) { + assert(_zfs_open != NULL); + assert(_zfs_close != NULL); + assert(_zfs_prop_get_int != NULL); + + /* + * Disable ZFS error reporting, so we don't get messages + * like "can't open ..." 
under race conditions. + */ + _zfs_set_error_handler(dummy_error_handler); + } + } + if (EQ(program_name, DEVNM_CMD)) do_devnm(argc, argv); @@ -1169,6 +1212,68 @@ number_to_scaled_string( return (buf); } +/* + * The statvfs() implementation allows us to return only two values, the total + * number of blocks and the number of blocks free. The equation 'used = total - + * free' will not work for ZFS filesystems, due to the nature of pooled storage. + * We choose to return values in the statvfs structure that will produce correct + * results for 'used' and 'available', but not 'total'. This function will open + * the underlying ZFS dataset if necessary and get the real value. + */ +static void +adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total, + uint64_t blocksize) +{ + zfs_handle_t *zhp; + char *dataset, *slash; + uint64_t quota; + + if (strcmp(DFR_FSTYPE(dfrp), MNTTYPE_ZFS) != 0 || + _zfs_open == NULL) + return; + + /* + * We want to get the total size for this filesystem as bounded by any + * quotas. In order to do this, we start at the current filesystem and + * work upwards until we find a dataset with a quota. If we reach the + * pool itself, then the total space is the amount used plus the amount + * available. 
+ */ + if ((dataset = strdup(DFR_SPECIAL(dfrp))) == NULL) + return; + + slash = dataset + strlen(dataset); + do { + *slash = '\0'; + + if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) { + free(dataset); + return; + } + + if ((quota = _zfs_prop_get_int(zhp, ZFS_PROP_QUOTA)) != 0) { + *total = quota / blocksize; + _zfs_close(zhp); + free(dataset); + return; + } + + _zfs_close(zhp); + + } while ((slash = strrchr(dataset, '/')) != NULL); + + + if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) { + free(dataset); + return; + } + + *total = (_zfs_prop_get_int(zhp, ZFS_PROP_USED) + + _zfs_prop_get_int(zhp, ZFS_PROP_AVAILABLE)) / blocksize; + + _zfs_close(zhp); + free(dataset); +} /* * The output will appear properly columnized regardless of the names of @@ -1178,6 +1283,7 @@ static void g_output(struct df_request *dfrp, struct statvfs64 *fsp) { fsblkcnt64_t available_blocks = fsp->f_bavail; + fsblkcnt64_t total_blocks = fsp->f_blocks; numbuf_t total_blocks_buf; numbuf_t total_files_buf; numbuf_t free_blocks_buf; @@ -1258,9 +1364,11 @@ g_output(struct df_request *dfrp, struct statvfs64 *fsp) if ((long long)available_blocks < (long long)0) available_blocks = (fsblkcnt64_t)0; + adjust_total_blocks(dfrp, &total_blocks, fsp->f_frsize); + (void) printf("%*s %-*s %*s %-*s %*s %-*s %*s %-*s\n", NCOL1_WIDTH, number_to_string(total_blocks_buf, - fsp->f_blocks, fsp->f_frsize, 512), + total_blocks, fsp->f_frsize, 512), SCOL1_WIDTH, total_blocks_str, NCOL2_WIDTH, number_to_string(free_blocks_buf, fsp->f_bfree, fsp->f_frsize, 512), @@ -1346,6 +1454,8 @@ k_output(struct df_request *dfrp, struct statvfs64 *fsp) file_system = ""; } + adjust_total_blocks(dfrp, &total_blocks, fsp->f_frsize); + if (use_scaling) { /* comes from the -h option */ (void) printf("%-*s %*s %*s %*s %-*s %-s\n", FILESYSTEM_WIDTH, file_system, @@ -1428,6 +1538,7 @@ strings_init() static void t_output(struct df_request *dfrp, struct statvfs64 *fsp) { + fsblkcnt64_t total_blocks = fsp->f_blocks; numbuf_t 
total_blocks_buf; numbuf_t total_files_buf; numbuf_t free_blocks_buf; @@ -1435,6 +1546,8 @@ t_output(struct df_request *dfrp, struct statvfs64 *fsp) STRINGS_INIT(); + adjust_total_blocks(dfrp, &total_blocks, fsp->f_frsize); + (void) printf("%-*s(%-*s): %*s %s %*s %s\n", MOUNT_POINT_WIDTH, DFR_MOUNT_POINT(dfrp), SPECIAL_DEVICE_WIDTH, DFR_SPECIAL(dfrp), @@ -1456,7 +1569,7 @@ t_output(struct df_request *dfrp, struct statvfs64 *fsp) (void) printf("%*s: %*s %s %*s %s\n", MNT_SPEC_WIDTH, total_str, BLOCK_WIDTH, number_to_string(total_blocks_buf, - fsp->f_blocks, fsp->f_frsize, 512), + total_blocks, fsp->f_frsize, 512), blocks_str, NFILES_WIDTH, number_to_string(total_files_buf, fsp->f_files, 1, 1), diff --git a/usr/src/cmd/fs.d/nfs/svc/nfs-server b/usr/src/cmd/fs.d/nfs/svc/nfs-server index 36cf2cb3ad..dc3339e3d1 100644 --- a/usr/src/cmd/fs.d/nfs/svc/nfs-server +++ b/usr/src/cmd/fs.d/nfs/svc/nfs-server @@ -42,8 +42,7 @@ case "$1" in fi # If /etc/dfs/dfstab exists and has non-blank or non-commented-out - # lines, then run shareall to export them, and then start up mountd - # and nfsd if anything is exported. + # lines, then run shareall to export them. startnfsd=0 if [ -f /etc/dfs/dfstab ] && /usr/bin/egrep -v '^[ ]*(#|$)' \ @@ -52,6 +51,14 @@ case "$1" in /usr/sbin/shareall -F nfs fi + # Share any ZFS filesystems marked for sharing. + + if [ -x /usr/sbin/zfs ]; then + /usr/sbin/zfs share -a + fi + + # Start up mountd and nfsd if anything is exported. + if /usr/bin/grep -s nfs /etc/dfs/sharetab >/dev/null; then startnfsd=1 fi @@ -88,6 +95,14 @@ case "$1" in 'stop') /usr/bin/pkill -x -u 0,1 -z $zone '(nfsd|mountd)' + # Unshare shared ZFS filesystems. + + if [ -x /usr/sbin/zfs ]; then + /usr/sbin/zfs unshare -a + fi + + # Unshare remaining shared filesystems. 
+ if /usr/bin/grep -s nfs /etc/dfs/sharetab >/dev/null; then /usr/sbin/unshareall -F nfs fi diff --git a/usr/src/cmd/fs.d/zfs/Makefile b/usr/src/cmd/fs.d/zfs/Makefile new file mode 100644 index 0000000000..39187b544d --- /dev/null +++ b/usr/src/cmd/fs.d/zfs/Makefile @@ -0,0 +1,58 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +SUBDIRS= fstyp + +all:= TARGET= all +install:= TARGET= install +clean:= TARGET= clean +clobber:= TARGET= clobber +lint:= TARGET= lint +catalog:= TARGET= catalog + +# for messaging catalog +# +POFILE= zfs.po +POFILES= $(SUBDIRS:%=%/%.po) + +.KEEP_STATE: + +.PARALLEL: $(SUBDIRS) + +all install clean clobber lint: $(SUBDIRS) + +catalog: $(POFILE) + +$(POFILE): $(SUBDIRS) + $(RM) $@ + cat $(POFILES) > $@ + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/cmd/fs.d/zfs/fstyp/Makefile b/usr/src/cmd/fs.d/zfs/fstyp/Makefile new file mode 100644 index 0000000000..8bad1c0f15 --- /dev/null +++ b/usr/src/cmd/fs.d/zfs/fstyp/Makefile @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +FSTYPE= zfs +LIBPROG= fstyp + +include ../../Makefile.fstype + +POFILE= fstyp.po + +catalog: $(POFILE) + +LDLIBS += -lzfs -lnvpair + +lint := PROG = $(LIBPROG) +lint: lint_PROG diff --git a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c new file mode 100644 index 0000000000..90de2fe5a6 --- /dev/null +++ b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c @@ -0,0 +1,156 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> +#include <libintl.h> +#include <locale.h> +#include <string.h> +#include <libzfs.h> +#include <errno.h> + +static void +usage(void) +{ + (void) fprintf(stderr, gettext("Usage: fstype [-v] <device>\n")); + exit(1); +} + +static void +dump_nvlist(nvlist_t *list, int indent) +{ + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { + switch (nvpair_type(elem)) { + case DATA_TYPE_STRING: + { + char *value; + + verify(nvpair_value_string(elem, &value) == 0); + (void) printf("%*s%s='%s'\n", indent, "", + nvpair_name(elem), value); + } + break; + + case DATA_TYPE_UINT64: + { + uint64_t value; + + verify(nvpair_value_uint64(elem, &value) == 0); + (void) printf("%*s%s=%llu\n", indent, "", + nvpair_name(elem), (u_longlong_t)value); + } + break; + + case DATA_TYPE_NVLIST: + { + nvlist_t *value; + + verify(nvpair_value_nvlist(elem, &value) == 0); + (void) printf("%*s%s\n", indent, "", + nvpair_name(elem)); + dump_nvlist(value, indent + 4); + } + break; + + case DATA_TYPE_NVLIST_ARRAY: + { + nvlist_t **value; + uint_t c, count; + + verify(nvpair_value_nvlist_array(elem, &value, + &count) == 0); + + for (c = 0; c < count; c++) { + (void) printf("%*s%s[%u]\n", indent, "", + nvpair_name(elem), c); + dump_nvlist(value[c], indent + 8); + } + } + break; + + default: + + (void) printf("bad config type %d for %s\n", + nvpair_type(elem), nvpair_name(elem)); + } + } +} + +int +main(int argc, char **argv) +{ + int c, fd; + int verbose = 0; + nvlist_t *config; + + (void) setlocale(LC_ALL, ""); + +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + while ((c = getopt(argc, argv, "v")) != -1) { + switch (c) { + case 'v': + verbose = 1; + break; + default: + usage(); + break; + } + } + + argv += optind; + argc -= optind; + + if (argc != 1) + usage(); + + if 
((fd = open64(argv[0], O_RDONLY)) < 0) { + perror("open64"); + return (1); + } + + if ((config = zpool_read_label(fd)) == NULL) + return (1); + + (void) printf("zfs\n"); + + if (verbose) + dump_nvlist(config, 4); + + (void) close(fd); + + return (0); +} diff --git a/usr/src/cmd/getfacl/getfacl.c b/usr/src/cmd/getfacl/getfacl.c index 2c05291fbc..b2e56fdd3a 100644 --- a/usr/src/cmd/getfacl/getfacl.c +++ b/usr/src/cmd/getfacl/getfacl.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -112,6 +112,14 @@ main(int argc, char *argv[]) /* Get ACL info of the files */ errno = 0; if ((aclcnt = acl(filep, GETACLCNT, 0, NULL)) < 0) { + if (errno == ENOSYS) { + (void) fprintf(stderr, + gettext("File system doesn't support " + "aclent_t style ACL's.\n" + "See acl(5) for more information on " + "Solaris ACL support.\n")); + exit(2); + } perror(filep); exit(2); } @@ -301,7 +309,7 @@ pruname(uid_t uid) static char uidp[10]; /* big enough */ passwdp = getpwuid(uid); - if (passwdp == (struct passwd *) NULL) { + if (passwdp == (struct passwd *)NULL) { /* could not get passwd information: display uid instead */ (void) sprintf(uidp, "%ld", (long)uid); return (uidp); @@ -316,7 +324,7 @@ prgname(gid_t gid) static char gidp[10]; /* big enough */ groupp = getgrgid(gid); - if (groupp == (struct group *) NULL) { + if (groupp == (struct group *)NULL) { /* could not get group information: display gid instead */ (void) sprintf(gidp, "%ld", (long)gid); return (gidp); diff --git a/usr/src/cmd/ls/Makefile.com b/usr/src/cmd/ls/Makefile.com index e91d7aaf74..6d695f3cd5 100644 --- a/usr/src/cmd/ls/Makefile.com +++ b/usr/src/cmd/ls/Makefile.com @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -36,6 +36,7 @@ SRCS= $(OBJS:%.o=../%.c) include ../../Makefile.cmd +LDLIBS += -lsec CFLAGS += $(CCVERBOSE) $(XPG4) := CFLAGS += -DXPG4 diff --git a/usr/src/cmd/ls/ls.c b/usr/src/cmd/ls/ls.c index e27311335a..540be3b046 100644 --- a/usr/src/cmd/ls/ls.c +++ b/usr/src/cmd/ls/ls.c @@ -61,7 +61,7 @@ #include <unistd.h> #include <libgen.h> #include <errno.h> -#include <libcmdutils.h> +#include <aclutils.h> #ifndef STANDALONE #define TERMINFO @@ -139,6 +139,7 @@ struct lbuf { char acl; /* indicate there are additional acl entries */ int cycle; /* cycle detected flag */ struct ditem *ancinfo; /* maintains ancestor info */ + acl_t *aclp; /* ACL if present */ }; struct dchain { @@ -219,6 +220,7 @@ static int Hflg; static int Lflg; static int Rflg; static int Sflg; +static int vflg; static long hscale; static mode_t flags; static int err = 0; /* Contains return code */ @@ -284,9 +286,8 @@ main(int argc, char *argv[]) mflg = 0; } - while ((c = getopt(argc, argv, - "aAbcCdeEfFghHilLmnopqrRsStux1@")) != EOF) + "aAbcCdeEfFghHilLmnopqrRsStux1@v")) != EOF) switch (c) { case 'a': aflg++; @@ -415,6 +416,18 @@ main(int argc, char *argv[]) cflg = 0; uflg++; continue; + case 'v': + vflg++; +#if !defined(XPG4) + if (lflg) + continue; +#endif + lflg++; + statreq++; + Cflg = 0; + xflg = 0; + mflg = 0; + continue; case 'x': xflg = 1; Cflg = 1; @@ -447,7 +460,7 @@ main(int argc, char *argv[]) } if (opterr) { (void) fprintf(stderr, gettext( - "usage: ls -aAbcCdeEfFghHilLmnopqrRsStux1@ [files]\n")); + "usage: ls -aAbcCdeEfFghHilLmnopqrRsStuxv1@ [files]\n")); exit(2); } @@ -851,6 +864,13 @@ pentry(struct lbuf *ap) curcol += strcol((unsigned char *)dmark); } } + + if (vflg) { + new_line(); + if (p->aclp) { + acl_printacl(p->aclp, num_cols); + } + } } /* print various r,w,x permissions */ @@ -1100,7 +1120,7 @@ gstat(char *file, int argfl, struct ditem *myparent) ssize_t cc; int (*statf)() = ((Lflg) || (Hflg && argfl)) ? 
stat : lstat; int aclcnt; - aclent_t *aclp; + int error; aclent_t *tp; o_mode_t groupperm, mask; int grouppermfound, maskfound; @@ -1285,75 +1305,77 @@ gstat(char *file, int argfl, struct ditem *myparent) /* ACL: check acl entries count */ if (doacl) { - rep->acl = ' '; - if ((aclcnt = acl(file, GETACLCNT, 0, NULL)) > - MIN_ACL_ENTRIES) { - /* this file has a non-trivial acl */ + error = acl_get(file, 0, &rep->aclp); + if (error) { + (void) fprintf(stderr, + gettext("ls: can't read ACL on %s: %s\n"), + file, acl_strerror(error)); + return (NULL); + } - rep->acl = '+'; + rep->acl = ' '; + if (rep->aclp && + ((acl_flags(rep->aclp) & ACL_IS_TRIVIAL) == 0)) { + rep->acl = '+'; /* - * For files with non-trivial acls, the - * effective group permissions are the - * intersection of the GROUP_OBJ value and - * the CLASS_OBJ (acl mask) value. Determine - * both the GROUP_OBJ and CLASS_OBJ for this - * file and insert the logical AND of those - * two values in the group permissions field - * of the lflags value for this file. + * Special handling for ufs aka aclent_t ACL's */ + if (rep->aclp && + acl_type(rep->aclp) == ACLENT_T) { + /* + * For files with non-trivial acls, the + * effective group permissions are the + * intersection of the GROUP_OBJ value + * and the CLASS_OBJ (acl mask) value. + * Determine both the GROUP_OBJ and + * CLASS_OBJ for this file and insert + * the logical AND of those two values + * in the group permissions field + * of the lflags value for this file. + */ - if ((aclp = (aclent_t *)malloc( - (sizeof (aclent_t)) * aclcnt)) == NULL) { - perror("ls"); - exit(2); - } - - if (acl(file, GETACL, aclcnt, aclp) < 0) { - free(aclp); - (void) fprintf(stderr, "ls: "); - perror(file); - nfiles--; - err = 2; - return (NULL); - } - - /* - * Until found in acl list, assume maximum - * permissions for both group and mask. (Just - * in case the acl lacks either value for - * some reason.) 
- */ - groupperm = 07; - mask = 07; - grouppermfound = 0; - maskfound = 0; - for (tp = aclp; aclcnt--; tp++) { - if (tp->a_type == GROUP_OBJ) { - groupperm = tp->a_perm; - grouppermfound = 1; - continue; - } - if (tp->a_type == CLASS_OBJ) { - mask = tp->a_perm; - maskfound = 1; + /* + * Until found in acl list, assume + * maximum permissions for both group + * a nd mask. (Just in case the acl + * lacks either value for some reason.) + */ + groupperm = 07; + mask = 07; + grouppermfound = 0; + maskfound = 0; + aclcnt = acl_cnt(rep->aclp); + for (tp = + (aclent_t *)acl_data(rep->aclp); + aclcnt--; tp++) { + if (tp->a_type == GROUP_OBJ) { + groupperm = tp->a_perm; + grouppermfound = 1; + continue; + } + if (tp->a_type == CLASS_OBJ) { + mask = tp->a_perm; + maskfound = 1; + } + if (grouppermfound && maskfound) + break; } - if (grouppermfound && maskfound) - break; - } - free(aclp); - /* reset all the group bits */ - rep->lflags &= ~S_IRWXG; + /* reset all the group bits */ + rep->lflags &= ~S_IRWXG; - /* - * Now set them to the logical AND of the - * GROUP_OBJ permissions and the acl mask. - */ + /* + * Now set them to the logical AND of + * the GROUP_OBJ permissions and the + * acl mask. 
+ */ - rep->lflags |= (groupperm & mask) << 3; + rep->lflags |= (groupperm & mask) << 3; + + } } if (atflg && pathconf(file, _PC_XATTR_EXISTS) == 1) diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common index c28b9c84fb..7befb7e3a2 100644 --- a/usr/src/cmd/mdb/Makefile.common +++ b/usr/src/cmd/mdb/Makefile.common @@ -30,11 +30,13 @@ # COMMON_MODULES_PROC = \ dof \ + libavl \ libc \ libnvpair \ libsysevent \ libumem \ libuutil \ + libzpool \ mdb_ds \ mdb_test @@ -72,4 +74,5 @@ COMMON_MODULES_KVM = \ sppp \ ufs \ ufs_log \ - usba + usba \ + zfs diff --git a/usr/src/cmd/mdb/common/modules/genunix/avl.c b/usr/src/cmd/mdb/common/modules/genunix/avl.c new file mode 100644 index 0000000000..b10856cfc3 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/avl.c @@ -0,0 +1,217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/avl.h> + +#include <mdb/mdb_modapi.h> + +struct aw_info { + void *aw_buff; /* buffer to hold the tree's data structure */ + avl_tree_t aw_tree; /* copy of avl_tree_t being walked */ +}; + +/* + * common code used to find the addr of the leftmost child below + * an AVL node + */ +static uintptr_t +avl_leftmostchild(uintptr_t addr, void * buff, size_t offset, size_t size) +{ + avl_node_t *node = (avl_node_t *)((uintptr_t)buff + offset); + + for (;;) { + addr -= offset; + if (mdb_vread(buff, size, addr) == -1) { + mdb_warn("read of avl_node_t failed: %p", addr); + return ((uintptr_t)-1L); + } + if (node->avl_child[0] == NULL) + break; + addr = (uintptr_t)node->avl_child[0]; + } + return (addr); +} + +/* + * initialize a forward walk thru an avl tree. + */ +int +avl_walk_init(mdb_walk_state_t *wsp) +{ + struct aw_info *aw; + avl_tree_t *tree; + uintptr_t addr; + + /* + * allocate the AVL walk data + */ + wsp->walk_data = aw = mdb_zalloc(sizeof (struct aw_info), UM_SLEEP); + + /* + * get an mdb copy of the avl_tree_t being walked + */ + tree = &aw->aw_tree; + if (mdb_vread(tree, sizeof (avl_tree_t), wsp->walk_addr) == -1) { + mdb_warn("read of avl_tree_t failed: %p", wsp->walk_addr); + goto error; + } + if (tree->avl_size < tree->avl_offset + sizeof (avl_node_t)) { + mdb_warn("invalid avl_tree_t at %p, avl_size:%d, avl_offset:%d", + wsp->walk_addr, tree->avl_size, tree->avl_offset); + goto error; + } + + /* + * allocate a buffer to hold the mdb copy of tree's structs + * "node" always points at the avl_node_t field inside the struct + */ + aw->aw_buff = mdb_zalloc(tree->avl_size, UM_SLEEP); + + /* + * get the first avl_node_t address, use same algorithm + * as avl_start() -- leftmost child in tree from root + */ + addr = (uintptr_t)tree->avl_root; + if (addr == NULL) { + wsp->walk_addr = NULL; + return (WALK_NEXT); + } + addr = avl_leftmostchild(addr, aw->aw_buff, tree->avl_offset, + tree->avl_size); + if 
(addr == (uintptr_t)-1L) + goto error; + + wsp->walk_addr = addr; + return (WALK_NEXT); + +error: + if (aw->aw_buff != NULL) + mdb_free(aw->aw_buff, tree->avl_size); + mdb_free(aw, sizeof (struct aw_info)); + return (WALK_ERR); +} + +/* + * At each step, visit (callback) the current node, then move to the next + * in the AVL tree. Uses the same algorithm as avl_walk(). + */ +int +avl_walk_step(mdb_walk_state_t *wsp) +{ + struct aw_info *aw; + size_t offset; + size_t size; + uintptr_t addr; + avl_node_t *node; + int status; + int was_child; + + /* + * don't walk past the end of the tree! + */ + addr = wsp->walk_addr; + if (addr == NULL) + return (WALK_DONE); + + aw = (struct aw_info *)wsp->walk_data; + size = aw->aw_tree.avl_size; + offset = aw->aw_tree.avl_offset; + node = (avl_node_t *)((uintptr_t)aw->aw_buff + offset); + + /* + * must read the current node for the call back to use + */ + if (mdb_vread(aw->aw_buff, size, addr) == -1) { + mdb_warn("read of avl_node_t failed: %p", addr); + return (WALK_ERR); + } + + /* + * do the call back + */ + status = wsp->walk_callback(addr, aw->aw_buff, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + + /* + * move to the next node.... 
+ * note we read in new nodes, so the pointer to the buffer is fixed + */ + + /* + * if the node has a right child then go to it and then all the way + * thru as many left children as possible + */ + addr = (uintptr_t)node->avl_child[1]; + if (addr != NULL) { + addr = avl_leftmostchild(addr, aw->aw_buff, offset, size); + if (addr == (uintptr_t)-1L) + return (WALK_ERR); + + /* + * otherwise return to parent nodes, stopping if we ever return from + * a left child + */ + } else { + for (;;) { + was_child = AVL_XCHILD(node); + addr = (uintptr_t)AVL_XPARENT(node); + if (addr == NULL) + break; + addr -= offset; + if (was_child == 0) /* stop on return from left child */ + break; + if (mdb_vread(aw->aw_buff, size, addr) == -1) { + mdb_warn("read of avl_node_t failed: %p", addr); + return (WALK_ERR); + } + } + } + + wsp->walk_addr = addr; + return (WALK_NEXT); +} + +/* + * Release the memory allocated for the walk + */ +void +avl_walk_fini(mdb_walk_state_t *wsp) +{ + struct aw_info *aw; + + aw = (struct aw_info *)wsp->walk_data; + + if (aw == NULL) + return; + + if (aw->aw_buff != NULL) + mdb_free(aw->aw_buff, aw->aw_tree.avl_size); + + mdb_free(aw, sizeof (struct aw_info)); +} diff --git a/usr/src/cmd/mdb/common/modules/genunix/avl.h b/usr/src/cmd/mdb/common/modules/genunix/avl.h new file mode 100644 index 0000000000..1d2e9dcb88 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/avl.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MDB_AVL_H +#define _MDB_AVL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define AVL_WALK_NAME "avl" +#define AVL_WALK_DESC "given any avl_tree_t *, forward walk all " \ + "entries in tree" + +extern int avl_walk_init(mdb_walk_state_t *); +extern int avl_walk_step(mdb_walk_state_t *); +extern void avl_walk_fini(mdb_walk_state_t *wsp); + +#ifdef __cplusplus +} +#endif + +#endif /* _MDB_AVL_H */ diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c index 2a3b26ea8c..5db8641cbe 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c +++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c @@ -69,6 +69,7 @@ #include <regex.h> #include <sys/port_impl.h> +#include "avl.h" #include "contract.h" #include "cpupart_mdb.h" #include "devinfo.h" @@ -1868,193 +1869,6 @@ generic_walk_step(mdb_walk_state_t *wsp) wsp->walk_cbdata)); } -struct aw_info { - void *aw_buff; /* buffer to hold the tree's data structure */ - avl_tree_t aw_tree; /* copy of avl_tree_t being walked */ -}; - -/* - * common code used to find the addr of the the leftmost child below - * an AVL node - */ -static uintptr_t -avl_leftmostchild(uintptr_t addr, void * buff, size_t offset, size_t size) -{ - avl_node_t *node = (avl_node_t *)((uintptr_t)buff + offset); - - for (;;) { - addr -= offset; - if (mdb_vread(buff, size, addr) == -1) { - mdb_warn("read of avl_node_t failed: %p", addr); - return ((uintptr_t)-1L); - } 
- if (node->avl_child[0] == NULL) - break; - addr = (uintptr_t)node->avl_child[0]; - } - return (addr); -} - -/* - * initialize a forward walk thru an avl tree. - */ -int -avl_walk_init(mdb_walk_state_t *wsp) -{ - struct aw_info *aw; - avl_tree_t *tree; - uintptr_t addr; - - /* - * allocate the AVL walk data - */ - wsp->walk_data = aw = mdb_zalloc(sizeof (struct aw_info), UM_SLEEP); - - /* - * get an mdb copy of the avl_tree_t being walked - */ - tree = &aw->aw_tree; - if (mdb_vread(tree, sizeof (avl_tree_t), wsp->walk_addr) == -1) { - mdb_warn("read of avl_tree_t failed: %p", wsp->walk_addr); - goto error; - } - if (tree->avl_size < tree->avl_offset + sizeof (avl_node_t)) { - mdb_warn("invalid avl_tree_t at %p, avl_size:%d, avl_offset:%d", - wsp->walk_addr, tree->avl_size, tree->avl_offset); - goto error; - } - - /* - * allocate a buffer to hold the mdb copy of tree's structs - * "node" always points at the avl_node_t field inside the struct - */ - aw->aw_buff = mdb_zalloc(tree->avl_size, UM_SLEEP); - - /* - * get the first avl_node_t address, use same algorithm - * as avl_start() -- leftmost child in tree from root - */ - addr = (uintptr_t)tree->avl_root; - if (addr == NULL) { - wsp->walk_addr = NULL; - return (WALK_NEXT); - } - addr = avl_leftmostchild(addr, aw->aw_buff, tree->avl_offset, - tree->avl_size); - if (addr == (uintptr_t)-1L) - goto error; - - wsp->walk_addr = addr; - return (WALK_NEXT); - -error: - if (aw->aw_buff != NULL) - mdb_free(aw->aw_buff, sizeof (tree->avl_size)); - mdb_free(aw, sizeof (struct aw_info)); - return (WALK_ERR); -} - -/* - * At each step, visit (callback) the current node, then move to the next - * in the AVL tree. Uses the same algorithm as avl_walk(). - */ -int -avl_walk_step(mdb_walk_state_t *wsp) -{ - struct aw_info *aw; - size_t offset; - size_t size; - uintptr_t addr; - avl_node_t *node; - int status; - int was_child; - - /* - * don't walk past the end of the tree! 
- */ - addr = wsp->walk_addr; - if (addr == NULL) - return (WALK_DONE); - - aw = (struct aw_info *)wsp->walk_data; - size = aw->aw_tree.avl_size; - offset = aw->aw_tree.avl_offset; - node = (avl_node_t *)((uintptr_t)aw->aw_buff + offset); - - /* - * must read the current node for the call back to use - */ - if (mdb_vread(aw->aw_buff, size, addr) == -1) { - mdb_warn("read of avl_node_t failed: %p", addr); - return (WALK_ERR); - } - - /* - * do the call back - */ - status = wsp->walk_callback(addr, aw->aw_buff, wsp->walk_cbdata); - if (status != WALK_NEXT) - return (status); - - /* - * move to the next node.... - * note we read in new nodes, so the pointer to the buffer is fixed - */ - - /* - * if the node has a right child then go to it and then all the way - * thru as many left children as possible - */ - addr = (uintptr_t)node->avl_child[1]; - if (addr != NULL) { - addr = avl_leftmostchild(addr, aw->aw_buff, offset, size); - if (addr == (uintptr_t)-1L) - return (WALK_ERR); - - /* - * othewise return to parent nodes, stopping if we ever return from - * a left child - */ - } else { - for (;;) { - was_child = AVL_XCHILD(node); - addr = (uintptr_t)AVL_XPARENT(node); - if (addr == NULL) - break; - addr -= offset; - if (was_child == 0) /* stop on return from left child */ - break; - if (mdb_vread(aw->aw_buff, size, addr) == -1) { - mdb_warn("read of avl_node_t failed: %p", addr); - return (WALK_ERR); - } - } - } - - wsp->walk_addr = addr; - return (WALK_NEXT); -} - -/* - * Release the memory allocated for the walk - */ -void -avl_walk_fini(mdb_walk_state_t *wsp) -{ - struct aw_info *aw; - - aw = (struct aw_info *)wsp->walk_data; - - if (aw == NULL) - return; - - if (aw->aw_buff != NULL) - mdb_free(aw->aw_buff, aw->aw_tree.avl_size); - - mdb_free(aw, sizeof (struct aw_info)); -} - - int seg_walk_init(mdb_walk_state_t *wsp) { @@ -3575,6 +3389,8 @@ static const mdb_dcmd_t dcmds[] = { /* from nvpair.c */ { NVPAIR_DCMD_NAME, NVPAIR_DCMD_USAGE, NVPAIR_DCMD_DESCR, nvpair_print 
}, + { NVLIST_DCMD_NAME, NVLIST_DCMD_USAGE, NVLIST_DCMD_DESCR, + nvlist_print }, /* from rctl.c */ { "rctl_dict", "?", "print systemwide default rctl definitions", @@ -3654,8 +3470,6 @@ static const mdb_dcmd_t dcmds[] = { static const mdb_walker_t walkers[] = { /* from genunix.c */ - { "avl", "given any avl_tree_t *, forward walk all entries in tree", - avl_walk_init, avl_walk_step, avl_walk_fini }, { "anon", "given an amp, list of anon structures", anon_walk_init, anon_walk_step, anon_walk_fini }, { "cpu", "walk cpu structures", cpu_walk_init, cpu_walk_step }, @@ -3702,6 +3516,10 @@ static const mdb_walker_t walkers[] = { { "taskq_entry", "given a taskq_t*, list all taskq_ent_t in the list", taskq_walk_init, taskq_walk_step, NULL, NULL }, + /* from avl.c */ + { AVL_WALK_NAME, AVL_WALK_DESC, + avl_walk_init, avl_walk_step, avl_walk_fini }, + /* from zone.c */ { "zone", "walk a list of kernel zones", zone_walk_init, zone_walk_step, NULL }, @@ -3842,7 +3660,7 @@ static const mdb_walker_t walkers[] = { lgrp_walk_init, lgrp_walk_step, NULL }, /* from list.c */ - { "list", "walk a linked list", + { LIST_WALK_NAME, LIST_WALK_DESC, list_walk_init, list_walk_step, list_walk_fini }, /* from memory.c */ diff --git a/usr/src/cmd/mdb/common/modules/genunix/list.h b/usr/src/cmd/mdb/common/modules/genunix/list.h index 04d02da2c7..10581cc900 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/list.h +++ b/usr/src/cmd/mdb/common/modules/genunix/list.h @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,6 +35,9 @@ extern "C" { #endif +#define LIST_WALK_NAME "list" +#define LIST_WALK_DESC "walk a linked list" + int list_walk_init(mdb_walk_state_t *wsp); int list_walk_step(mdb_walk_state_t *wsp); void list_walk_fini(mdb_walk_state_t *wsp); diff --git a/usr/src/cmd/mdb/common/modules/genunix/nvpair.c b/usr/src/cmd/mdb/common/modules/genunix/nvpair.c index d9025fa6be..463f226a2c 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/nvpair.c +++ b/usr/src/cmd/mdb/common/modules/genunix/nvpair.c @@ -91,9 +91,45 @@ nvpair_walk_step(mdb_walk_state_t *wsp) return (status); } +/* + * ::nvlist [-v] + * + * Print out an entire nvlist. This is shorthand for '::walk nvpair | + * ::nvpair -rq'. The '-v' option invokes '::nvpair' without the "-q" option. + */ +/*ARGSUSED*/ +int +nvlist_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + int verbose = B_FALSE; + mdb_arg_t v; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_getopts(argc, argv, + 'v', MDB_OPT_SETBITS, TRUE, &verbose, + NULL) != argc) + return (DCMD_USAGE); + + v.a_type = MDB_TYPE_STRING; + if (verbose) + v.a_un.a_str = "-r"; + else + v.a_un.a_str = "-rq"; + + return (mdb_pwalk_dcmd("nvpair", "nvpair", 1, &v, addr)); +} /* - * nvpair dcmd + * ::nvpair [-rq] + * + * -r Recursively print any nvlist elements + * -q Quiet mode; print members only as "name=value" + * + * Prints out a single nvpair. By default, all information is printed. When + * given the '-q' option, the type of elements is hidden, and elements are + * instead printed simply as 'name=value'. 
*/ typedef struct { data_type_t type; @@ -136,7 +172,6 @@ nvpair_print_value(char *data, int32_t elem_size, int32_t nelem, { int32_t i; - mdb_printf("value="); if (elem_size == 0) { char *p = data; @@ -186,8 +221,16 @@ nvpair_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) char *data = NULL, *data_end = NULL; char *type_name = NULL; data_type_t type = DATA_TYPE_UNKNOWN; + int quiet = FALSE; + int recurse = FALSE; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); - if (!(flags & DCMD_ADDRSPEC) || argc != 0) + if (mdb_getopts(argc, argv, + 'r', MDB_OPT_SETBITS, TRUE, &recurse, + 'q', MDB_OPT_SETBITS, TRUE, &quiet, + NULL) != argc) return (DCMD_USAGE); /* read in the nvpair header so we can get the size */ @@ -218,19 +261,30 @@ nvpair_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) break; } } - /* print out the first line of nvpair info */ - mdb_printf("name='%s'", NVP_NAME(nvpair)); - if (type_name != NULL) { - mdb_printf(" type=%s", type_name); + + if (quiet) { + mdb_printf("%s", NVP_NAME(nvpair)); } else { - /* if the nvpair type is unknown we print the type number */ - mdb_printf(" type=0x%x", type); + /* print out the first line of nvpair info */ + mdb_printf("name='%s'", NVP_NAME(nvpair)); + if (type_name != NULL) { + mdb_printf(" type=%s", type_name); + } else { + /* + * If the nvpair type is unknown we print the type + * number + */ + mdb_printf(" type=0x%x", type); + } + mdb_printf(" items=%d\n", nelem); } - mdb_printf(" items=%d\n", nelem); /* if there is no data and the type is known then we're done */ - if ((nelem == 0) && (type_name != NULL)) + if ((nelem == 0) && (type_name != NULL)) { + if (quiet) + mdb_printf("(unknown)\n"); return (DCMD_OK); + } /* get pointers to the data to print out */ data = (char *)NVP_VALUE(nvpair); @@ -249,20 +303,54 @@ nvpair_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) */ if (type == DATA_TYPE_NVLIST) { char *p = (char *)addr + (data - (char *)nvpair); - 
mdb_inc_indent(NVPAIR_VALUE_INDENT); - mdb_printf("value=%p\n", p); - mdb_dec_indent(NVPAIR_VALUE_INDENT); + if (recurse) { + if (quiet) + mdb_printf("\n"); + mdb_inc_indent(NVPAIR_VALUE_INDENT); + if (mdb_pwalk_dcmd("nvpair", "nvpair", argc, argv, + (uintptr_t)p) != DCMD_OK) + return (DCMD_ERR); + mdb_dec_indent(NVPAIR_VALUE_INDENT); + } else { + if (!quiet) { + mdb_inc_indent(NVPAIR_VALUE_INDENT); + mdb_printf("value", p); + } + mdb_printf("=%p\n", p); + if (!quiet) + mdb_dec_indent(NVPAIR_VALUE_INDENT); + } return (DCMD_OK); } else if (type == DATA_TYPE_NVLIST_ARRAY) { - mdb_inc_indent(NVPAIR_VALUE_INDENT); - mdb_printf("value="); - for (i = 0; i < nelem; i++, data += sizeof (nvlist_t *)) { - nvlist_t **nl = (nvlist_t **)(void *)data; - mdb_printf("%c%p", " "[i == 0], *nl); + if (recurse) { + for (i = 0; i < nelem; i++, + data += sizeof (nvlist_t *)) { + nvlist_t **nl = (nvlist_t **)(void *)data; + if (quiet && i != 0) + mdb_printf("%s", NVP_NAME(nvpair)); + mdb_printf("[%d]\n", i); + mdb_inc_indent(NVPAIR_VALUE_INDENT); + if (mdb_pwalk_dcmd("nvpair", "nvpair", argc, + argv, (uintptr_t)*nl) != DCMD_OK) + return (DCMD_ERR); + mdb_dec_indent(NVPAIR_VALUE_INDENT); + } + } else { + if (!quiet) { + mdb_inc_indent(NVPAIR_VALUE_INDENT); + mdb_printf("value"); + } + mdb_printf("="); + for (i = 0; i < nelem; i++, + data += sizeof (nvlist_t *)) { + nvlist_t **nl = (nvlist_t **)(void *)data; + mdb_printf("%c%p", " "[i == 0], *nl); + } + mdb_printf("\n"); + if (!quiet) + mdb_dec_indent(NVPAIR_VALUE_INDENT); } - mdb_printf("\n"); - mdb_dec_indent(NVPAIR_VALUE_INDENT); return (DCMD_OK); } @@ -298,9 +386,15 @@ nvpair_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) } } - mdb_inc_indent(NVPAIR_VALUE_INDENT); + if (!quiet) { + mdb_inc_indent(NVPAIR_VALUE_INDENT); + mdb_printf("value="); + } else { + mdb_printf("="); + } nvpair_print_value(data, elem_size, nelem, type); - mdb_dec_indent(NVPAIR_VALUE_INDENT); + if (!quiet) + mdb_dec_indent(NVPAIR_VALUE_INDENT); 
return (DCMD_OK); } diff --git a/usr/src/cmd/mdb/common/modules/genunix/nvpair.h b/usr/src/cmd/mdb/common/modules/genunix/nvpair.h index 7f5210ec3e..071f90116d 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/nvpair.h +++ b/usr/src/cmd/mdb/common/modules/genunix/nvpair.h @@ -20,8 +20,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 2001 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #ifndef _NVPAIR_H @@ -34,9 +34,13 @@ extern "C" { #endif #define NVPAIR_DCMD_NAME "nvpair" -#define NVPAIR_DCMD_USAGE ":" +#define NVPAIR_DCMD_USAGE ":[-rq]" #define NVPAIR_DCMD_DESCR "print out an nvpair" +#define NVLIST_DCMD_NAME "nvlist" +#define NVLIST_DCMD_USAGE ":[-v]" +#define NVLIST_DCMD_DESCR "print out an nvlist" + #define NVPAIR_WALKER_NAME "nvpair" #define NVPAIR_WALKER_DESCR "walk through the nvpairs in an unpacked nvlist" @@ -52,7 +56,9 @@ extern "C" { extern int nvpair_walk_init(mdb_walk_state_t *wsp); extern int nvpair_walk_step(mdb_walk_state_t *wsp); extern int nvpair_print(uintptr_t addr, uint_t flags, - int argc, const mdb_arg_t *argv); + int argc, const mdb_arg_t *argv); +extern int nvlist_print(uintptr_t addr, uint_t flags, + int argc, const mdb_arg_t *argv); #ifdef __cplusplus } diff --git a/usr/src/cmd/mdb/common/modules/libavl/libavl.c b/usr/src/cmd/mdb/common/modules/libavl/libavl.c new file mode 100644 index 0000000000..e35fd049a9 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/libavl/libavl.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <mdb/mdb_modapi.h> + +#include "../genunix/avl.h" + +static const mdb_walker_t walkers[] = { + { AVL_WALK_NAME, AVL_WALK_DESC, + avl_walk_init, avl_walk_step, avl_walk_fini }, + { NULL } +}; + +static const mdb_modinfo_t modinfo = { + MDB_API_VERSION, NULL, walkers +}; + +const mdb_modinfo_t * +_mdb_init(void) +{ + return (&modinfo); +} diff --git a/usr/src/cmd/mdb/common/modules/libnvpair/libnvpair.c b/usr/src/cmd/mdb/common/modules/libnvpair/libnvpair.c index 7a9c87be8a..c2462e2d1c 100644 --- a/usr/src/cmd/mdb/common/modules/libnvpair/libnvpair.c +++ b/usr/src/cmd/mdb/common/modules/libnvpair/libnvpair.c @@ -20,8 +20,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 2001 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" @@ -33,6 +33,8 @@ static const mdb_dcmd_t dcmds[] = { { NVPAIR_DCMD_NAME, NVPAIR_DCMD_USAGE, NVPAIR_DCMD_DESCR, nvpair_print }, + { NVLIST_DCMD_NAME, NVLIST_DCMD_USAGE, NVLIST_DCMD_DESCR, + nvlist_print }, { NULL } }; diff --git a/usr/src/cmd/mdb/common/modules/zfs/inc.flg b/usr/src/cmd/mdb/common/modules/zfs/inc.flg new file mode 100644 index 0000000000..bb65300cca --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/zfs/inc.flg @@ -0,0 +1,30 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +find_files "s.*" usr/src/uts/common/fs/zfs/sys +echo_file usr/src/uts/common/sys/fs/zfs.h diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c new file mode 100644 index 0000000000..d34f71f5df --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -0,0 +1,1594 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <mdb/mdb_ctf.h> +#include <sys/zfs_context.h> +#include <sys/mdb_modapi.h> +#include <sys/dbuf.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_pool.h> +#include <sys/metaslab_impl.h> +#include <sys/space_map.h> +#include <sys/list.h> +#include <sys/spa_impl.h> +#include <sys/vdev_impl.h> +#include <sys/zio_compress.h> + +#ifndef _KERNEL +#include "../genunix/list.h" +#endif + +#ifdef _KERNEL +#define ZFS_OBJ_NAME "zfs" +#else +#define ZFS_OBJ_NAME "libzpool.so.1" +#endif + +static char * +local_strdup(const char *s) +{ + char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP); + + (void) strcpy(s1, s); + return (s1); +} + +static int +getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp, + const char *member, int len, void *buf) +{ + mdb_ctf_id_t id; + ulong_t off; + char name[64]; + + if (idp == NULL) { + if (mdb_ctf_lookup_by_name(type, &id) == -1) { + mdb_warn("couldn't find type %s", type); + return (DCMD_ERR); + } + idp = &id; + } else { + type = name; + mdb_ctf_type_name(*idp, name, sizeof (name)); + } + + if (mdb_ctf_offsetof(*idp, member, &off) == -1) { + mdb_warn("couldn't find member %s of type %s\n", 
member, type); + return (DCMD_ERR); + } + if (off % 8 != 0) { + mdb_warn("member %s of type %s is unsupported bitfield", + member, type); + return (DCMD_ERR); + } + off /= 8; + + if (mdb_vread(buf, len, addr + off) == -1) { + mdb_warn("failed to read %s from %s at %p", + member, type, addr + off); + return (DCMD_ERR); + } + /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */ + + return (0); +} + +#define GETMEMB(addr, type, member, dest) \ + getmember(addr, #type, NULL, #member, sizeof (dest), &(dest)) + +#define GETMEMBID(addr, ctfid, member, dest) \ + getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest)) + +static int +getrefcount(uintptr_t addr, mdb_ctf_id_t *id, + const char *member, uint64_t *rc) +{ + static int gotid; + static mdb_ctf_id_t rc_id; + ulong_t off; + + if (!gotid) { + if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) { + mdb_warn("couldn't find struct refcount"); + return (DCMD_ERR); + } + gotid = TRUE; + } + + if (mdb_ctf_offsetof(*id, member, &off) == -1) { + char name[64]; + mdb_ctf_type_name(*id, name, sizeof (name)); + mdb_warn("couldn't find member %s of type %s\n", member, name); + return (DCMD_ERR); + } + off /= 8; + + return (GETMEMBID(addr + off, &rc_id, rc_count, *rc)); +} + +static int +read_symbol(char *sym_name, void **bufp) +{ + GElf_Sym sym; + + if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) { + mdb_warn("can't find symbol %s", sym_name); + return (DCMD_ERR); + } + + *bufp = mdb_alloc(sym.st_size, UM_SLEEP); + + if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) { + mdb_warn("can't read data for symbol %s", sym_name); + mdb_free(*bufp, sym.st_size); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +static int verbose; + +static int +freelist_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) { + mdb_warn("must supply starting address\n"); + return (WALK_ERR); + } + + wsp->walk_data = 0; /* Index into the freelist */ + return (WALK_NEXT); +} + +static int 
+freelist_walk_step(mdb_walk_state_t *wsp) +{ + uint64_t entry; + uintptr_t number = (uintptr_t)wsp->walk_data; + char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" }; + int mapshift = SPA_MINBLOCKSHIFT; + + if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) { + mdb_warn("failed to read freelist entry %p", wsp->walk_addr); + return (WALK_DONE); + } + wsp->walk_addr += sizeof (entry); + wsp->walk_data = (void *)(number + 1); + + if (SM_DEBUG_DECODE(entry)) { + mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n", + number, + ddata[SM_DEBUG_ACTION_DECODE(entry)], + SM_DEBUG_TXG_DECODE(entry), + SM_DEBUG_SYNCPASS_DECODE(entry)); + } else { + mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c " + "size=%06llx", number, + SM_OFFSET_DECODE(entry) << mapshift, + (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) << + mapshift, + SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', + SM_RUN_DECODE(entry) << mapshift); + if (verbose) + mdb_printf(" (raw=%012llx)\n", entry); + mdb_printf("\n"); + } + return (WALK_NEXT); +} + +/* ARGSUSED */ +static void +freelist_walk_fini(mdb_walk_state_t *wsp) +{ +} + +typedef struct dbuf_walk_data { + dbuf_hash_table_t ht; + int64_t bucket; + uintptr_t dbp; + dmu_buf_impl_t db; +} dbuf_walk_data_t; + +static int +dbuf_walk_init(mdb_walk_state_t *wsp) +{ + dbuf_walk_data_t *dwd; + + if (wsp->walk_addr != NULL) { + mdb_warn("dbuf walk only supports global walks\n"); + return (WALK_ERR); + } + + dwd = mdb_alloc(sizeof (dbuf_walk_data_t), UM_SLEEP); + + if (mdb_readvar(&dwd->ht, "dbuf_hash_table") == -1) { + mdb_warn("failed to read 'dbuf_hash_table'"); + mdb_free(dwd, sizeof (dbuf_walk_data_t)); + return (WALK_ERR); + } + dwd->bucket = -1; + dwd->dbp = 0; + wsp->walk_data = dwd; + return (WALK_NEXT); +} + +static int +dbuf_walk_step(mdb_walk_state_t *wsp) +{ + int status; + dbuf_walk_data_t *dwd = wsp->walk_data; + + while (dwd->dbp == 0) { + dwd->bucket++; + if (dwd->bucket == dwd->ht.hash_table_mask+1) + return (WALK_DONE); + + if 
(mdb_vread(&dwd->dbp, sizeof (void *), + (uintptr_t)(dwd->ht.hash_table+dwd->bucket)) == -1) { + mdb_warn("failed to read hash bucket %u at %p", + dwd->bucket, dwd->ht.hash_table+dwd->bucket); + return (WALK_DONE); + } + } + + wsp->walk_addr = dwd->dbp; + if (mdb_vread(&dwd->db, sizeof (dmu_buf_impl_t), + wsp->walk_addr) == -1) { + mdb_warn("failed to read dbuf at %p", wsp->walk_addr); + return (WALK_DONE); + } + status = wsp->walk_callback(wsp->walk_addr, &dwd->db, wsp->walk_cbdata); + + dwd->dbp = (uintptr_t)dwd->db.db_hash_next; + return (status); +} + +static void +dbuf_walk_fini(mdb_walk_state_t *wsp) +{ + dbuf_walk_data_t *dwd = wsp->walk_data; + mdb_free(dwd, sizeof (dbuf_walk_data_t)); +} + +static int +dataset_name(uintptr_t addr, char *buf) +{ + static int gotid; + static mdb_ctf_id_t dd_id; + uintptr_t dd_parent; + char dd_myname[MAXNAMELEN]; + + if (!gotid) { + if (mdb_ctf_lookup_by_name("struct dsl_dir", + &dd_id) == -1) { + mdb_warn("couldn't find struct dsl_dir"); + return (DCMD_ERR); + } + gotid = TRUE; + } + if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) || + GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) { + return (DCMD_ERR); + } + + if (dd_parent) { + if (dataset_name(dd_parent, buf)) + return (DCMD_ERR); + strcat(buf, "/"); + } + + if (dd_myname[0]) + strcat(buf, dd_myname); + else + strcat(buf, "???"); + + return (0); +} + +static int +objset_name(uintptr_t addr, char *buf) +{ + static int gotid; + static mdb_ctf_id_t osi_id, ds_id; + uintptr_t os_dsl_dataset; + char ds_snapname[MAXNAMELEN]; + uintptr_t ds_dir; + + buf[0] = '\0'; + + if (!gotid) { + if (mdb_ctf_lookup_by_name("struct objset_impl", + &osi_id) == -1) { + mdb_warn("couldn't find struct objset_impl"); + return (DCMD_ERR); + } + if (mdb_ctf_lookup_by_name("struct dsl_dataset", + &ds_id) == -1) { + mdb_warn("couldn't find struct dsl_dataset"); + return (DCMD_ERR); + } + + gotid = TRUE; + } + + if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset)) + return (DCMD_ERR); + + 
if (os_dsl_dataset == 0) { + strcat(buf, "mos"); + return (0); + } + + if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) || + GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) { + return (DCMD_ERR); + } + + if (ds_dir && dataset_name(ds_dir, buf)) + return (DCMD_ERR); + + if (ds_snapname[0]) { + strcat(buf, "@"); + strcat(buf, ds_snapname); + } + return (0); +} + +static void +enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val, + const char *prefix) +{ + const char *cp; + size_t len = strlen(prefix); + + if ((cp = mdb_ctf_enum_name(id, val)) != NULL) { + if (strncmp(cp, prefix, len) == 0) + cp += len; + (void) mdb_snprintf(out, size, "%s", cp); + } else { + mdb_snprintf(out, size, "? (%d)", val); + } +} + +/* ARGSUSED */ +static int +zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + mdb_ctf_id_t pipe_enum; + int i; + char stage[1024]; + + if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) { + mdb_warn("Could not find enum zio_stage"); + return (DCMD_ERR); + } + + for (i = 0; i < 32; i++) { + if (addr & (1U << i)) { + enum_lookup(stage, sizeof (stage), pipe_enum, i, + "ZIO_STAGE_"); + mdb_printf(" %s\n", stage); + } + } + + return (DCMD_OK); +} + +/* ARGSUSED */ +static int +blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + blkptr_t bp; + dva_t *dva; + dmu_object_type_info_t *doti; + zio_compress_info_t *zct; + zio_checksum_info_t *zci; + int i; + char buf[MAXPATHLEN]; + + if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) { + mdb_warn("failed to read blkptr_t"); + return (DCMD_ERR); + } + + if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK) + return (DCMD_ERR); + for (i = 0; i < DMU_OT_NUMTYPES; i++) { + mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name); + doti[i].ot_name = local_strdup(buf); + } + + if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK) + return (DCMD_ERR); + for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) { + mdb_readstr(buf, sizeof (buf), 
(uintptr_t)zci[i].ci_name); + zci[i].ci_name = local_strdup(buf); + } + + if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK) + return (DCMD_ERR); + for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) { + mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name); + zct[i].ci_name = local_strdup(buf); + } + + for (i = 0; i < SPA_DVAS_PER_BP; i++) { + dva = &bp.blk_dva[i]; + mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i, + DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva)); + mdb_printf("DVA[%d]: GRID: %04x\t" + "ASIZE: %llx\n", i, DVA_GET_GRID(dva), DVA_GET_ASIZE(dva)); + } + mdb_printf("LSIZE: %-16llx\t\tPSIZE: %llx\n", + BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp)); + mdb_printf("ENDIAN: %6s GANG: %-5s\tTYPE: %s\n", + BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG", + DVA_GET_GANG(dva) ? "TRUE" : "FALSE", + doti[BP_GET_TYPE(&bp)].ot_name); + mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n", + bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill); + mdb_printf("CKFUNC: %-16s\t\tCOMP: %s\n", + zci[BP_GET_CHECKSUM(&bp)].ci_name, + zct[BP_GET_COMPRESS(&bp)].ci_name); + mdb_printf("CKSUM: %llx:%llx:%llx:%llx\n", + bp.blk_cksum.zc_word[0], + bp.blk_cksum.zc_word[1], + bp.blk_cksum.zc_word[2], + bp.blk_cksum.zc_word[3]); + + return (DCMD_OK); +} + +/* ARGSUSED */ +static int +dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + mdb_ctf_id_t id; + dmu_buf_t db; + uintptr_t objset; + uint8_t level; + uint64_t blkid; + uint64_t holds; + char objectname[32]; + char blkidname[32]; + char path[MAXNAMELEN]; + + if (DCMD_HDRSPEC(flags)) { + mdb_printf(" addr object lvl blkid holds os\n"); + } + + if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) { + mdb_warn("couldn't find struct dmu_buf_impl_t"); + return (DCMD_ERR); + } + + if (GETMEMBID(addr, &id, db_objset, objset) || + GETMEMBID(addr, &id, db, db) || + GETMEMBID(addr, &id, db_level, level) || + GETMEMBID(addr, &id, db_blkid, blkid)) { + return (WALK_ERR); + } + + if (getrefcount(addr, &id, "db_holds", &holds)) { 
+ return (WALK_ERR); + } + + if (db.db_object == DMU_META_DNODE_OBJECT) + (void) strcpy(objectname, "mdn"); + else + (void) mdb_snprintf(objectname, sizeof (objectname), "%llx", + (u_longlong_t)db.db_object); + + if (blkid == DB_BONUS_BLKID) + (void) strcpy(blkidname, "bonus"); + else + (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx", + (u_longlong_t)blkid); + + if (objset_name(objset, path)) { + return (WALK_ERR); + } + + mdb_printf("%p %8s %1u %9s %2llu %s\n", + addr, objectname, level, blkidname, holds, path); + + return (DCMD_OK); +} + +/* ARGSUSED */ +static int +dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ +#define HISTOSZ 32 + uintptr_t dbp; + dmu_buf_impl_t db; + dbuf_hash_table_t ht; + uint64_t bucket, ndbufs; + uint64_t histo[HISTOSZ]; + uint64_t histo2[HISTOSZ]; + int i, maxidx; + + if (mdb_readvar(&ht, "dbuf_hash_table") == -1) { + mdb_warn("failed to read 'dbuf_hash_table'"); + return (DCMD_ERR); + } + + for (i = 0; i < HISTOSZ; i++) { + histo[i] = 0; + histo2[i] = 0; + } + + ndbufs = 0; + for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) { + int len; + + if (mdb_vread(&dbp, sizeof (void *), + (uintptr_t)(ht.hash_table+bucket)) == -1) { + mdb_warn("failed to read hash bucket %u at %p", + bucket, ht.hash_table+bucket); + return (DCMD_ERR); + } + + len = 0; + while (dbp != 0) { + if (mdb_vread(&db, sizeof (dmu_buf_impl_t), + dbp) == -1) { + mdb_warn("failed to read dbuf at %p", dbp); + return (DCMD_ERR); + } + dbp = (uintptr_t)db.db_hash_next; + for (i = MIN(len, HISTOSZ - 1); i >= 0; i--) + histo2[i]++; + len++; + ndbufs++; + } + + if (len >= HISTOSZ) + len = HISTOSZ-1; + histo[len]++; + } + + mdb_printf("hash table has %llu buckets, %llu dbufs " + "(avg %llu buckets/dbuf)\n", + ht.hash_table_mask+1, ndbufs, + (ht.hash_table_mask+1)/ndbufs); + + mdb_printf("\n"); + maxidx = 0; + for (i = 0; i < HISTOSZ; i++) + if (histo[i] > 0) + maxidx = i; + mdb_printf("hash chain length number of buckets\n"); + for (i 
= 0; i <= maxidx; i++) + mdb_printf("%u %llu\n", i, histo[i]); + + mdb_printf("\n"); + maxidx = 0; + for (i = 0; i < HISTOSZ; i++) + if (histo2[i] > 0) + maxidx = i; + mdb_printf("hash chain depth number of dbufs\n"); + for (i = 0; i <= maxidx; i++) + mdb_printf("%u or more %llu %llu%%\n", + i, histo2[i], histo2[i]*100/ndbufs); + + + return (DCMD_OK); +} + +typedef struct dbufs_data { + mdb_ctf_id_t id; + uint64_t objset; + uint64_t object; + uint64_t level; + uint64_t blkid; + char *osname; +} dbufs_data_t; + +#define DBUFS_UNSET (0xbaddcafedeadbeefULL) + +/* ARGSUSED */ +static int +dbufs_cb(uintptr_t addr, const void *unknown, void *arg) +{ + dbufs_data_t *data = arg; + uintptr_t objset; + dmu_buf_t db; + uint8_t level; + uint64_t blkid; + char osname[MAXNAMELEN]; + + if (GETMEMBID(addr, &data->id, db_objset, objset) || + GETMEMBID(addr, &data->id, db, db) || + GETMEMBID(addr, &data->id, db_level, level) || + GETMEMBID(addr, &data->id, db_blkid, blkid)) { + return (WALK_ERR); + } + + if ((data->objset == DBUFS_UNSET || data->objset == objset) && + (data->osname == NULL || (objset_name(objset, osname) == 0 && + strcmp(data->osname, osname) == 0)) && + (data->object == DBUFS_UNSET || data->object == db.db_object) && + (data->level == DBUFS_UNSET || data->level == level) && + (data->blkid == DBUFS_UNSET || data->blkid == blkid)) { + mdb_printf("%#lr\n", addr); + } + return (WALK_NEXT); +} + +/* ARGSUSED */ +static int +dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + dbufs_data_t data; + char *object = NULL; + char *blkid = NULL; + + data.objset = data.object = data.level = data.blkid = DBUFS_UNSET; + data.osname = NULL; + + if (mdb_getopts(argc, argv, + 'O', MDB_OPT_UINT64, &data.objset, + 'n', MDB_OPT_STR, &data.osname, + 'o', MDB_OPT_STR, &object, + 'l', MDB_OPT_UINT64, &data.level, + 'b', MDB_OPT_STR, &blkid) != argc) { + return (DCMD_USAGE); + } + + if (object) { + if (strcmp(object, "mdn") == 0) { + data.object = 
DMU_META_DNODE_OBJECT; + } else { + data.object = mdb_strtoull(object); + } + } + + if (blkid) { + if (strcmp(blkid, "bonus") == 0) { + data.blkid = DB_BONUS_BLKID; + } else { + data.blkid = mdb_strtoull(blkid); + } + } + + if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) { + mdb_warn("couldn't find struct dmu_buf_impl_t"); + return (DCMD_ERR); + } + + if (mdb_pwalk("dbufs", dbufs_cb, &data, 0) != 0) { + mdb_warn("can't walk dbufs"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +typedef struct abuf_find_data { + dva_t dva; + mdb_ctf_id_t id; +} abuf_find_data_t; + +/* ARGSUSED */ +static int +abuf_find_cb(uintptr_t addr, const void *unknown, void *arg) +{ + abuf_find_data_t *data = arg; + dva_t dva; + + if (GETMEMBID(addr, &data->id, b_dva, dva)) { + return (WALK_ERR); + } + + if (dva.dva_word[0] == data->dva.dva_word[0] && + dva.dva_word[1] == data->dva.dva_word[1]) { + mdb_printf("%#lr\n", addr); + } + return (WALK_NEXT); +} + +/* ARGSUSED */ +static int +abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + abuf_find_data_t data; + GElf_Sym sym; + int i; + const char *syms[] = { + "ARC_mru_top", + "ARC_mru_bot", + "ARC_mfu_top", + "ARC_mfu_bot", + }; + + if (argc != 2) + return (DCMD_USAGE); + + for (i = 0; i < 2; i ++) { + switch (argv[i].a_type) { + case MDB_TYPE_STRING: + data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str); + break; + case MDB_TYPE_IMMEDIATE: + data.dva.dva_word[i] = argv[i].a_un.a_val; + break; + default: + return (DCMD_USAGE); + } + } + + if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) { + mdb_warn("couldn't find struct arc_buf_hdr"); + return (DCMD_ERR); + } + + for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) { + if (mdb_lookup_by_name(syms[i], &sym)) { + mdb_warn("can't find symbol %s", syms[i]); + return (DCMD_ERR); + } + + if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) { + mdb_warn("can't walk %s", syms[i]); + return (DCMD_ERR); + } + } + + 
return (DCMD_OK); +} + +void +abuf_help(void) +{ + mdb_printf("::abuf_find dva_word[0] dva_word[1]\n"); +} + +/* + * ::spa + * + * -c Print configuration information as well + * -v Print vdev state + * -e Print vdev error stats + * + * Print a summarized spa_t. When given no arguments, prints out a table of all + * active pools on the system. + */ +/* ARGSUSED */ +static int +spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + spa_t spa; + char poolname[MAXNAMELEN]; + const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED", + "UNINIT", "UNAVAIL" }; + const char *state; + int config = FALSE; + int vdevs = FALSE; + int errors = FALSE; + + if (mdb_getopts(argc, argv, + 'c', MDB_OPT_SETBITS, TRUE, &config, + 'v', MDB_OPT_SETBITS, TRUE, &vdevs, + 'e', MDB_OPT_SETBITS, TRUE, &errors, + NULL) != argc) + return (DCMD_USAGE); + + if (!(flags & DCMD_ADDRSPEC)) { + if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) { + mdb_warn("can't walk spa"); + return (DCMD_ERR); + } + + return (DCMD_OK); + } + + if (flags & DCMD_PIPE_OUT) { + mdb_printf("%#lr\n", addr); + return (DCMD_OK); + } + + if (DCMD_HDRSPEC(flags)) + mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE", + sizeof (uintptr_t) == 4 ? 
60 : 52, "NAME"); + + if (mdb_vread(&spa, sizeof (spa), addr) == -1) { + mdb_warn("failed to read spa_t at %p", addr); + return (DCMD_ERR); + } + + if (mdb_readstr(poolname, sizeof (poolname), (uintptr_t)spa.spa_name) + == -1) { + mdb_warn("failed to read pool name at %p", spa.spa_name); + return (DCMD_ERR); + } + + if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL) + state = "UKNNOWN"; + else + state = statetab[spa.spa_state]; + + mdb_printf("%0?p %9s %s\n", addr, state, poolname); + + if (config) { + mdb_printf("\n"); + mdb_inc_indent(4); + if (mdb_call_dcmd("spa_config", addr, flags, 0, + NULL) != DCMD_OK) + return (DCMD_ERR); + mdb_dec_indent(4); + } + + if (vdevs || errors) { + mdb_arg_t v; + + v.a_type = MDB_TYPE_STRING; + v.a_un.a_str = "-e"; + + mdb_printf("\n"); + mdb_inc_indent(4); + if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0, + &v) != DCMD_OK) + return (DCMD_ERR); + mdb_dec_indent(4); + } + + return (DCMD_OK); +} + +/* + * ::spa_config + * + * Given a spa_t, print the configuration information stored in spa_config. + * Since it's just an nvlist, format it as an indented list of name=value pairs. + * We simply read the value of spa_config and pass off to ::nvlist. 
+ */ +/* ARGSUSED */ +static int +spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + spa_t spa; + + if (argc != 0 || !(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_vread(&spa, sizeof (spa), addr) == -1) { + mdb_warn("failed to read spa_t at %p", addr); + return (DCMD_ERR); + } + + if (spa.spa_config == NULL) { + mdb_printf("(none)\n"); + return (DCMD_OK); + } + + return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags, + 0, NULL)); +} + +void +vdev_help(void) +{ + mdb_printf("[vdev_t*]::vdev [-qr]\n" + "\t-> -q display vdev_queue parameters\n" + "\t-> -r recursive (visit all children)\n"); +} + +/* + * ::vdev + * + * Print out a summarized vdev_t, in the following form: + * + * ADDR STATE AUX DESC + * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0 + * + * or with "-q" to print out a vdev_t's vdev_queue parameters: + * + * vdev_t: c26ae4c0 + * c26ae73c min pending 0x2 + * c26ae744 max pending 0x23 + * c26ae74c agg limit 0x20000 + * c26ae754 time shift 0x4 + * c26ae75c ramp rate 0x2 + * + * If '-r' is specified, recursively visit all children. + * + * With '-e', the statistics associated with the vdev are printed as well. 
+ */ +static int +do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats, + int recursive) +{ + vdev_t vdev; + char desc[MAXNAMELEN]; + int c, children; + uintptr_t *child; + const char *state, *aux; + + if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) { + mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr); + return (DCMD_ERR); + } + + if (flags & DCMD_PIPE_OUT) { + mdb_printf("%#lr", addr); + } else { + if (vdev.vdev_path != NULL) { + if (mdb_readstr(desc, sizeof (desc), + (uintptr_t)vdev.vdev_path) == -1) { + mdb_warn("failed to read vdev_path at %p\n", + vdev.vdev_path); + return (DCMD_ERR); + } + } else if (vdev.vdev_ops != NULL) { + vdev_ops_t ops; + if (mdb_vread(&ops, sizeof (ops), + (uintptr_t)vdev.vdev_ops) == -1) { + mdb_warn("failed to read vdev_ops at %p\n", + vdev.vdev_ops); + return (DCMD_ERR); + } + (void) strcpy(desc, ops.vdev_op_type); + } else { + (void) strcpy(desc, "<unknown>"); + } + + if (depth == 0 && DCMD_HDRSPEC(flags)) + mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n", + "ADDR", "STATE", "AUX", + sizeof (uintptr_t) == 4 ? 
43 : 35, + "DESCRIPTION"); + + mdb_printf("%0?p ", addr); + + switch (vdev.vdev_state) { + case VDEV_STATE_CLOSED: + state = "CLOSED"; + break; + case VDEV_STATE_OFFLINE: + state = "OFFLINE"; + break; + case VDEV_STATE_CANT_OPEN: + state = "CANT_OPEN"; + break; + case VDEV_STATE_DEGRADED: + state = "DEGRADED"; + break; + case VDEV_STATE_HEALTHY: + state = "HEALTHY"; + break; + default: + state = "UNKNOWN"; + break; + } + + switch (vdev.vdev_stat.vs_aux) { + case VDEV_AUX_NONE: + aux = "-"; + break; + case VDEV_AUX_OPEN_FAILED: + aux = "OPEN_FAILED"; + break; + case VDEV_AUX_CORRUPT_DATA: + aux = "CORRUPT_DATA"; + break; + case VDEV_AUX_NO_REPLICAS: + aux = "NO_REPLICAS"; + break; + case VDEV_AUX_BAD_GUID_SUM: + aux = "BAD_GUID_SUM"; + break; + case VDEV_AUX_TOO_SMALL: + aux = "TOO_SMALL"; + break; + case VDEV_AUX_BAD_LABEL: + aux = "BAD_LABEL"; + break; + default: + aux = "UNKNOWN"; + break; + } + + mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc); + + if (queue) { + mdb_inc_indent(4); + mdb_printf("\n"); + mdb_printf("%p min pending 0x%llx\n", + (uintptr_t)(addr + offsetof(vdev_t, + vdev_queue.vq_min_pending)), + vdev.vdev_queue.vq_min_pending); + mdb_printf("%p max pending 0x%llx\n", + (uintptr_t)(addr + offsetof(vdev_t, + vdev_queue.vq_max_pending)), + vdev.vdev_queue.vq_max_pending); + mdb_printf("%p agg limit 0x%llx\n", + (uintptr_t)(addr + offsetof(vdev_t, + vdev_queue.vq_agg_limit)), + vdev.vdev_queue.vq_agg_limit); + mdb_printf("%p time shift 0x%llx\n", + (uintptr_t)(addr + offsetof(vdev_t, + vdev_queue.vq_time_shift)), + vdev.vdev_queue.vq_time_shift); + mdb_printf("%p ramp rate 0x%llx\n", + (uintptr_t)(addr + offsetof(vdev_t, + vdev_queue.vq_ramp_rate)), + vdev.vdev_queue.vq_ramp_rate); + mdb_dec_indent(4); + } + + if (stats) { + vdev_stat_t *vs = &vdev.vdev_stat; + int i; + + mdb_inc_indent(4); + mdb_printf("\n"); + mdb_printf("%<u> %12s %12s %12s %12s " + "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM", + "IOCTL"); + mdb_printf("OPS "); + 
for (i = 1; i < ZIO_TYPES; i++) + mdb_printf("%11#llx%s", vs->vs_ops[i], + i == ZIO_TYPES - 1 ? "" : " "); + mdb_printf("\n"); + mdb_printf("BYTES "); + for (i = 1; i < ZIO_TYPES; i++) + mdb_printf("%11#llx%s", vs->vs_bytes[i], + i == ZIO_TYPES - 1 ? "" : " "); + + + mdb_printf("\n"); + mdb_printf("EREAD %10#llx\n", vs->vs_read_errors); + mdb_printf("EWRITE %10#llx\n", vs->vs_write_errors); + mdb_printf("ECKSUM %10#llx\n", + vs->vs_checksum_errors); + mdb_dec_indent(4); + } + + if (queue || stats) + mdb_printf("\n"); + } + + children = vdev.vdev_children; + + if (children == 0 || !recursive) + return (DCMD_OK); + + child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC); + if (mdb_vread(child, children * sizeof (void *), + (uintptr_t)vdev.vdev_child) == -1) { + mdb_warn("failed to read vdev children at %p", vdev.vdev_child); + return (DCMD_ERR); + } + + for (c = 0; c < children; c++) { + if (do_print_vdev(child[c], flags, depth + 2, queue, stats, + recursive)) + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +static int +vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + int print_queue = FALSE; + int recursive = FALSE; + int stats = FALSE; + + if (mdb_getopts(argc, argv, + 'q', MDB_OPT_SETBITS, TRUE, &print_queue, + 'r', MDB_OPT_SETBITS, TRUE, &recursive, + 'e', MDB_OPT_SETBITS, TRUE, &stats, + NULL) != argc) + return (DCMD_USAGE); + + if (!(flags & DCMD_ADDRSPEC)) { + mdb_warn("no vdev_t address given\n"); + return (DCMD_ERR); + } + + return (do_print_vdev(addr, flags, 0, print_queue, stats, recursive)); +} + +typedef struct mdb_spa { + uintptr_t spa_dsl_pool; + uintptr_t spa_root_vdev; +} mdb_spa_t; + +typedef struct mdb_dsl_dir { + uintptr_t dd_phys; + uint64_t dd_used_bytes; + int64_t dd_space_towrite[TXG_SIZE]; +} mdb_dsl_dir_t; + +typedef struct mdb_dsl_dir_phys { + uint64_t dd_used_bytes; + uint64_t dd_compressed_bytes; + uint64_t dd_uncompressed_bytes; +} mdb_dsl_dir_phys_t; + +typedef struct mdb_vdev { + uintptr_t 
vdev_parent; + uintptr_t vdev_ms; + uint64_t vdev_ms_count; + vdev_stat_t vdev_stat; +} mdb_vdev_t; + +typedef struct mdb_metaslab { + space_map_t ms_allocmap[TXG_SIZE]; + space_map_t ms_freemap[TXG_SIZE]; + space_map_t ms_map; + uint64_t ms_usable_space; +} mdb_metaslab_t; + +/* + * ::spa_space [-b] + * + * Given a spa_t, print out its on-disk space usage and in-core + * estimates of future usage. If -b is given, print space in bytes. + * Otherwise print in megabytes. + */ +/* ARGSUSED */ +static int +spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + mdb_spa_t spa; + uintptr_t dp_root_dir; + mdb_dsl_dir_t dd; + mdb_dsl_dir_phys_t dsp; + uint64_t children; + uintptr_t childaddr; + uintptr_t *child; + uint64_t ms_allocmap[TXG_SIZE] = {0, 0, 0, 0}; + uint64_t ms_freemap[TXG_SIZE] = {0, 0, 0, 0}; + uint64_t ms_map = 0; + uint64_t ms_usable_space = 0; + int i, j; + int havecompressed = TRUE; + int shift = 20; + char *suffix = "M"; + int bits = FALSE; + + if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) != + argc) + return (DCMD_USAGE); + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (bits) { + shift = 0; + suffix = ""; + } + + if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) || + GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) || + GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) || + GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) || + GETMEMB(spa.spa_dsl_pool, struct dsl_pool, + dp_root_dir, dp_root_dir) || + GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) || + GETMEMB(dp_root_dir, struct dsl_dir, + dd_used_bytes, dd.dd_used_bytes) || + GETMEMB(dp_root_dir, struct dsl_dir, + dd_space_towrite, dd.dd_space_towrite) || + GETMEMB(dd.dd_phys, struct dsl_dir_phys, + dd_used_bytes, dsp.dd_used_bytes)) { + return (DCMD_ERR); + } + + if (GETMEMB(dd.dd_phys, struct dsl_dir_phys, + dd_compressed_bytes, dsp.dd_compressed_bytes) || + GETMEMB(dd.dd_phys, 
struct dsl_dir_phys, + dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) { + havecompressed = FALSE; + } + + child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC); + if (mdb_vread(child, children * sizeof (void *), childaddr) == -1) { + mdb_warn("failed to read root vdev children at %p", childaddr); + return (DCMD_ERR); + } + + mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n", + dd.dd_space_towrite[0] >> shift, suffix, + dd.dd_space_towrite[1] >> shift, suffix, + dd.dd_space_towrite[2] >> shift, suffix, + dd.dd_space_towrite[3] >> shift, suffix); + mdb_printf("dd_used_bytes = %llu%s\n", + dd.dd_used_bytes >> shift, suffix); + + mdb_printf("dd_phys.dd_used_bytes = %llu%s\n", + dsp.dd_used_bytes >> shift, suffix); + if (havecompressed) { + mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n", + dsp.dd_compressed_bytes >> shift, suffix); + mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n", + dsp.dd_uncompressed_bytes >> shift, suffix); + } + + for (i = 0; i < children; i++) { + mdb_vdev_t vd; + uintptr_t *vdev_ms; + + if (GETMEMB(child[i], struct vdev, + vdev_parent, vd.vdev_parent) || + GETMEMB(child[i], struct vdev, + vdev_stat, vd.vdev_stat) || + GETMEMB(child[i], struct vdev, vdev_ms, vd.vdev_ms) || + GETMEMB(child[i], struct vdev, + vdev_ms_count, vd.vdev_ms_count)) { + return (DCMD_ERR); + } + + /* + * If this is the root vdev, its stats are the pool-wide stats. + */ + if (vd.vdev_parent == NULL) { + mdb_printf("pool_alloc = %llu%s\n", + vd.vdev_stat.vs_alloc >> shift, suffix); + mdb_printf("pool_space = %llu%s\n", + vd.vdev_stat.vs_space >> shift, suffix); + } + + /* + * If this is not a top-level vdev, it doesn't have space. 
+ */ + if (vd.vdev_parent != spa.spa_root_vdev) + continue; + + vdev_ms = mdb_alloc(vd.vdev_ms_count * sizeof (void*), + UM_SLEEP | UM_GC); + if (mdb_vread(vdev_ms, vd.vdev_ms_count * sizeof (void*), + (uintptr_t)vd.vdev_ms) == -1) { + mdb_warn("failed to read vdev_ms at %p", vd.vdev_ms); + return (DCMD_ERR); + } + + for (j = 0; j < vd.vdev_ms_count; j++) { + mdb_metaslab_t ms; + + if (GETMEMB(vdev_ms[j], struct metaslab, + ms_allocmap, ms.ms_allocmap) || + GETMEMB(vdev_ms[j], struct metaslab, + ms_freemap, ms.ms_freemap) || + GETMEMB(vdev_ms[j], struct metaslab, + ms_map, ms.ms_map) || + GETMEMB(vdev_ms[j], struct metaslab, + ms_usable_space, ms.ms_usable_space)) { + return (DCMD_ERR); + } + + ms_allocmap[0] += ms.ms_allocmap[0].sm_space; + ms_allocmap[1] += ms.ms_allocmap[1].sm_space; + ms_allocmap[2] += ms.ms_allocmap[2].sm_space; + ms_allocmap[3] += ms.ms_allocmap[3].sm_space; + ms_freemap[0] += ms.ms_freemap[0].sm_space; + ms_freemap[1] += ms.ms_freemap[1].sm_space; + ms_freemap[2] += ms.ms_freemap[2].sm_space; + ms_freemap[3] += ms.ms_freemap[3].sm_space; + ms_map += ms.ms_map.sm_space; + ms_usable_space += ms.ms_usable_space; + } + } + + mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n", + ms_allocmap[0] >> shift, suffix, + ms_allocmap[1] >> shift, suffix, + ms_allocmap[2] >> shift, suffix, + ms_allocmap[3] >> shift, suffix); + mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n", + ms_freemap[0] >> shift, suffix, + ms_freemap[1] >> shift, suffix, + ms_freemap[2] >> shift, suffix, + ms_freemap[3] >> shift, suffix); + mdb_printf("ms_map = %llu%s\n", ms_map >> shift, suffix); + mdb_printf("ms_usable_space = %llu%s\n", + ms_usable_space >> shift, suffix); + + return (DCMD_OK); +} + +/* + * ::spa_verify + * + * Given a spa_t, verify that the pool is self-consistent. + * Currently, it only checks to make sure that the vdev tree exists. 
+ */ +/* ARGSUSED */ +static int +spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + spa_t spa; + + if (argc != 0 || !(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_vread(&spa, sizeof (spa), addr) == -1) { + mdb_warn("failed to read spa_t at %p", addr); + return (DCMD_ERR); + } + + if (spa.spa_root_vdev == NULL) { + mdb_printf("no vdev tree present\n"); + return (DCMD_OK); + } + + return (DCMD_OK); +} + +/* + * ::spa_vdevs + * + * -e Include error stats + * + * Print out a summarized list of vdevs for the given spa_t. + * This is accomplished by invoking "::vdev -re" on the root vdev. + */ +/* ARGSUSED */ +static int +spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + spa_t spa; + mdb_arg_t v; + int errors = FALSE; + + if (mdb_getopts(argc, argv, + 'e', MDB_OPT_SETBITS, TRUE, &errors, + NULL) != argc) + return (DCMD_USAGE); + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_vread(&spa, sizeof (spa), addr) == -1) { + mdb_warn("failed to read spa_t at %p", addr); + return (DCMD_ERR); + } + + v.a_type = MDB_TYPE_STRING; + v.a_un.a_str = errors ? 
"-re" : "-r"; + + return (mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev, + flags, 1, &v)); +} + +typedef struct txg_list_walk_data { + uintptr_t lw_head[TXG_SIZE]; + int lw_txgoff; + int lw_maxoff; + size_t lw_offset; + void *lw_obj; +} txg_list_walk_data_t; + +static int +txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff) +{ + txg_list_walk_data_t *lwd; + txg_list_t list; + int i; + + lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC); + if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) { + mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr); + return (WALK_ERR); + } + + for (i = 0; i < TXG_SIZE; i++) + lwd->lw_head[i] = (uintptr_t)list.tl_head[i]; + lwd->lw_offset = list.tl_offset; + lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t), + UM_SLEEP | UM_GC); + lwd->lw_txgoff = txg; + lwd->lw_maxoff = maxoff; + + wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; + wsp->walk_data = lwd; + + return (WALK_NEXT); +} + +static int +txg_list_walk_init(mdb_walk_state_t *wsp) +{ + return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1)); +} + +static int +txg_list0_walk_init(mdb_walk_state_t *wsp) +{ + return (txg_list_walk_init_common(wsp, 0, 0)); +} + +static int +txg_list1_walk_init(mdb_walk_state_t *wsp) +{ + return (txg_list_walk_init_common(wsp, 1, 1)); +} + +static int +txg_list2_walk_init(mdb_walk_state_t *wsp) +{ + return (txg_list_walk_init_common(wsp, 2, 2)); +} + +static int +txg_list3_walk_init(mdb_walk_state_t *wsp) +{ + return (txg_list_walk_init_common(wsp, 3, 3)); +} + +static int +txg_list_walk_step(mdb_walk_state_t *wsp) +{ + txg_list_walk_data_t *lwd = wsp->walk_data; + uintptr_t addr; + txg_node_t *node; + int status; + + while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) { + lwd->lw_txgoff++; + wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; + } + + if (wsp->walk_addr == NULL) + return (WALK_DONE); + + addr = wsp->walk_addr - lwd->lw_offset; + + if (mdb_vread(lwd->lw_obj, 
+ lwd->lw_offset + sizeof (txg_node_t), addr) == -1) { + mdb_warn("failed to read list element at %#lx", addr); + return (WALK_ERR); + } + + status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata); + node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset); + wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff]; + + return (status); +} + +/* ARGSUSED */ +static void +txg_list_walk_fini(mdb_walk_state_t *wsp) +{ +} + +/* + * ::walk spa + * + * Walk all named spa_t structures in the namespace. This is nothing more than + * a layered avl walk. + */ +static int +spa_walk_init(mdb_walk_state_t *wsp) +{ + GElf_Sym sym; + + if (wsp->walk_addr != NULL) { + mdb_warn("spa walk only supports global walks\n"); + return (WALK_ERR); + } + + if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) { + mdb_warn("failed to find symbol 'spa_namespace_avl'"); + return (WALK_ERR); + } + + wsp->walk_addr = (uintptr_t)sym.st_value; + + if (mdb_layered_walk("avl", wsp) == -1) { + mdb_warn("failed to walk 'avl'\n"); + return (WALK_ERR); + } + + return (WALK_NEXT); +} + +static int +spa_walk_step(mdb_walk_state_t *wsp) +{ + spa_t spa; + + if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) { + mdb_warn("failed to read spa_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + + return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata)); +} + +/* + * MDB module linkage information: + * + * We declare a list of structures describing our dcmds, and a function + * named _mdb_init to return a pointer to our module information. 
+ */ + +static const mdb_dcmd_t dcmds[] = { + { "blkptr", ":", "print blkptr_t", blkptr }, + { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, + { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, + { "dbufs", + "\t[-O objset_t*] [-n objset_name | \"mos\"] [-o object | \"mdn\"] \n" + "\t[-l level] [-b blkid | \"bonus\"]", + "find dmu_buf_impl_t's that meet criterion", dbufs }, + { "abuf_find", "dva_word[0] dva_word[1]", + "find arc_buf_hdr_t of a specified DVA", + abuf_find }, + { "spa", "?[-cv]", "spa_t summary", spa_print }, + { "spa_config", ":", "print spa_t configuration", spa_print_config }, + { "spa_verify", ":", "verify spa_t consistency", spa_verify }, + { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space }, + { "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs }, + { "vdev", ":[-qre]", "vdev_t summary", vdev_print }, + { "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline }, + { NULL } +}; + +static const mdb_walker_t walkers[] = { + /* + * In userland, there is no generic provider of list_t walkers, so we + * need to add it. 
+ */ +#ifndef _KERNEL + { LIST_WALK_NAME, LIST_WALK_DESC, + list_walk_init, list_walk_step, list_walk_fini }, +#endif + { "dbufs", "walk cached ZFS dbufs", + dbuf_walk_init, dbuf_walk_step, dbuf_walk_fini }, + { "zms_freelist", "walk ZFS metaslab freelist", + freelist_walk_init, freelist_walk_step, freelist_walk_fini }, + { "txg_list", "given any txg_list_t *, walk all entries in all txgs", + txg_list_walk_init, txg_list_walk_step, txg_list_walk_fini }, + { "txg_list0", "given any txg_list_t *, walk all entries in txg 0", + txg_list0_walk_init, txg_list_walk_step, txg_list_walk_fini }, + { "txg_list1", "given any txg_list_t *, walk all entries in txg 1", + txg_list1_walk_init, txg_list_walk_step, txg_list_walk_fini }, + { "txg_list2", "given any txg_list_t *, walk all entries in txg 2", + txg_list2_walk_init, txg_list_walk_step, txg_list_walk_fini }, + { "txg_list3", "given any txg_list_t *, walk all entries in txg 3", + txg_list3_walk_init, txg_list_walk_step, txg_list_walk_fini }, + { "spa", "walk all spa_t entries in the namespace", + spa_walk_init, spa_walk_step, NULL }, + { NULL } +}; + +static const mdb_modinfo_t modinfo = { + MDB_API_VERSION, dcmds, walkers +}; + +const mdb_modinfo_t * +_mdb_init(void) +{ + return (&modinfo); +} diff --git a/usr/src/cmd/mdb/intel/amd64/genunix/Makefile b/usr/src/cmd/mdb/intel/amd64/genunix/Makefile index 3e59587d49..25af0c5a02 100644 --- a/usr/src/cmd/mdb/intel/amd64/genunix/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/genunix/Makefile @@ -29,6 +29,7 @@ MODULE = genunix.so MDBTGT = kvm COMMONSRCS = \ + avl.c \ bio.c \ contract.c \ cpupart.c \ diff --git a/usr/src/cmd/mdb/intel/amd64/libavl/Makefile b/usr/src/cmd/mdb/intel/amd64/libavl/Makefile new file mode 100644 index 0000000000..4f3e9cf60a --- /dev/null +++ b/usr/src/cmd/mdb/intel/amd64/libavl/Makefile @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# 
(the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libavl.so +MDBTGT = proc + +MODSRCS = libavl.c \ + avl.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.amd64 +include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile new file mode 100644 index 0000000000..d2a5a8c2a6 --- /dev/null +++ b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libzpool.so +MDBTGT = proc + +MODSRCS = zfs.c list.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.amd64 +include ../../../Makefile.module + +MODSRCS_DIR = ../../../common/modules/zfs +GENUNIX_DIR = ../../../common/modules/genunix + +CPPFLAGS += -I../../../../../lib/libzpool/common \ + -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +dmod/%.o: $(GENUNIX_DIR)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +dmod/%.ln: $(GENUNIX_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile new file mode 100644 index 0000000000..972d59937e --- /dev/null +++ b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = zfs.so +MDBTGT = kvm + +MODSRCS = zfs.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.amd64 +include ../../../Makefile.module + +CPPFLAGS += -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/ia32/genunix/Makefile b/usr/src/cmd/mdb/intel/ia32/genunix/Makefile index 72c5f6caf6..eec8884381 100644 --- a/usr/src/cmd/mdb/intel/ia32/genunix/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/genunix/Makefile @@ -29,6 +29,7 @@ MODULE = genunix.so MDBTGT = kvm COMMONSRCS = \ + avl.c \ bio.c \ contract.c \ cpupart.c \ diff --git a/usr/src/cmd/mdb/intel/ia32/libavl/Makefile b/usr/src/cmd/mdb/intel/ia32/libavl/Makefile new file mode 100644 index 0000000000..1f4cfbe075 --- /dev/null +++ b/usr/src/cmd/mdb/intel/ia32/libavl/Makefile @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libavl.so +MDBTGT = proc + +MODSRCS = libavl.c \ + avl.c + +include ../../../../Makefile.cmd +include ../../Makefile.ia32 +include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile new file mode 100644 index 0000000000..c8e8b4bb34 --- /dev/null +++ b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile @@ -0,0 +1,51 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libzpool.so +MDBTGT = proc + +MODSRCS = zfs.c list.c + +include ../../../../Makefile.cmd +include ../../Makefile.ia32 +include ../../../Makefile.module + +MODSRCS_DIR = ../../../common/modules/zfs +GENUNIX_DIR = ../../../common/modules/genunix + +CPPFLAGS += -I../../../../../lib/libzpool/common \ + -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +dmod/%.o: $(GENUNIX_DIR)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +dmod/%.ln: $(GENUNIX_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/intel/ia32/zfs/Makefile b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile new file mode 100644 index 0000000000..a569d4fd91 --- /dev/null +++ b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" +# + +MODULE = zfs.so +MDBTGT = kvm + +MODSRCS = zfs.c + +include ../../../../Makefile.cmd +include ../../Makefile.ia32 +include ../../../Makefile.module + +CPPFLAGS += -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/kmdb/kmdb_kdi_isadep.c b/usr/src/cmd/mdb/sparc/kmdb/kmdb_kdi_isadep.c index 325929a7f3..1d46481cfc 100644 --- a/usr/src/cmd/mdb/sparc/kmdb/kmdb_kdi_isadep.c +++ b/usr/src/cmd/mdb/sparc/kmdb/kmdb_kdi_isadep.c @@ -70,7 +70,7 @@ kdi_xc_one(int cpuid, void (*cb)(void)) /*ARGSUSED1*/ static int -kdi_init_cpus_cb(dnode_t node, void *arg, void *result) +kdi_init_cpus_cb(pnode_t node, void *arg, void *result) { /* * Sun4v dosen't support virtual address cache diff --git a/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.c b/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.c index 1b9b4dfcd5..c99878bf31 100644 --- a/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.c +++ b/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.c @@ -69,7 +69,7 @@ kmdb_prom_get_handle(char *name) char * kmdb_prom_get_options_prop(kmdb_auxv_t *kav, char *propname) { - dnode_t node; + pnode_t node; ssize_t len; char *val; @@ -97,18 +97,18 @@ kmdb_prom_free_options_prop(char *val) } int -kmdb_prom_getprop(dnode_t node, char *name, caddr_t value) +kmdb_prom_getprop(pnode_t node, char *name, caddr_t value) { return (prom_getprop(node, name, value)); } typedef struct walk_cpu_data { - int (*wcd_cb)(dnode_t, void *, void *); + int (*wcd_cb)(pnode_t, void *, void *); void *wcd_arg; } walk_cpu_data_t; static int -walk_cpus_cb(dnode_t node, void *arg, void *result) +walk_cpus_cb(pnode_t node, void *arg, void *result) { walk_cpu_data_t *wcd = arg; @@ -139,7 +139,7 @@ walk_cpus_cb(dnode_t node, void *arg, void *result) } void -kmdb_prom_walk_cpus(int (*cb)(dnode_t, void *, void *), void *arg, void *result) +kmdb_prom_walk_cpus(int (*cb)(pnode_t, void *, void *), void *arg, void *result) { walk_cpu_data_t wcd; 
diff --git a/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.h b/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.h index 7b5f0a8ee2..f2d160aefe 100644 --- a/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.h +++ b/usr/src/cmd/mdb/sparc/kmdb/kmdb_promif_isadep.h @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,12 +39,12 @@ extern "C" { #endif -extern void kmdb_prom_walk_cpus(int (*)(dnode_t, void *, void *), +extern void kmdb_prom_walk_cpus(int (*)(pnode_t, void *, void *), void *, void *); extern void kmdb_prom_enter_mon(void); extern void kmdb_prom_exit_to_mon(void); extern void kmdb_prom_interpret(const char *); -extern int kmdb_prom_getprop(dnode_t, char *, caddr_t); +extern int kmdb_prom_getprop(pnode_t, char *, caddr_t); /* private to promif */ extern int kmdb_prom_translate_virt(uintptr_t, physaddr_t *); diff --git a/usr/src/cmd/mdb/sparc/v7/libavl/Makefile b/usr/src/cmd/mdb/sparc/v7/libavl/Makefile new file mode 100644 index 0000000000..a109e5fb66 --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v7/libavl/Makefile @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libavl.so +MDBTGT = proc + +MODSRCS = libavl.c \ + avl.c + +include ../../../../Makefile.cmd +include ../../Makefile.sparcv7 +include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile new file mode 100644 index 0000000000..501b3ef347 --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile @@ -0,0 +1,51 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libzpool.so +MDBTGT = proc + +MODSRCS = zfs.c list.c + +include ../../../../Makefile.cmd +include ../../Makefile.sparcv7 +include ../../../Makefile.module + +MODSRCS_DIR = ../../../common/modules/zfs +GENUNIX_DIR = ../../../common/modules/genunix + +CPPFLAGS += -I../../../../../lib/libzpool/common \ + -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +dmod/%.o: $(GENUNIX_DIR)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +dmod/%.ln: $(GENUNIX_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/sparc/v9/genunix/Makefile b/usr/src/cmd/mdb/sparc/v9/genunix/Makefile index 7cfa4c1b1f..460d76057e 100644 --- a/usr/src/cmd/mdb/sparc/v9/genunix/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/genunix/Makefile @@ -29,6 +29,7 @@ MODULE = genunix.so MDBTGT = kvm COMMONSRCS = \ + avl.c \ bio.c \ contract.c \ cpupart.c \ diff --git a/usr/src/cmd/mdb/sparc/v9/libavl/Makefile b/usr/src/cmd/mdb/sparc/v9/libavl/Makefile new file mode 100644 index 0000000000..7785db8110 --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v9/libavl/Makefile @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libavl.so +MDBTGT = proc + +MODSRCS = libavl.c \ + avl.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.sparcv9 +include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile new file mode 100644 index 0000000000..ddcd3d89bc --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libzpool.so +MDBTGT = proc + +MODSRCS = zfs.c list.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.sparcv9 +include ../../../Makefile.module + +MODSRCS_DIR = ../../../common/modules/zfs +GENUNIX_DIR = ../../../common/modules/genunix + +CPPFLAGS += -I../../../../../lib/libzpool/common \ + -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +dmod/%.o: $(GENUNIX_DIR)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +dmod/%.ln: $(GENUNIX_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/sparc/v9/zfs/Makefile b/usr/src/cmd/mdb/sparc/v9/zfs/Makefile new file mode 100644 index 0000000000..fd6c408774 --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v9/zfs/Makefile @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = zfs.so +MDBTGT = kvm + +MODSRCS = zfs.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.sparcv9 +include ../../../Makefile.module + +CPPFLAGS += -I../../../../../uts/common/fs/zfs + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mv/Makefile b/usr/src/cmd/mv/Makefile index 763498f6d5..0225d52219 100644 --- a/usr/src/cmd/mv/Makefile +++ b/usr/src/cmd/mv/Makefile @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -50,9 +50,9 @@ XGETFLAGS += -a -x mv.xcl CPPFLAGS += -D_FILE_OFFSET_BITS=64 LINTFLAGS += -DXPG4 -lint := LDLIBS += -lcmdutils -lavl -$(PROG) := LDLIBS += $(ZLAZYLOAD) -lcmdutils -lavl $(ZNOLAZYLOAD) -$(XPG4) := LDLIBS += $(ZLAZYLOAD) -lcmdutils -lavl $(ZNOLAZYLOAD) +lint := LDLIBS += -lcmdutils -lavl -lsec +$(PROG) := LDLIBS += $(ZLAZYLOAD) -lcmdutils -lavl -lsec $(ZNOLAZYLOAD) +$(XPG4) := LDLIBS += $(ZLAZYLOAD) -lcmdutils -lavl -lsec $(ZNOLAZYLOAD) .KEEP_STATE: diff --git a/usr/src/cmd/mv/mv.c b/usr/src/cmd/mv/mv.c index 507ff30d65..3cdceeafb4 100644 --- a/usr/src/cmd/mv/mv.c +++ b/usr/src/cmd/mv/mv.c @@ -64,6 +64,7 @@ #include <limits.h> #include <sys/acl.h> #include <libcmdutils.h> +#include <aclutils.h> #define FTYPE(A) (A.st_mode) #define FMODE(A) (A.st_mode) @@ -138,11 +139,9 @@ static int attrsilent = 0; static int targetexists = 0; static char yeschr[SCHAR_MAX + 2]; static char nochr[SCHAR_MAX + 2]; -static int s1aclcnt; -static aclent_t *s1aclp = NULL; static int cmdarg; /* command line argument */ static avl_tree_t *stree = NULL; /* source file inode search tree */ - +static acl_t *s1acl; int main(int argc, char *argv[]) @@ -803,9 +802,9 @@ copy: if (pflg || mve) { (void) chmod(target, FMODE(s1)); - if (s1aclp != NULL) { - if ((acl(target, SETACL, - s1aclcnt, 
s1aclp)) < 0) { + if (s1acl != NULL) { + if ((acl_set(target, + s1acl)) < 0) { if (pflg || mve) { (void) fprintf( stderr, @@ -1065,6 +1064,7 @@ chkfiles(char *source, char **to) int (*statf)() = (cpy && !(Pflg || (Hflg && !cmdarg))) ? stat : lstat; char *target = *to; + int error; /* * Make sure source file exists. @@ -1088,27 +1088,16 @@ chkfiles(char *source, char **to) * Get ACL info: don't bother with ln or mv'ing symlinks */ if ((!lnk) && !(mve && ISLNK(s1))) { - if (s1aclp != NULL) { - free(s1aclp); - s1aclp = NULL; + if (s1acl != NULL) { + acl_free(s1acl); + s1acl = NULL; } - if ((s1aclcnt = acl(source, GETACLCNT, 0, NULL)) < 0) { + if ((error = acl_get(source, ACL_NO_TRIVIAL, &s1acl)) != 0) { (void) fprintf(stderr, - "%s: failed to get acl entries\n", source); + "%s: failed to get acl entries: %s\n", source, + acl_strerror(error)); return (1); } - if (s1aclcnt > MIN_ACL_ENTRIES) { - if ((s1aclp = (aclent_t *)malloc( - sizeof (aclent_t) * s1aclcnt)) == NULL) { - (void) fprintf(stderr, "Insufficient memory\n"); - return (1); - } - if ((acl(source, GETACL, s1aclcnt, s1aclp)) < 0) { - (void) fprintf(stderr, - "%s: failed to get acl entries\n", source); - return (1); - } - } /* else: just permission bits */ } @@ -1563,8 +1552,9 @@ copydir(char *source, char *target) int pret = 0; /* need separate flag if -p is specified */ mode_t fixmode = (mode_t)0; /* cleanup mode after copy */ struct stat s1save; - int s1aclcnt_save; - aclent_t *s1aclp_save = NULL; + acl_t *s1acl_save; + + s1acl_save = NULL; if (cpy && !rflg) { (void) fprintf(stderr, @@ -1597,12 +1587,15 @@ copydir(char *source, char *target) * s1 gets overwritten when doing the recursive copy. 
*/ s1save = s1; - if (s1aclp != NULL) { - if ((s1aclp_save = (aclent_t *)malloc(sizeof (aclent_t) - * s1aclcnt)) != NULL) { - (void) memcpy(s1aclp_save, s1aclp, - sizeof (aclent_t) * s1aclcnt); - s1aclcnt_save = s1aclcnt; + if (s1acl != NULL) { + s1acl_save = acl_dup(s1acl); + if (s1acl_save == NULL) { + (void) fprintf(stderr, gettext("%s: " + "Insufficient memory to save acl" + " entry\n"), cmd); + if (pflg) + return (1); + } #ifdef XPG4 else { @@ -1627,9 +1620,8 @@ copydir(char *source, char *target) * ACL for directory */ if (pflg || mve) { - if (s1aclp_save != NULL) { - if ((acl(target, SETACL, s1aclcnt_save, s1aclp_save)) - < 0) { + if (s1acl_save != NULL) { + if (acl_set(target, s1acl_save) < 0) { #ifdef XPG4 if (pflg || mve) { #else @@ -1639,13 +1631,15 @@ copydir(char *source, char *target) "%s: failed to set acl entries " "on %s\n"), cmd, target); if (pflg) { - free(s1aclp_save); + acl_free(s1acl_save); + s1acl_save = NULL; ret++; } } /* else: silent and continue */ } - free(s1aclp_save); + acl_free(s1acl_save); + s1acl_save = NULL; } if ((pret = chg_mode(target, UID(s1save), GID(s1save), FMODE(s1save))) == 0) @@ -1705,7 +1699,6 @@ use_stdin(void) static int copyattributes(char *source, char *target) { - int ret; int sourcedirfd, targetdirfd; int srcfd, targfd; int tmpfd; @@ -1716,12 +1709,11 @@ copyattributes(char *source, char *target) char *srcbuf, *targbuf; size_t src_size, targ_size; int error = 0; + int aclerror; mode_t mode; int clearflg = 0; - int aclcnt; - int attrdiraclcnt; - aclent_t *aclp = NULL; - aclent_t *attrdiraclp = NULL; + acl_t *xacl = NULL; + acl_t *attrdiracl = NULL; struct stat attrdir, s3, s4; struct timeval times[2]; mode_t targmode; @@ -1918,58 +1910,30 @@ copyattributes(char *source, char *target) * Now set owner and group of attribute directory, implies * changing the ACL of the hidden attribute directory first. 
*/ - if ((attrdiraclcnt = facl(sourcedirfd, - GETACLCNT, 0, NULL)) < 0) { + if ((aclerror = facl_get(sourcedirfd, + ACL_NO_TRIVIAL, &attrdiracl)) != 0) { if (!attrsilent) { (void) fprintf(stderr, gettext( "%s: failed to get acl entries of" " attribute directory for" - " %s\n"), cmd, source); + " %s : %s\n"), cmd, + source, acl_strerror(aclerror)); ++error; } } - if (attrdiraclcnt > MIN_ACL_ENTRIES) { - if ((attrdiraclp = (aclent_t *)malloc( - sizeof (aclent_t) * attrdiraclcnt)) == NULL) { + + if (attrdiracl) { + if (facl_set(targetdirfd, attrdiracl) != 0) { if (!attrsilent) { (void) fprintf(stderr, gettext( - "insufficient memory" - " for acl\n")); + "%s: failed to set acl entries" + " on attribute directory " + "for %s\n"), cmd, target); ++error; } - } else { - if ((ret = facl(sourcedirfd, GETACL, - attrdiraclcnt, attrdiraclp)) == -1) { - if (!attrsilent) { - (void) fprintf(stderr, - gettext( - "%s: failed to get acl" - " entries of attribute" - " directory for" - " %s\n"), cmd, target); - free(attrdiraclp); - attrdiraclp = NULL; - attrdiraclcnt = 0; - ++error; - } - - } - if (ret != -1 && (facl(targetdirfd, SETACL, - attrdiraclcnt, - attrdiraclp) != 0)) { - if (!attrsilent) { - (void) fprintf(stderr, gettext( - "%s: failed to set acl entries" - " on attribute directory " - "for %s\n"), cmd, target); - ++error; - } - free(attrdiraclp); - attrdiraclp = NULL; - attrdiraclcnt = 0; - } + acl_free(attrdiracl); + attrdiracl = NULL; } - } } @@ -2040,52 +2004,17 @@ copyattributes(char *source, char *target) } if (pflg || mve) { - if ((aclcnt = facl(srcattrfd, - GETACLCNT, 0, NULL)) < 0) { + if ((aclerror = facl_get(srcattrfd, + ACL_NO_TRIVIAL, &xacl)) != 0) { if (!attrsilent) { (void) fprintf(stderr, gettext( "%s: failed to get acl entries of" " attribute %s for" - " %s: "), cmd, dp->d_name, source); - perror(""); + " %s: %s"), cmd, dp->d_name, + source, acl_strerror(aclerror)); ++error; } } - if (aclcnt > MIN_ACL_ENTRIES) { - if ((aclp = (aclent_t *)malloc( - sizeof 
(aclent_t) * aclcnt)) == - NULL) { - if (!attrsilent) { - (void) fprintf(stderr, gettext( - "insufficient memory" - " for acl: ")); - perror(""); - ++error; - } - } else { - - if ((facl(srcattrfd, GETACL, - aclcnt, aclp)) < 0) { - if (!attrsilent) { - (void) fprintf(stderr, - gettext( - "%s: failed to get" - " acl entries of" - " attribute %s for" - /*CSTYLED*/ - " %s: "), cmd, - dp->d_name, target); - free(aclp); - aclp = NULL; - perror(""); - ++error; - } - - } - - } - } - } (void) unlinkat(targetdirfd, dp->d_name, 0); @@ -2105,8 +2034,8 @@ copyattributes(char *source, char *target) /* * preserve ACL */ - if ((pflg || mve) && aclp != NULL) { - if ((facl(targattrfd, SETACL, aclcnt, aclp)) < 0) { + if ((pflg || mve) && xacl != NULL) { + if ((facl_set(targattrfd, xacl)) < 0) { if (!attrsilent) { (void) fprintf(stderr, gettext( "%s: failed to set acl entries on" @@ -2114,9 +2043,8 @@ copyattributes(char *source, char *target) "%s\n"), cmd, dp->d_name, target); ++error; } - free(aclp); - aclp = NULL; - aclcnt = 0; + acl_free(xacl); + xacl = NULL; } } @@ -2231,11 +2159,10 @@ copyattributes(char *source, char *target) } } next: - if (aclp != NULL) { - free(aclp); - aclp = NULL; + if (xacl != NULL) { + acl_free(xacl); + xacl = NULL; } - aclcnt = 0; if (srcbuf != NULL) free(srcbuf); if (targbuf != NULL) @@ -2248,10 +2175,14 @@ next: srcbuf = targbuf = NULL; } out: - if (aclp != NULL) - free(aclp); - if (attrdiraclp != NULL) - free(attrdiraclp); + if (xacl != NULL) { + acl_free(xacl); + xacl = NULL; + } + if (attrdiracl != NULL) { + acl_free(attrdiracl); + attrdiracl = NULL; + } if (srcbuf) free(srcbuf); if (targbuf) diff --git a/usr/src/cmd/pack/Makefile b/usr/src/cmd/pack/Makefile index 938e9e9f16..49dd679511 100644 --- a/usr/src/cmd/pack/Makefile +++ b/usr/src/cmd/pack/Makefile @@ -22,7 +22,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
# Use is subject to license terms. # @@ -32,6 +32,7 @@ include ../Makefile.cmd CFLAGS += $(CCVERBOSE) XGETFLAGS += -a -x $(PROG).xcl +LDLIBS += -lsec .KEEP_STATE: diff --git a/usr/src/cmd/pack/pack.c b/usr/src/cmd/pack/pack.c index fd67a70a3c..c8aad347b3 100644 --- a/usr/src/cmd/pack/pack.c +++ b/usr/src/cmd/pack/pack.c @@ -51,6 +51,8 @@ #include <string.h> #include <dirent.h> #include <unistd.h> +#include <sys/acl.h> +#include <aclutils.h> #undef lint @@ -350,7 +352,9 @@ main(int argc, char *argv[]) register char *cp; int k, sep, errflg = 0; int c; + int error; int fcount = 0; /* count failures */ + acl_t *aclp = NULL; (void) setlocale(LC_ALL, ""); #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ @@ -448,6 +452,7 @@ main(int argc, char *argv[]) "pack: %s: already exists\n"), filename); goto closein; } + if ((outfile = creat(filename, status.st_mode)) < 0) { fprintf(stderr, gettext( "pack: %s: cannot create: "), filename); @@ -455,6 +460,13 @@ main(int argc, char *argv[]) goto closein; } + error = facl_get(infile, ACL_NO_TRIVIAL, &aclp); + + if (error != 0) { + fprintf(stderr, gettext( + "pack: %s: cannot retrieve ACL: %s\n"), argv[k], + acl_strerror(error)); + } if (packfile(argv[k]) && ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) || (mv_xattrs(infile, outfile, @@ -509,6 +521,12 @@ main(int argc, char *argv[]) perror(""); } chown(filename, status.st_uid, status.st_gid); + if (aclp && (facl_set(outfile, aclp) < 0)) { + fprintf(stderr, gettext( + "pack: %s: failed to set acl entries\n"), + filename); + perror(""); + } if (!errflg) fcount--; /* success after all */ } else { @@ -517,6 +535,10 @@ main(int argc, char *argv[]) } unlink(filename); } + + if (aclp) + acl_free(aclp); + closein: close(outfile); close(infile); } diff --git a/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/picllibdevinfo.c b/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/picllibdevinfo.c index 265993d2c6..091e72383e 100644 --- 
a/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/picllibdevinfo.c +++ b/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/picllibdevinfo.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -73,10 +73,10 @@ typedef struct { int n_serial; int n_parallel; int n_network; -} list_t; +} plist_t; static void -free_list(list_t *listptr) +free_list(plist_t *listptr) { port_info_t *tmp; port_info_t *nextptr; @@ -110,7 +110,7 @@ compare(const void *a, const void *b) * assigns GeoAddr property for ports based on bus-addr */ static picl_errno_t -assign_geo_addr(list_t *list, frutree_port_type_t type) +assign_geo_addr(plist_t *list, frutree_port_type_t type) { int i = 0; @@ -181,7 +181,7 @@ assign_geo_addr(list_t *list, frutree_port_type_t type) } static picl_errno_t -create_port_config_info(list_t *list, frutree_device_args_t *devp) +create_port_config_info(plist_t *list, frutree_device_args_t *devp) { port_info_t *port_info = NULL; frutree_cache_t *cachep = NULL; @@ -316,7 +316,7 @@ probe_tree(di_node_t node, void *arg) char *devfs_path = NULL; char *bus_addr = NULL; char *drv_name = NULL; - list_t *listptr = NULL; + plist_t *listptr = NULL; port_info_t *port_info = NULL; frutree_port_type_t port_type = UNKNOWN_PORT; di_minor_t minor = DI_MINOR_NIL; @@ -324,7 +324,7 @@ probe_tree(di_node_t node, void *arg) if (arg == NULL) { return (DI_WALK_TERMINATE); } - listptr = (list_t *)arg; + listptr = (plist_t *)arg; while ((minor = di_minor_next(node, minor)) != DI_MINOR_NIL) { nodetype = di_minor_nodetype(minor); @@ -412,7 +412,7 @@ probe_libdevinfo(frutree_frunode_t *frup, frutree_device_args_t ** device, { di_node_t rnode; picl_errno_t rc; - list_t list; + plist_t list; if (frup == NULL) { return (PICL_FAILURE); diff --git a/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/piclscsi.c 
b/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/piclscsi.c index 9ba70fff8c..25280a5644 100644 --- a/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/piclscsi.c +++ b/usr/src/cmd/picl/plugins/sun4u/snowbird/frutree/piclscsi.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -101,23 +101,23 @@ typedef struct node { typedef struct linked_list { node_t *first; int num_nodes; -} list_t; +} plist_t; typedef struct scsi_info { frutree_frunode_t *frup; cfga_list_data_t *cfgalist; - list_t *list; + plist_t *list; int num_list; boolean_t compare_cfgadm; int geo_addr; } scsi_info_t; -static list_t *scsi_list = NULL; +static plist_t *scsi_list = NULL; static cfga_list_data_t *cfglist = NULL; static int nlist = 0; static void -free_list(list_t *list) +free_list(plist_t *list) { node_t *tmp = NULL, *tmp1 = NULL; @@ -136,7 +136,7 @@ free_list(list_t *list) * This routine gets the list of scsi controllers present */ static cfga_err_t -populate_controllers_list(list_t *cntrl_list, cfga_list_data_t *list, int num) +populate_controllers_list(plist_t *cntrl_list, cfga_list_data_t *list, int num) { int i; node_t *nodeptr = NULL; @@ -202,7 +202,7 @@ scsi_info_init() } } - scsi_list = (list_t *)malloc(sizeof (list_t)); + scsi_list = (plist_t *)malloc(sizeof (plist_t)); if (scsi_list == NULL) { free(cfglist); return (PICL_NOSPACE); @@ -231,7 +231,7 @@ scsi_info_fini() * caller should allocate memory for ap_id */ static picl_errno_t -find_scsi_controller(char *devfs_path, list_t *list, char *ap_id) +find_scsi_controller(char *devfs_path, plist_t *list, char *ap_id) { node_t *tmp = NULL; char *lasts = NULL; @@ -274,7 +274,7 @@ get_scsislot_name(char *devfs_path, char *bus_addr, char *name) picl_errno_t rc; int target_id = 0; int numlist; - list_t list; + plist_t list; cfga_err_t ap_list_err; cfga_list_data_t *cfgalist = NULL; char 
controller[MAXPATHLEN]; @@ -410,7 +410,7 @@ get_bus_addr(char *scsi_loc, char **bus_addr) */ static picl_errno_t dyn_probe_for_scsi_frus(frutree_frunode_t *frup, cfga_list_data_t *cfgalist, - list_t *list, int numlist) + plist_t *list, int numlist) { picl_errno_t rc; int i, geo_addr = 0; @@ -797,7 +797,7 @@ probe_disks(di_node_t node, void *arg) static picl_errno_t probe_scsi_in_libdevinfo(frutree_frunode_t *frup, cfga_list_data_t *cfgalist, - list_t *list, int num_list, boolean_t compare_cfgadm) + plist_t *list, int num_list, boolean_t compare_cfgadm) { di_node_t rnode; scsi_info_t *scsi_data = NULL; @@ -840,7 +840,7 @@ probe_for_scsi_frus(frutree_frunode_t *frup) { int numlist; picl_errno_t rc; - list_t list; + plist_t list; cfga_err_t ap_list_err; cfga_list_data_t *cfgalist = NULL; diff --git a/usr/src/cmd/pt_chmod/Makefile b/usr/src/cmd/pt_chmod/Makefile index c63f74d2fb..39666fc628 100644 --- a/usr/src/cmd/pt_chmod/Makefile +++ b/usr/src/cmd/pt_chmod/Makefile @@ -22,7 +22,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 1989-2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# @@ -32,7 +32,7 @@ include ../Makefile.cmd FILEMODE= 04511 -LDLIBS += -ldevinfo +LDLIBS += -ldevinfo -lsec .KEEP_STATE: diff --git a/usr/src/cmd/pt_chmod/pt_chmod.c b/usr/src/cmd/pt_chmod/pt_chmod.c index 4d09efd5a0..55aeb33d59 100644 --- a/usr/src/cmd/pt_chmod/pt_chmod.c +++ b/usr/src/cmd/pt_chmod/pt_chmod.c @@ -92,23 +92,8 @@ main(int argc, char **argv) } while (fdetach(tty) == 0); /* Remove ACLs */ - if (acl(tty, GETACLCNT, 0, NULL) > MIN_ACL_ENTRIES) { - aclent_t acls[3]; - acls[0].a_type = USER_OBJ; - acls[0].a_id = 0; - acls[0].a_perm = 6; - - acls[1].a_type = GROUP_OBJ; - acls[1].a_id = gid; - acls[1].a_perm = 2; - - acls[2].a_type = OTHER_OBJ; - acls[2].a_id = 0; - acls[2].a_perm = 0; - - (void) acl(tty, SETACL, 3, acls); - } + (void) acl_strip(tty, 0, gid, 0620); if (chown(tty, getuid(), gid)) return (1); diff --git a/usr/src/cmd/setfacl/setfacl.c b/usr/src/cmd/setfacl/setfacl.c index 0f97da84c3..c5e14dfd4a 100644 --- a/usr/src/cmd/setfacl/setfacl.c +++ b/usr/src/cmd/setfacl/setfacl.c @@ -31,6 +31,11 @@ static char sccsid[] = "@(#)setfacl.c 1.10 05/06/16 SMI"; #endif /* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* * setfacl [-r] -f aclfile file ... * setfacl [-r] -d acl_entries file ... * setfacl [-r] -m acl_entries file ... 
@@ -48,6 +53,7 @@ static char sccsid[] = "@(#)setfacl.c 1.10 05/06/16 SMI"; #include <sys/acl.h> #include <sys/types.h> #include <unistd.h> +#include <errno.h> #define ADD 1 @@ -202,6 +208,14 @@ get_acl_info(char *filep, aclent_t **aclpp) int aclcnt; if ((aclcnt = acl(filep, GETACLCNT, 0, NULL)) < 0) { + if (errno == ENOSYS) { + (void) fprintf(stderr, + gettext("file system doesn't support aclent_t " + "style ACL's.\n" + "See acl(5) for more information on" + " ACL styles support by Solaris.\n")); + return (-1); + } (void) fprintf(stderr, gettext("%s: failed to get acl count\n"), filep); perror("get acl count error"); diff --git a/usr/src/cmd/sum/sum.c b/usr/src/cmd/sum/sum.c index f303d627b7..c6df811930 100644 --- a/usr/src/cmd/sum/sum.c +++ b/usr/src/cmd/sum/sum.c @@ -40,6 +40,8 @@ #include <stdlib.h> #include <stdio.h> #include <sys/types.h> +#include <errno.h> +#include <string.h> static void usage(void); @@ -115,9 +117,9 @@ main(int argc, char **argv) } if (ferror(f)) { errflg++; - (void) fprintf(stderr, - gettext("sum: read error on %s\n"), - (argc > 0) ? argv[i] : "-"); + (void) fprintf(stderr, gettext("sum: read error " + "on '%s': %s\n"), (argc > 0) ? argv[i] : "-", + strerror(errno)); } if (alg == 1) (void) printf("%.5u %6lld", sum, diff --git a/usr/src/cmd/svc/milestone/devices-local b/usr/src/cmd/svc/milestone/devices-local index e646594475..d58518c56c 100644 --- a/usr/src/cmd/svc/milestone/devices-local +++ b/usr/src/cmd/svc/milestone/devices-local @@ -21,7 +21,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T. @@ -30,8 +30,6 @@ # # ident "%Z%%M% %I% %E% SMI" -# GLXXX - The SysV copyright should be unnecessary now? 
- # Initiate the device reconfiguration process in case we need some # device links established so that we can successfully perform our # remaining standard mounts. @@ -40,6 +38,8 @@ if [ `/sbin/zonename` != "global" ]; then exit 0 fi +. /lib/svc/share/smf_include.sh + svcprop -q -p system/reconfigure system/svc/restarter:default if [ $? -eq 0 ]; then echo 'Configuring devices.' > /dev/msglog 2>&1 @@ -74,4 +74,9 @@ if [ $? -eq 0 ]; then fi fi +# Create any zvol devices +if [ -x /usr/sbin/zfs ]; then + /usr/sbin/zfs volinit || exit $SMF_EXIT_ERR_FATAL +fi + exit 0 diff --git a/usr/src/cmd/svc/milestone/fs-local b/usr/src/cmd/svc/milestone/fs-local index 8199f0f1be..34c771e39d 100644 --- a/usr/src/cmd/svc/milestone/fs-local +++ b/usr/src/cmd/svc/milestone/fs-local @@ -28,6 +28,8 @@ . /lib/svc/share/smf_include.sh +result=$SMF_EXIT_OK + # Mount all local filesystems. cd /; /sbin/mountall -l >/dev/msglog @@ -36,7 +38,7 @@ if [ $rc -ne 0 ]; then msg="WARNING: /sbin/mountall -l failed: exit status $rc" echo $msg echo "$SMF_FMRI:" $msg >/dev/msglog - exit $SMF_EXIT_ERR_FATAL + result=$SMF_EXIT_ERR_FATAL fi # get rid of transient reboot entry in GRUB menu @@ -82,4 +84,17 @@ if [ -n "$vlist" ]; then fi fi -exit $SMF_EXIT_OK +# Mount all ZFS filesystems. + +if [ -x /usr/sbin/zfs ]; then + /usr/sbin/zfs mount -a >/dev/msglog 2>&1 + rc=$? 
+ if [ $rc -ne 0 ]; then + msg="WARNING: /usr/sbin/zfs mount -a failed: exit status $rc" + echo $msg + echo "$SMF_FMRI:" $msg >/dev/msglog + result=$SMF_EXIT_ERR_FATAL + fi +fi + +exit $result diff --git a/usr/src/cmd/tar/tar.c b/usr/src/cmd/tar/tar.c index 9a005ce4b7..66e60e225f 100644 --- a/usr/src/cmd/tar/tar.c +++ b/usr/src/cmd/tar/tar.c @@ -69,6 +69,7 @@ #include <limits.h> #include <iconv.h> #include <assert.h> +#include <aclutils.h> #if defined(__SunOS_5_6) || defined(__SunOS_5_7) extern int defcntl(); #endif @@ -381,7 +382,7 @@ struct file_list { static struct file_list *exclude_tbl[TABLE_SIZE], *include_tbl[TABLE_SIZE]; -static int append_secattr(char **, int *, int, aclent_t *, char); +static int append_secattr(char **, int *, acl_t *); static void write_ancillary(union hblock *, char *, int, char); static void add_file_to_table(struct file_list *table[], char *str); @@ -493,6 +494,7 @@ static char *get_component(char *path); static int retry_attrdir_open(char *name); static char *skipslashes(char *string, char *start); static void chop_endslashes(char *path); + static struct stat stbuf; static int checkflag = 0; @@ -2392,11 +2394,11 @@ doxtract(char *argv[]) int error; int symflag; int want; - aclent_t *aclp = NULL; /* acl buffer pointer */ - int aclcnt = 0; /* acl entries count */ + acl_t *aclp = NULL; /* acl info */ timestruc_t time_zero; /* used for call to doDirTimes */ int dircreate; int convflag; + int cnt; time_zero.tv_sec = 0; time_zero.tv_nsec = 0; @@ -2895,16 +2897,14 @@ filedone: #if defined(O_XATTR) if (xattrp != (struct xattr_buf *)NULL) { if (Hiddendir) - ret = facl(dirfd, SETACL, - aclcnt, aclp); + ret = facl_set(dirfd, aclp); else - ret = facl(ofile, SETACL, - aclcnt, aclp); + ret = facl_set(ofile, aclp); } else { - ret = acl(namep, SETACL, aclcnt, aclp); + ret = acl_set(namep, aclp); } #else - ret = acl(namep, SETACL, aclcnt, aclp); + ret = acl_set(namep, aclp); #endif if (ret < 0) { if (pflag) { @@ -2914,7 +2914,7 @@ filedone: } /* 
else: silent and continue */ } - free(aclp); + acl_free(aclp); aclp = NULL; } @@ -2986,30 +2986,41 @@ filedone: } bytes -= TBLOCK; } + bytes = stbuf.st_size; /* got all attributes in secp */ tp = secp; do { attr = (struct sec_attr *)tp; switch (attr->attr_type) { case UFSD_ACL: + case ACE_ACL: (void) sscanf(attr->attr_len, - "%7o", (uint_t *)&aclcnt); + "%7o", + (uint_t *) + &cnt); /* header is 8 */ attrsize = 8 + (int)strlen( &attr->attr_info[0]) + 1; - aclp = aclfromtext( - &attr->attr_info[0], &cnt); - if (aclp == NULL) { + + error = + acl_fromtext( + &attr->attr_info[0], &aclp); + + if (error != 0) { (void) fprintf(stderr, gettext( "aclfromtext " - "failed\n")); + "failed: %s\n"), + acl_strerror( + error)); + bytes -= attrsize; break; } - if (aclcnt != cnt) { + if (acl_cnt(aclp) != cnt) { (void) fprintf(stderr, gettext( "aclcnt error\n")); + bytes -= attrsize; break; } bytes -= attrsize; @@ -5520,9 +5531,7 @@ int append_secattr( char **secinfo, /* existing security info */ int *secinfo_len, /* length of existing security info */ - int size, /* new attribute size: unit depends on type */ - aclent_t *attrp, /* new attribute data pointer */ - char attr_type) /* new attribute type */ + acl_t *aclp) { char *new_secinfo; char *attrtext; @@ -5530,12 +5539,13 @@ append_secattr( int oldsize; /* no need to add */ - if (attrp == NULL) + if (aclp == (void *)NULL) return (0); - switch (attr_type) { - case UFSD_ACL: - attrtext = acltotext((aclent_t *)attrp, size); + switch (acl_type(aclp)) { + case ACLENT_T: + case ACE_T: + attrtext = acl_totext(aclp); if (attrtext == NULL) { (void) fprintf(stderr, "acltotext failed\n"); return (-1); @@ -5547,9 +5557,10 @@ append_secattr( (void) fprintf(stderr, "can't allocate memory\n"); return (-1); } - attr->attr_type = UFSD_ACL; + attr->attr_type = (acl_type(aclp) == ACLENT_T) ? 
+ UFSD_ACL : ACE_ACL; (void) sprintf(attr->attr_len, - "%06o", size); /* acl entry count */ + "%06o", acl_cnt(aclp)); /* acl entry count */ (void) strcpy((char *)&attr->attr_info[0], attrtext); free(attrtext); break; @@ -6705,11 +6716,11 @@ static int put_extra_attributes(char *longname, char *shortname, char *prefix, int filetype, char typeflag) { - int aclcnt; - static aclent_t *aclp; + static acl_t *aclp = NULL; + int error; - if (aclp != (aclent_t *)NULL) { - free(aclp); + if (aclp != NULL) { + acl_free(aclp); aclp = NULL; } #if defined(O_XATTR) @@ -6730,34 +6741,20 @@ put_extra_attributes(char *longname, char *shortname, char *prefix, if (((stbuf.st_mode & S_IFMT) != S_IFLNK)) { /* * Get ACL info: dont bother allocating space if - * there are only standard permissions, i.e. ACL - * count <= 4 + * there is only a trivial ACL. */ - if ((aclcnt = acl(shortname, GETACLCNT, 0, NULL)) < 0) { + if ((error = acl_get(shortname, ACL_NO_TRIVIAL, + &aclp)) != 0) { (void) fprintf(stderr, gettext( - "%s: failed to get acl count\n"), longname); + "%s: failed to retrieve acl : %s\n"), + longname, acl_strerror(error)); return (1); } - if (aclcnt > MIN_ACL_ENTRIES) { - if ((aclp = (aclent_t *)malloc( - sizeof (aclent_t) * aclcnt)) == NULL) { - (void) fprintf(stderr, gettext( - "Insufficient memory\n")); - return (1); - } - if (acl(shortname, GETACL, aclcnt, aclp) < 0) { - (void) fprintf(stderr, gettext( - "%s: failed to get acl entries\n"), - longname); - return (1); - } - } } /* append security attributes if any */ - if (aclp != (aclent_t *)NULL) { - (void) append_secattr(&secinfo, &len, aclcnt, - aclp, UFSD_ACL); + if (aclp != NULL) { + (void) append_secattr(&secinfo, &len, aclp); (void) write_ancillary(&dblock, secinfo, len, ACL_HDR); } } diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index c0a20c6a64..9e8567f2dc 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -88,6 +88,7 @@ #include <sys/ptms.h> #include <sys/aggr.h> #include 
<sys/dld.h> +#include <sys/fs/zfs.h> #include "ramdata.h" #include "proto.h" @@ -855,10 +856,76 @@ const struct ioc { /* dld data-link ioctls */ { (uint_t)DLDIOCATTR, "DLDIOCATTR", "dld_ioc_attr"}, { (uint_t)DLDIOCVLAN, "DLDIOCVLAN", "dld_ioc_vlan"}, + + /* ZFS ioctls */ + { (uint_t)ZFS_IOC_POOL_CREATE, "ZFS_IOC_POOL_CREATE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_DESTROY, "ZFS_IOC_POOL_DESTROY", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_IMPORT, "ZFS_IOC_POOL_IMPORT", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_EXPORT, "ZFS_IOC_POOL_EXPORT", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_CONFIGS, "ZFS_IOC_POOL_CONFIGS", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_GUID, "ZFS_IOC_POOL_GUID", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_STATS, "ZFS_IOC_POOL_STATS", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_TRYIMPORT, "ZFS_IOC_POOL_TRYIMPORT", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_SCRUB, "ZFS_IOC_POOL_SCRUB", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_POOL_FREEZE, "ZFS_IOC_POOL_FREEZE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_ADD, "ZFS_IOC_VDEV_ADD", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_REMOVE, "ZFS_IOC_VDEV_REMOVE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_ONLINE, "ZFS_IOC_VDEV_ONLINE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_OFFLINE, "ZFS_IOC_VDEV_OFFLINE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_ATTACH, "ZFS_IOC_VDEV_ATTACH", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_VDEV_DETACH, "ZFS_IOC_VDEV_DETACH", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_OBJSET_STATS, "ZFS_IOC_OBJSET_STATS", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_DATASET_LIST_NEXT, "ZFS_IOC_DATASET_LIST_NEXT", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SNAPSHOT_LIST_NEXT, "ZFS_IOC_SNAPSHOT_LIST_NEXT", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SET_PROP, "ZFS_IOC_SET_PROP", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SET_QUOTA, "ZFS_IOC_SET_QUOTA", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SET_RESERVATION, "ZFS_IOC_SET_RESERVATION", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SET_VOLSIZE, "ZFS_IOC_SET_VOLSIZE", + "zfs_cmd_t" }, + { 
(uint_t)ZFS_IOC_SET_VOLBLOCKSIZE, "ZFS_IOC_SET_VOLBLOCKSIZE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_CREATE_MINOR, "ZFS_IOC_CREATE_MINOR", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_REMOVE_MINOR, "ZFS_IOC_REMOVE_MINOR", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_CREATE, "ZFS_IOC_CREATE", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_DESTROY, "ZFS_IOC_DESTROY", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_ROLLBACK, "ZFS_IOC_ROLLBACK", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_RENAME, "ZFS_IOC_RENAME", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_RECVBACKUP, "ZFS_IOC_RECVBACKUP", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SENDBACKUP, "ZFS_IOC_SENDBACKUP", + "zfs_cmd_t" }, + { (uint_t)0, NULL, NULL } }; - void ioctl_ioccom(char *buf, size_t size, uint_t code, int nbytes, int x, int y) { diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c index 767652168b..c58e8c23a3 100644 --- a/usr/src/cmd/truss/print.c +++ b/usr/src/cmd/truss/print.c @@ -1117,6 +1117,9 @@ prt_acl(private_t *pri, int raw, long val) /* print acl() code */ case GETACL: s = "GETACL"; break; case SETACL: s = "SETACL"; break; case GETACLCNT: s = "GETACLCNT"; break; + case ACE_GETACL: s = "ACE_GETACL"; break; + case ACE_SETACL: s = "ACE_SETACL"; break; + case ACE_GETACLCNT: s = "ACE_GETACLCNT"; break; } } diff --git a/usr/src/cmd/ttymon/Makefile b/usr/src/cmd/ttymon/Makefile index 251af55ff3..2db4d184d0 100644 --- a/usr/src/cmd/ttymon/Makefile +++ b/usr/src/cmd/ttymon/Makefile @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -64,7 +64,7 @@ include ../Makefile.cmd CPPFLAGS += -DSYS_NAME $(XPG4):= CPPFLAGS += -DXPG4 sttydefs := LDLIBS += -lnsl -ttymon := LDLIBS += -lnsl -ldevinfo +ttymon := LDLIBS += -lnsl -lsec -ldevinfo # Only stty can be built with -DEUC. ttymon will dump core unless further # changes are made to it. 
diff --git a/usr/src/cmd/ttymon/tmexpress.c b/usr/src/cmd/ttymon/tmexpress.c index caef33b343..f9bc5363e2 100644 --- a/usr/src/cmd/ttymon/tmexpress.c +++ b/usr/src/cmd/ttymon/tmexpress.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -431,23 +431,6 @@ revokedevaccess(char *dev, uid_t uid, gid_t gid, mode_t mode) } while (fdetach(dev) == 0); /* Remove ACLs */ - if (acl(dev, GETACLCNT, 0, NULL) > MIN_ACL_ENTRIES) { - aclent_t acls[3]; - acls[0].a_type = USER_OBJ; - acls[0].a_id = uid; - acls[0].a_perm = 0; - - acls[1].a_type = GROUP_OBJ; - acls[1].a_id = gid; - acls[1].a_perm = 0; - - acls[2].a_type = OTHER_OBJ; - acls[2].a_id = 0; - acls[2].a_perm = 0; - - (void) acl(dev, SETACL, 3, acls); - } - - (void) chmod(dev, mode); + (void) acl_strip(dev, uid, gid, mode); } diff --git a/usr/src/cmd/unpack/Makefile b/usr/src/cmd/unpack/Makefile index 87d0d01e11..076a2013e8 100644 --- a/usr/src/cmd/unpack/Makefile +++ b/usr/src/cmd/unpack/Makefile @@ -22,7 +22,7 @@ # #ident "%Z%%M% %I% %E% SMI" # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -30,6 +30,7 @@ PROG= unpack include ../Makefile.cmd CFLAGS += $(CCVERBOSE) +LDLIBS += -lsec XGETFLAGS += -a -x unpack.xcl .KEEP_STATE: diff --git a/usr/src/cmd/unpack/unpack.c b/usr/src/cmd/unpack/unpack.c index 0d2fe15cb7..766d129066 100644 --- a/usr/src/cmd/unpack/unpack.c +++ b/usr/src/cmd/unpack/unpack.c @@ -24,7 +24,7 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -49,6 +49,8 @@ #include <limits.h> #include <sys/param.h> #include <dirent.h> +#include <sys/acl.h> +#include <aclutils.h> static struct utimbuf u_times; @@ -240,11 +242,13 @@ main(int argc, char *argv[]) { extern int optind; int i, k; + int error; int sep, errflg = 0, pcat = 0; register char *p1, *cp; int fcount = 0; /* failure count */ int max_name; void onsig(int); + acl_t *aclp = NULL; if (signal(SIGHUP, SIG_IGN) != SIG_IGN) @@ -334,6 +338,14 @@ main(int argc, char *argv[]) if (pcat) outfile = 1; /* standard output */ else { + + error = facl_get(infile, ACL_NO_TRIVIAL, &aclp); + if (error != 0) { + (void) printf(gettext( + "%s: %s: cannot retrieve ACL : %s\n"), + argv0, filename, acl_strerror(error)); + } + max_name = pathconf(filename, _PC_NAME_MAX); if (max_name == -1) { /* no limit on length of filename */ @@ -395,6 +407,12 @@ main(int argc, char *argv[]) } (void) chown(argvk, status.st_uid, status.st_gid); + if (aclp && (facl_set(outfile, aclp) < 0)) { + (void) printf(gettext("%s: cannot " + "set ACL on %s: "), argv0, argvk); + perror(""); + } + rmflg = 0; (void) printf(gettext("%s: %s: unpacked\n"), argv0, argvk); @@ -415,6 +433,11 @@ main(int argc, char *argv[]) done: (void) close(infile); if (!pcat) (void) close(outfile); + + if (aclp) { + acl_free(aclp); + aclp = NULL; /* don't free again for the next file */ + } } return (fcount); } diff --git a/usr/src/cmd/zdb/Makefile b/usr/src/cmd/zdb/Makefile new file mode 100644 index 0000000000..0ab3c2b8f0 --- /dev/null +++ b/usr/src/cmd/zdb/Makefile @@ -0,0 +1,55 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG:sh= basename `pwd` + +include ../Makefile.cmd + +$(INTEL_BLD)SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +install := TARGET = install +clean := TARGET = clean +clobber := TARGET = clobber +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber lint: $(SUBDIRS) + +install: $(SUBDIRS) + -$(RM) $(ROOTUSRSBINPROG) + -$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com new file mode 100644 index 0000000000..e3156cdeb0 --- /dev/null +++ b/usr/src/cmd/zdb/Makefile.com @@ -0,0 +1,62 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG:sh= cd ..; basename `pwd` +SRCS= ../$(PROG).c ../zdb_il.c + +include ../../Makefile.cmd + +INCS += -I../../../lib/libzpool/common +INCS += -I../../../uts/common/fs/zfs + +LDLIBS += -lzpool -lumem -lavl -lnvpair + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +CFLAGS += $(CCVERBOSE) +CFLAGS64 += $(CCVERBOSE) +CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS) + +# lint complains about unused _umem_* functions +LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 +LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 + +.KEEP_STATE: + +all: $(PROG) + +$(PROG): $(SRCS) + $(LINK.c) -o $(PROG) $(SRCS) $(LDLIBS) + $(POST_PROCESS) + +clean: + +lint: lint_SRCS + +include ../../Makefile.targ diff --git a/usr/src/cmd/zdb/amd64/Makefile b/usr/src/cmd/zdb/amd64/Makefile new file mode 100644 index 0000000000..c2f8b37b5d --- /dev/null +++ b/usr/src/cmd/zdb/amd64/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include ../../Makefile.cmd.64 + +install: all $(ROOTUSRSBINPROG64) diff --git a/usr/src/cmd/zdb/i386/Makefile b/usr/src/cmd/zdb/i386/Makefile new file mode 100644 index 0000000000..5c93bf6ac6 --- /dev/null +++ b/usr/src/cmd/zdb/i386/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +install: all $(ROOTUSRSBINPROG32) diff --git a/usr/src/cmd/zdb/inc.flg b/usr/src/cmd/zdb/inc.flg new file mode 100644 index 0000000000..bb65300cca --- /dev/null +++ b/usr/src/cmd/zdb/inc.flg @@ -0,0 +1,30 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +find_files "s.*" usr/src/uts/common/fs/zfs/sys +echo_file usr/src/uts/common/sys/fs/zfs.h diff --git a/usr/src/cmd/zdb/sparc/Makefile b/usr/src/cmd/zdb/sparc/Makefile new file mode 100644 index 0000000000..5c93bf6ac6 --- /dev/null +++ b/usr/src/cmd/zdb/sparc/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +install: all $(ROOTUSRSBINPROG32) diff --git a/usr/src/cmd/zdb/sparcv9/Makefile b/usr/src/cmd/zdb/sparcv9/Makefile new file mode 100644 index 0000000000..c2f8b37b5d --- /dev/null +++ b/usr/src/cmd/zdb/sparcv9/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include ../../Makefile.cmd.64 + +install: all $(ROOTUSRSBINPROG64) diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c new file mode 100644 index 0000000000..9163d6a0b0 --- /dev/null +++ b/usr/src/cmd/zdb/zdb.c @@ -0,0 +1,1869 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/dmu.h> +#include <sys/zap.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_znode.h> +#include <sys/vdev.h> +#include <sys/vdev_impl.h> +#include <sys/metaslab_impl.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_pool.h> +#include <sys/dbuf.h> +#include <sys/zil.h> +#include <sys/zil_impl.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <sys/dmu_traverse.h> +#include <sys/zio_checksum.h> +#include <sys/zio_compress.h> + +const char cmdname[] = "zdb"; +uint8_t dump_opt[256]; + +typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); + +extern void dump_intent_log(zilog_t *); +uint64_t *zopt_object = NULL; +int zopt_objects = 0; +int zdb_advance = ADVANCE_PRE; +zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; + +/* + * These libumem hooks provide a reasonable set of defaults for the allocator's + * debugging facilities. 
+ */ +const char * +_umem_debug_init() +{ + return ("default,verbose"); /* $UMEM_DEBUG setting */ +} + +const char * +_umem_logging_init(void) +{ + return ("fail,contents"); /* $UMEM_LOGGING setting */ +} + +static void +usage(void) +{ + (void) fprintf(stderr, + "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] " + "dataset [object...]\n" + " %s -C [pool]\n" + " %s -l dev\n", + cmdname, cmdname, cmdname); + + (void) fprintf(stderr, " -u uberblock\n"); + (void) fprintf(stderr, " -d datasets\n"); + (void) fprintf(stderr, " -C cached pool configuration\n"); + (void) fprintf(stderr, " -i intent logs\n"); + (void) fprintf(stderr, " -b block statistics\n"); + (void) fprintf(stderr, " -c checksum all data blocks\n"); + (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); + (void) fprintf(stderr, " -v verbose (applies to all others)\n"); + (void) fprintf(stderr, " -l dump label contents\n"); + (void) fprintf(stderr, " -L live pool (allows some errors)\n"); + (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> " + "visitation order\n"); + (void) fprintf(stderr, " -U use zpool.cache in /tmp\n"); + (void) fprintf(stderr, " -B objset:object:level:blkid -- " + "simulate bad block\n"); + (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " + "to make only that option verbose\n"); + (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); + exit(1); +} + +static void +fatal(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + (void) fprintf(stderr, "%s: ", cmdname); + (void) vfprintf(stderr, fmt, ap); + va_end(ap); + (void) fprintf(stderr, "\n"); + + exit(1); +} + +static void +dump_nvlist(nvlist_t *list, int indent) +{ + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { + switch (nvpair_type(elem)) { + case DATA_TYPE_STRING: + { + char *value; + + VERIFY(nvpair_value_string(elem, &value) == 0); + (void) printf("%*s%s='%s'\n", indent, "", + nvpair_name(elem), value); + } + break; + + case DATA_TYPE_UINT64: + { + uint64_t value; + + VERIFY(nvpair_value_uint64(elem, &value) == 0); + (void) printf("%*s%s=%llu\n", indent, "", + nvpair_name(elem), (u_longlong_t)value); + } + break; + + case DATA_TYPE_NVLIST: + { + nvlist_t *value; + + VERIFY(nvpair_value_nvlist(elem, &value) == 0); + (void) printf("%*s%s\n", indent, "", + nvpair_name(elem)); + dump_nvlist(value, indent + 4); + } + break; + + case DATA_TYPE_NVLIST_ARRAY: + { + nvlist_t **value; + uint_t c, count; + + VERIFY(nvpair_value_nvlist_array(elem, &value, + &count) == 0); + + for (c = 0; c < count; c++) { + (void) printf("%*s%s[%u]\n", indent, "", + nvpair_name(elem), c); + dump_nvlist(value[c], indent + 8); + } + } + break; + + default: + + (void) printf("bad config type %d for %s\n", + nvpair_type(elem), nvpair_name(elem)); + } + } +} + +/* ARGSUSED */ +static void +dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) +{ + nvlist_t *nv; + size_t nvsize = *(uint64_t *)data; + char *packed = umem_alloc(nvsize, UMEM_NOFAIL); + + dmu_read(os, object, 0, nvsize, packed); + + VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); + + umem_free(packed, nvsize); + + dump_nvlist(nv, 8); + + nvlist_free(nv); +} + +const char dump_zap_stars[] = "****************************************"; +const int dump_zap_width = sizeof (dump_zap_stars) - 1; + +static void +dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) +{ + int i; + int minidx = 
ZAP_HISTOGRAM_SIZE - 1; + int maxidx = 0; + uint64_t max = 0; + + for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { + if (histo[i] > max) + max = histo[i]; + if (histo[i] > 0 && i > maxidx) + maxidx = i; + if (histo[i] > 0 && i < minidx) + minidx = i; + } + + if (max < dump_zap_width) + max = dump_zap_width; + + for (i = minidx; i <= maxidx; i++) + (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], + &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); +} + +static void +dump_zap_stats(objset_t *os, uint64_t object) +{ + int error; + zap_stats_t zs; + + error = zap_get_stats(os, object, &zs); + if (error) + return; + + if (zs.zs_ptrtbl_len == 0) { + ASSERT(zs.zs_num_blocks == 1); + (void) printf("\tmicrozap: %llu bytes, %llu entries\n", + (u_longlong_t)zs.zs_blocksize, + (u_longlong_t)zs.zs_num_entries); + return; + } + + (void) printf("\tFat ZAP stats:\n"); + (void) printf("\t\tPointer table: %llu elements\n", + (u_longlong_t)zs.zs_ptrtbl_len); + (void) printf("\t\tZAP entries: %llu\n", + (u_longlong_t)zs.zs_num_entries); + (void) printf("\t\tLeaf blocks: %llu\n", + (u_longlong_t)zs.zs_num_leafs); + (void) printf("\t\tTotal blocks: %llu\n", + (u_longlong_t)zs.zs_num_blocks); + (void) printf("\t\tOversize blocks: %llu\n", + (u_longlong_t)zs.zs_num_blocks_large); + + (void) printf("\t\tLeafs with 2^n pointers:\n"); + dump_zap_histogram(zs.zs_leafs_with_2n_pointers); + + (void) printf("\t\tLeafs with n chained:\n"); + dump_zap_histogram(zs.zs_leafs_with_n_chained); + + (void) printf("\t\tBlocks with n*5 entries:\n"); + dump_zap_histogram(zs.zs_blocks_with_n5_entries); + + (void) printf("\t\tBlocks n/10 full:\n"); + dump_zap_histogram(zs.zs_blocks_n_tenths_full); + + (void) printf("\t\tEntries with n chunks:\n"); + dump_zap_histogram(zs.zs_entries_using_n_chunks); + + (void) printf("\t\tBuckets with n entries:\n"); + dump_zap_histogram(zs.zs_buckets_with_n_entries); +} + +/*ARGSUSED*/ +static void +dump_none(objset_t *os, uint64_t object, void *data, 
size_t size) +{ +} + +/*ARGSUSED*/ +void +dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) +{ +} + +/*ARGSUSED*/ +static void +dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) +{ +} + +/*ARGSUSED*/ +static void +dump_zap(objset_t *os, uint64_t object, void *data, size_t size) +{ + zap_cursor_t zc; + zap_attribute_t attr; + void *prop; + int i; + + dump_zap_stats(os, object); + (void) printf("\n"); + + for (zap_cursor_init(&zc, os, object); + zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_advance(&zc)) { + (void) printf("\t\t%s = ", attr.za_name); + if (attr.za_num_integers == 0) { + (void) printf("\n"); + continue; + } + prop = umem_zalloc(attr.za_num_integers * + attr.za_integer_length, UMEM_NOFAIL); + (void) zap_lookup(os, object, attr.za_name, + attr.za_integer_length, attr.za_num_integers, prop); + if (attr.za_integer_length == 1) { + (void) printf("%s", (char *)prop); + } else { + for (i = 0; i < attr.za_num_integers; i++) { + switch (attr.za_integer_length) { + case 2: + (void) printf("%u ", + ((uint16_t *)prop)[i]); + break; + case 4: + (void) printf("%u ", + ((uint32_t *)prop)[i]); + break; + case 8: + (void) printf("%lld ", + (u_longlong_t)((int64_t *)prop)[i]); + break; + } + } + } + (void) printf("\n"); + umem_free(prop, attr.za_num_integers * attr.za_integer_length); + } +} + +static void +dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) +{ + uint64_t alloc, offset, entry; + int mapshift = sm->sm_shift; + uint64_t mapstart = sm->sm_start; + char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" }; + + if (smo->smo_object == 0) + return; + + /* + * Print out the freelist entries in both encoded and decoded form. 
+ */ + alloc = 0; + for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { + dmu_read(os, smo->smo_object, offset, sizeof (entry), &entry); + if (SM_DEBUG_DECODE(entry)) { + (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n", + (u_longlong_t)(offset / sizeof (entry)), + ddata[SM_DEBUG_ACTION_DECODE(entry)], + SM_DEBUG_TXG_DECODE(entry), + SM_DEBUG_SYNCPASS_DECODE(entry)); + } else { + (void) printf("\t\t[%4llu] %c range:" + " %08llx-%08llx size: %06llx\n", + (u_longlong_t)(offset / sizeof (entry)), + SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', + (SM_OFFSET_DECODE(entry) << mapshift) + mapstart, + (SM_OFFSET_DECODE(entry) << mapshift) + mapstart + + (SM_RUN_DECODE(entry) << mapshift), + (SM_RUN_DECODE(entry) << mapshift)); + if (SM_TYPE_DECODE(entry) == SM_ALLOC) + alloc += SM_RUN_DECODE(entry) << mapshift; + else + alloc -= SM_RUN_DECODE(entry) << mapshift; + } + } + if (alloc != smo->smo_alloc) { + (void) printf("space_map_object alloc (%llu) INCONSISTENT " + "with space map summary (%llu)\n", + (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); + } +} + +static void +dump_metaslab(metaslab_t *msp) +{ + char freebuf[5]; + space_map_obj_t *smo = msp->ms_smo; + vdev_t *vd = msp->ms_group->mg_vd; + spa_t *spa = vd->vdev_spa; + + nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); + + if (dump_opt['d'] <= 5) { + (void) printf("\t%10llx %10llu %5s\n", + (u_longlong_t)msp->ms_map.sm_start, + (u_longlong_t)smo->smo_object, + freebuf); + return; + } + + (void) printf( + "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", + (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, + (u_longlong_t)smo->smo_object, freebuf); + + ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); + + dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); +} + +static void +dump_metaslabs(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd; + int c, m; + + (void) printf("\nMetaslabs:\n"); + + for (c = 0; c < rvd->vdev_children; c++) { + 
vd = rvd->vdev_child[c]; + + spa_config_enter(spa, RW_READER); + (void) printf("\n vdev %llu = %s\n\n", + (u_longlong_t)vd->vdev_id, vdev_description(vd)); + spa_config_exit(spa); + + if (dump_opt['d'] <= 5) { + (void) printf("\t%10s %10s %5s\n", + "offset", "spacemap", "free"); + (void) printf("\t%10s %10s %5s\n", + "------", "--------", "----"); + } + for (m = 0; m < vd->vdev_ms_count; m++) + dump_metaslab(vd->vdev_ms[m]); + (void) printf("\n"); + } +} + +static void +dump_dtl(vdev_t *vd, int indent) +{ + avl_tree_t *t = &vd->vdev_dtl_map.sm_root; + spa_t *spa = vd->vdev_spa; + space_seg_t *ss; + vdev_t *pvd; + int c; + + if (indent == 0) + (void) printf("\nDirty time logs:\n\n"); + + spa_config_enter(spa, RW_READER); + (void) printf("\t%*s%s\n", indent, "", vdev_description(vd)); + spa_config_exit(spa); + + for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) { + /* + * Everything in this DTL must appear in all parent DTL unions. + */ + for (pvd = vd; pvd; pvd = pvd->vdev_parent) + ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map, + ss->ss_start, ss->ss_end - ss->ss_start)); + (void) printf("\t%*soutage [%llu,%llu] length %llu\n", + indent, "", + (u_longlong_t)ss->ss_start, + (u_longlong_t)ss->ss_end - 1, + (u_longlong_t)ss->ss_end - ss->ss_start); + } + + (void) printf("\n"); + + if (dump_opt['d'] > 5 && vd->vdev_children == 0) { + dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl, + &vd->vdev_dtl_map); + (void) printf("\n"); + } + + for (c = 0; c < vd->vdev_children; c++) + dump_dtl(vd->vdev_child[c], indent + 4); +} + +/*ARGSUSED*/ +static void +dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) +{ +} + +static uint64_t +blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) +{ + if (level < 0) + return (blkid); + + return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * + dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); +} + +/* ARGSUSED */ +static int +zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) +{ + 
zbookmark_t *zb = &bc->bc_bookmark; + blkptr_t *bp = &bc->bc_blkptr; + dva_t *dva = &bp->blk_dva[0]; + void *data = bc->bc_data; + dnode_phys_t *dnp = bc->bc_dnode; + char buffer[300]; + int l; + + if (bc->bc_errno) { + (void) sprintf(buffer, + "Error %d reading <%llu, %llu, %d, %llu>: ", + bc->bc_errno, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + zb->zb_level, + (u_longlong_t)zb->zb_blkid); + goto out; + } + + if (zb->zb_level == -1) { + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); + ASSERT3U(BP_GET_LEVEL(bp), ==, 0); + } else { + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + } + + if (zb->zb_level > 0) { + uint64_t fill = 0; + blkptr_t *bpx, *bpend; + + for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); + bpx < bpend; bpx++) { + if (bpx->blk_birth != 0) { + ASSERT(bpx->blk_fill > 0); + fill += bpx->blk_fill; + } else { + ASSERT(bpx->blk_fill == 0); + } + } + ASSERT3U(fill, ==, bp->blk_fill); + } + + if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { + uint64_t fill = 0; + dnode_phys_t *dnx, *dnend; + + for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); + dnx < dnend; dnx++) { + if (dnx->dn_type != DMU_OT_NONE) + fill++; + } + ASSERT3U(fill, ==, bp->blk_fill); + } + + (void) sprintf(buffer, "%16llx ", + (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); + + ASSERT(zb->zb_level >= 0); + + for (l = dnp->dn_nlevels - 1; l >= -1; l--) { + if (l == zb->zb_level) { + (void) sprintf(buffer + strlen(buffer), "L%x", + zb->zb_level); + } else { + (void) sprintf(buffer + strlen(buffer), " "); + } + } + +out: + if (bp->blk_birth == 0) { + (void) sprintf(buffer + strlen(buffer), "<hole>"); + (void) printf("%s\n", buffer); + } else { + // XXBP - Need to print number of active BPs here + (void) sprintf(buffer + strlen(buffer), + "vdev=%llu off=%llx %llxL/%llxP/%llxA F=%llu B=%llu", + (u_longlong_t)DVA_GET_VDEV(dva), + (u_longlong_t)DVA_GET_OFFSET(dva), + 
(u_longlong_t)BP_GET_LSIZE(bp), + (u_longlong_t)BP_GET_PSIZE(bp), + (u_longlong_t)DVA_GET_ASIZE(dva), + (u_longlong_t)bp->blk_fill, + (u_longlong_t)bp->blk_birth); + + (void) printf("%s\n", buffer); + } + + return (bc->bc_errno ? ERESTART : 0); +} + +/*ARGSUSED*/ +static void +dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) +{ + traverse_handle_t *th; + uint64_t objset = dmu_objset_id(os); + int advance = zdb_advance; + + (void) printf("Indirect blocks:\n"); + + if (object == 0) + advance |= ADVANCE_DATA; + + th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, + ZIO_FLAG_CANFAIL); + th->th_noread = zdb_noread; + + traverse_add_dnode(th, 0, -1ULL, objset, object); + + while (traverse_more(th) == EAGAIN) + continue; + + (void) printf("\n"); + + traverse_fini(th); +} + +/*ARGSUSED*/ +static void +dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) +{ + dsl_dir_phys_t *dd = data; + time_t crtime; + char used[6], compressed[6], uncompressed[6], quota[6], resv[6]; + + if (dd == NULL) + return; + + ASSERT(size == sizeof (*dd)); + + crtime = dd->dd_creation_time; + nicenum(dd->dd_used_bytes, used); + nicenum(dd->dd_compressed_bytes, compressed); + nicenum(dd->dd_uncompressed_bytes, uncompressed); + nicenum(dd->dd_quota, quota); + nicenum(dd->dd_reserved, resv); + + (void) printf("\t\tcreation_time = %s", ctime(&crtime)); + (void) printf("\t\thead_dataset_obj = %llu\n", + (u_longlong_t)dd->dd_head_dataset_obj); + (void) printf("\t\tparent_dir_obj = %llu\n", + (u_longlong_t)dd->dd_parent_obj); + (void) printf("\t\tclone_parent_obj = %llu\n", + (u_longlong_t)dd->dd_clone_parent_obj); + (void) printf("\t\tchild_dir_zapobj = %llu\n", + (u_longlong_t)dd->dd_child_dir_zapobj); + (void) printf("\t\tused_bytes = %s\n", used); + (void) printf("\t\tcompressed_bytes = %s\n", compressed); + (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); + (void) printf("\t\tquota = %s\n", quota); + (void) printf("\t\treserved = 
%s\n", resv); + (void) printf("\t\tprops_zapobj = %llu\n", + (u_longlong_t)dd->dd_props_zapobj); +} + +/*ARGSUSED*/ +static void +dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) +{ + dsl_dataset_phys_t *ds = data; + time_t crtime; + char used[6], compressed[6], uncompressed[6], unique[6], blkbuf[300]; + + if (ds == NULL) + return; + + ASSERT(size == sizeof (*ds)); + crtime = ds->ds_creation_time; + nicenum(ds->ds_used_bytes, used); + nicenum(ds->ds_compressed_bytes, compressed); + nicenum(ds->ds_uncompressed_bytes, uncompressed); + nicenum(ds->ds_unique_bytes, unique); + sprintf_blkptr(blkbuf, &ds->ds_bp); + + (void) printf("\t\tdataset_obj = %llu\n", + (u_longlong_t)ds->ds_dir_obj); + (void) printf("\t\tprev_snap_obj = %llu\n", + (u_longlong_t)ds->ds_prev_snap_obj); + (void) printf("\t\tprev_snap_txg = %llu\n", + (u_longlong_t)ds->ds_prev_snap_txg); + (void) printf("\t\tnext_snap_obj = %llu\n", + (u_longlong_t)ds->ds_next_snap_obj); + (void) printf("\t\tsnapnames_zapobj = %llu\n", + (u_longlong_t)ds->ds_snapnames_zapobj); + (void) printf("\t\tnum_children = %llu\n", + (u_longlong_t)ds->ds_num_children); + (void) printf("\t\tcreation_time = %s", ctime(&crtime)); + (void) printf("\t\tcreation_txg = %llu\n", + (u_longlong_t)ds->ds_creation_txg); + (void) printf("\t\tdeadlist_obj = %llu\n", + (u_longlong_t)ds->ds_deadlist_obj); + (void) printf("\t\tused_bytes = %s\n", used); + (void) printf("\t\tcompressed_bytes = %s\n", compressed); + (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); + (void) printf("\t\tunique = %s\n", unique); + (void) printf("\t\tfsid_guid = %llu\n", + (u_longlong_t)ds->ds_fsid_guid); + (void) printf("\t\tguid = %llu\n", + (u_longlong_t)ds->ds_guid); + (void) printf("\t\trestoring = %llu\n", + (u_longlong_t)ds->ds_restoring); + (void) printf("\t\tbp = %s\n", blkbuf); +} + +static void +dump_bplist(objset_t *mos, uint64_t object, char *name) +{ + bplist_t bpl = { 0 }; + blkptr_t blk, *bp = &blk; + uint64_t itor 
= 0; + char numbuf[6]; + + if (dump_opt['d'] < 3) + return; + + bplist_open(&bpl, mos, object); + if (bplist_empty(&bpl)) { + bplist_close(&bpl); + return; + } + + nicenum(bpl.bpl_phys->bpl_bytes, numbuf); + + (void) printf("\n %s: %llu entries, %s\n", + name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf); + + if (dump_opt['d'] < 5) { + bplist_close(&bpl); + return; + } + + (void) printf("\n"); + + while (bplist_iterate(&bpl, &itor, bp) == 0) { + ASSERT(bp->blk_birth != 0); + // XXBP - Do we want to see all DVAs, or just one? + (void) printf("\tItem %3llu: vdev=%llu off=%llx " + "%llxL/%llxP/%llxA F=%llu B=%llu\n", + (u_longlong_t)itor - 1, + (u_longlong_t)DVA_GET_VDEV(&bp->blk_dva[0]), + (u_longlong_t)DVA_GET_OFFSET(&bp->blk_dva[0]), + (u_longlong_t)BP_GET_LSIZE(bp), + (u_longlong_t)BP_GET_PSIZE(bp), + (u_longlong_t)DVA_GET_ASIZE(&bp->blk_dva[0]), + (u_longlong_t)bp->blk_fill, + (u_longlong_t)bp->blk_birth); + } + + bplist_close(&bpl); +} + +static char * +znode_path(objset_t *os, uint64_t object, char *pathbuf, size_t size) +{ + dmu_buf_t *db; + dmu_object_info_t doi; + znode_phys_t *zp; + uint64_t parent = 0; + size_t complen; + char component[MAXNAMELEN + 1]; + char *path; + + path = pathbuf + size; + *--path = '\0'; + + for (;;) { + db = dmu_bonus_hold(os, object); + if (db == NULL) + break; + + dmu_buf_read(db); + dmu_object_info_from_db(db, &doi); + zp = db->db_data; + parent = zp->zp_parent; + dmu_buf_rele(db); + + if (doi.doi_bonus_type != DMU_OT_ZNODE) + break; + + if (parent == object) { + if (path[0] != '/') + *--path = '/'; + return (path); + } + + if (zap_value_search(os, parent, object, component) != 0) + break; + + complen = strlen(component); + path -= complen; + bcopy(component, path, complen); + *--path = '/'; + + object = parent; + } + + (void) sprintf(component, "???<object#%llu>", (u_longlong_t)object); + + complen = strlen(component); + path -= complen; + bcopy(component, path, complen); + + return (path); +} + +/*ARGSUSED*/ +static void 
+dump_znode(objset_t *os, uint64_t object, void *data, size_t size) +{ + znode_phys_t *zp = data; + time_t z_crtime, z_atime, z_mtime, z_ctime; + char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ + + ASSERT(size >= sizeof (znode_phys_t)); + + if (dump_opt['d'] < 3) { + (void) printf("\t%s\n", + znode_path(os, object, path, sizeof (path))); + return; + } + + z_crtime = (time_t)zp->zp_crtime[0]; + z_atime = (time_t)zp->zp_atime[0]; + z_mtime = (time_t)zp->zp_mtime[0]; + z_ctime = (time_t)zp->zp_ctime[0]; + + (void) printf("\tpath %s\n", + znode_path(os, object, path, sizeof (path))); + (void) printf("\tatime %s", ctime(&z_atime)); + (void) printf("\tmtime %s", ctime(&z_mtime)); + (void) printf("\tctime %s", ctime(&z_ctime)); + (void) printf("\tcrtime %s", ctime(&z_crtime)); + (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen); + (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode); + (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size); + (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent); + (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links); + (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr); + (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev); +} + +/*ARGSUSED*/ +static void +dump_acl(objset_t *os, uint64_t object, void *data, size_t size) +{ +} + +/*ARGSUSED*/ +static void +dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) +{ +} + +static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { + dump_none, /* unallocated */ + dump_zap, /* object directory */ + dump_uint64, /* object array */ + dump_none, /* packed nvlist */ + dump_packed_nvlist, /* packed nvlist size */ + dump_none, /* bplist */ + dump_none, /* bplist header */ + dump_none, /* SPA space map header */ + dump_none, /* SPA space map */ + dump_none, /* ZIL intent log */ + dump_dnode, /* DMU dnode */ + dump_dmu_objset, /* DMU objset */ + dump_dsl_dir, /* DSL directory */ + dump_zap, /* DSL directory child map 
*/ + dump_zap, /* DSL dataset snap map */ + dump_zap, /* DSL props */ + dump_dsl_dataset, /* DSL dataset */ + dump_znode, /* ZFS znode */ + dump_acl, /* ZFS ACL */ + dump_uint8, /* ZFS plain file */ + dump_zap, /* ZFS directory */ + dump_zap, /* ZFS master node */ + dump_zap, /* ZFS delete queue */ + dump_uint8, /* zvol object */ + dump_zap, /* zvol prop */ + dump_uint8, /* other uint8[] */ + dump_uint64, /* other uint64[] */ + dump_zap, /* other ZAP */ +}; + +static void +dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) +{ + dmu_buf_t *db = NULL; + dmu_object_info_t doi; + dnode_t *dn; + void *bonus = NULL; + size_t bsize = 0; + char iblk[6], dblk[6], lsize[6], psize[6], bonus_size[6], segsize[6]; + char aux[50]; + int error; + + if (*print_header) { + (void) printf("\n Object lvl iblk dblk lsize" + " psize type\n"); + *print_header = 0; + } + + if (object == 0) { + dn = os->os->os_meta_dnode; + } else { + db = dmu_bonus_hold(os, object); + if (db == NULL) + fatal("dmu_bonus_hold(%llu) failed", object); + dmu_buf_read(db); + bonus = db->db_data; + bsize = db->db_size; + dn = ((dmu_buf_impl_t *)db)->db_dnode; + } + dmu_object_info_from_dnode(dn, &doi); + + nicenum(doi.doi_metadata_block_size, iblk); + nicenum(doi.doi_data_block_size, dblk); + nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1), + lsize); + nicenum(doi.doi_physical_blks << 9, psize); + nicenum(doi.doi_bonus_size, bonus_size); + + aux[0] = '\0'; + + if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) + (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", + zio_checksum_table[doi.doi_checksum].ci_name); + + if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) + (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", + zio_compress_table[doi.doi_compress].ci_name); + + (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n", + (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize, + psize, dmu_ot[doi.doi_type].ot_name, aux); + 
+ if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { + (void) printf("%10s %3s %5s %5s %5s %5s %s\n", + "", "", "", "", bonus_size, "bonus", + dmu_ot[doi.doi_bonus_type].ot_name); + } + + if (verbosity >= 4) { + object_viewer[doi.doi_bonus_type](os, object, bonus, bsize); + object_viewer[doi.doi_type](os, object, NULL, 0); + *print_header = 1; + } + + if (verbosity >= 5) + dump_indirect(os, object, NULL, 0); + + if (verbosity >= 5) { + /* + * Report the list of segments that comprise the object. + */ + uint64_t start = 0; + uint64_t end; + uint64_t blkfill = 1; + int minlvl = 1; + + if (dn->dn_type == DMU_OT_DNODE) { + minlvl = 0; + blkfill = DNODES_PER_BLOCK; + } + + for (;;) { + error = dnode_next_offset(dn, B_FALSE, &start, minlvl, + blkfill); + if (error) + break; + end = start; + error = dnode_next_offset(dn, B_TRUE, &end, minlvl, + blkfill); + nicenum(end - start, segsize); + (void) printf("\t\tsegment [%016llx, %016llx)" + " size %5s\n", (u_longlong_t)start, + (u_longlong_t)end, segsize); + if (error) + break; + start = end; + } + } + + if (db != NULL) + dmu_buf_rele(db); +} + +static char *objset_types[DMU_OST_NUMTYPES] = { + "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; + +/*ARGSUSED*/ +static void +dump_dir(objset_t *os) +{ + dmu_objset_stats_t dds; + uint64_t object, object_count; + char numbuf[8]; + char blkbuf[300]; + char osname[MAXNAMELEN]; + char *type = "UNKNOWN"; + int verbosity = dump_opt['d']; + int print_header = 1; + int i, error; + + dmu_objset_stats(os, &dds); + + if (dds.dds_type < DMU_OST_NUMTYPES) + type = objset_types[dds.dds_type]; + + if (dds.dds_type == DMU_OST_META) { + dds.dds_creation_txg = TXG_INITIAL; + dds.dds_last_txg = os->os->os_rootbp.blk_birth; + dds.dds_objects_used = os->os->os_rootbp.blk_fill; + dds.dds_space_refd = + os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes; + } + + ASSERT3U(dds.dds_objects_used, ==, os->os->os_rootbp.blk_fill); + + nicenum(dds.dds_space_refd, numbuf); + + if (verbosity >= 4) { 
+ (void) strcpy(blkbuf, ", rootbp "); + sprintf_blkptr(blkbuf + strlen(blkbuf), &os->os->os_rootbp); + } else { + blkbuf[0] = '\0'; + } + + dmu_objset_name(os, osname); + + (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, last_txg %llu, " + "%s, %llu objects%s\n", + osname, type, (u_longlong_t)dmu_objset_id(os), + (u_longlong_t)dds.dds_creation_txg, + (u_longlong_t)dds.dds_last_txg, + numbuf, + (u_longlong_t)dds.dds_objects_used, + blkbuf); + + dump_intent_log(dmu_objset_zil(os)); + + if (dmu_objset_ds(os) != NULL) + dump_bplist(dmu_objset_pool(os)->dp_meta_objset, + dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist"); + + if (verbosity < 2) + return; + + if (zopt_objects != 0) { + for (i = 0; i < zopt_objects; i++) + dump_object(os, zopt_object[i], verbosity, + &print_header); + (void) printf("\n"); + return; + } + + dump_object(os, 0, verbosity, &print_header); + object_count = 1; + + object = 0; + while ((error = dmu_object_next(os, &object, B_FALSE)) == 0) { + dump_object(os, object, verbosity, &print_header); + object_count++; + } + + ASSERT3U(object_count, ==, dds.dds_objects_used); + + (void) printf("\n"); + + if (error != ESRCH) + fatal("dmu_object_next() = %d", error); +} + +static void +dump_uberblock(uberblock_t *ub) +{ + time_t timestamp = ub->ub_timestamp; + + (void) printf("Uberblock\n\n"); + (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); + (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); + (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); + (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); + (void) printf("\ttimestamp = %llu UTC = %s", + (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); + if (dump_opt['u'] >= 3) { + char blkbuf[300]; + sprintf_blkptr(blkbuf, &ub->ub_rootbp); + (void) printf("\trootbp = %s\n", blkbuf); + } + (void) printf("\n"); +} + +static void +dump_config(const char *pool) +{ + spa_t *spa = NULL; + + mutex_enter(&spa_namespace_lock); + while ((spa 
= spa_next(spa)) != NULL) { + if (pool == NULL) + (void) printf("%s\n", spa_name(spa)); + if (pool == NULL || strcmp(pool, spa_name(spa)) == 0) + dump_nvlist(spa->spa_config, 4); + } + mutex_exit(&spa_namespace_lock); +} + +static void +dump_label(const char *dev) +{ + int fd; + vdev_label_t label; + char *buf = label.vl_vdev_phys.vp_nvlist; + size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); + struct stat64 statbuf; + uint64_t psize; + int l; + + if ((fd = open(dev, O_RDONLY)) < 0) { + (void) printf("cannot open '%s': %s\n", dev, strerror(errno)); + exit(1); + } + + if (fstat64(fd, &statbuf) != 0) { + (void) printf("failed to stat '%s': %s\n", dev, + strerror(errno)); + exit(1); + } + + psize = statbuf.st_size; + psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); + + for (l = 0; l < VDEV_LABELS; l++) { + + nvlist_t *config = NULL; + + (void) printf("--------------------------------------------\n"); + (void) printf("LABEL %d\n", l); + (void) printf("--------------------------------------------\n"); + + if (pread(fd, &label, sizeof (label), + vdev_label_offset(psize, l, 0)) != sizeof (label)) { + (void) printf("failed to read label %d\n", l); + continue; + } + + if (nvlist_unpack(buf, buflen, &config, 0) != 0) { + (void) printf("failed to unpack label %d\n", l); + continue; + } + dump_nvlist(config, 4); + nvlist_free(config); + } +} + +/*ARGSUSED*/ +static void +dump_one_dir(char *dsname, void *arg) +{ + int error; + objset_t *os; + + error = dmu_objset_open(dsname, DMU_OST_ANY, + DS_MODE_STANDARD | DS_MODE_READONLY, &os); + if (error) { + (void) printf("Could not open %s\n", dsname); + return; + } + dump_dir(os); + dmu_objset_close(os); +} + +static void +zdb_space_map_load(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd; + int c, m, error; + + for (c = 0; c < rvd->vdev_children; c++) { + vd = rvd->vdev_child[c]; + for (m = 0; m < vd->vdev_ms_count; m++) { + metaslab_t *msp = vd->vdev_ms[m]; + space_map_t *sm = &msp->ms_allocmap[0]; + 
mutex_enter(&msp->ms_lock); + error = space_map_load(sm, msp->ms_smo, SM_ALLOC, + spa->spa_meta_objset, msp->ms_usable_end, + sm->sm_size - msp->ms_usable_space); + mutex_exit(&msp->ms_lock); + if (error) + fatal("%s bad space map #%d, error %d", + spa->spa_name, c, error); + } + } +} + +static int +zdb_space_map_claim(spa_t *spa, blkptr_t *bp) +{ + dva_t *dva = &bp->blk_dva[0]; + uint64_t vdev = DVA_GET_VDEV(dva); + uint64_t offset = DVA_GET_OFFSET(dva); + uint64_t size = DVA_GET_ASIZE(dva); + vdev_t *vd; + metaslab_t *msp; + space_map_t *allocmap, *freemap; + int error; + + if ((vd = vdev_lookup_top(spa, vdev)) == NULL) + return (ENXIO); + + if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) + return (ENXIO); + + if (DVA_GET_GANG(dva)) { + zio_gbh_phys_t gbh; + blkptr_t blk = *bp; + int g; + + /* LINTED - compile time assert */ + ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE); + size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); + DVA_SET_GANG(&blk.blk_dva[0], 0); + DVA_SET_ASIZE(&blk.blk_dva[0], size); + BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER); + BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE); + BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE); + BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF); + error = zio_wait(zio_read(NULL, spa, &blk, + &gbh, SPA_GANGBLOCKSIZE, NULL, NULL, + ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD)); + if (error) + return (error); + if (BP_SHOULD_BYTESWAP(&blk)) + byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE); + for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { + if (gbh.zg_blkptr[g].blk_birth == 0) + break; + error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g]); + if (error) + return (error); + } + } + + msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + allocmap = &msp->ms_allocmap[0]; + freemap = &msp->ms_freemap[0]; + + mutex_enter(&msp->ms_lock); + if (space_map_contains(freemap, offset, size)) { + mutex_exit(&msp->ms_lock); + return (EAGAIN); /* allocated more than once */ + } + + if (!space_map_contains(allocmap, offset, size)) 
{ + mutex_exit(&msp->ms_lock); + return (ESTALE); /* not allocated at all */ + } + + space_map_remove(allocmap, offset, size); + space_map_add(freemap, offset, size); + + mutex_exit(&msp->ms_lock); + + return (0); +} + +static void +zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) +{ + metaslab_t *msp; + + /* LINTED */ + msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0])); + + (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", + (u_longlong_t)msp->ms_group->mg_vd->vdev_id, + (u_longlong_t)start, + (u_longlong_t)size); +} + +static void +zdb_space_map_vacate(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd; + int c, m; + + for (c = 0; c < rvd->vdev_children; c++) { + vd = rvd->vdev_child[c]; + for (m = 0; m < vd->vdev_ms_count; m++) { + metaslab_t *msp = vd->vdev_ms[m]; + mutex_enter(&msp->ms_lock); + space_map_vacate(&msp->ms_allocmap[0], zdb_leak, + &msp->ms_allocmap[0]); + space_map_vacate(&msp->ms_freemap[0], NULL, NULL); + mutex_exit(&msp->ms_lock); + } + } +} + +static void +zdb_refresh_ubsync(spa_t *spa) +{ + uberblock_t ub = { 0 }; + vdev_t *rvd = spa->spa_root_vdev; + zio_t *zio; + + /* + * Reopen all devices to purge zdb's vdev caches. + */ + vdev_reopen(rvd, NULL); + + /* + * Reload the uberblock. + */ + zio = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); + vdev_uberblock_load(zio, rvd, &ub); + (void) zio_wait(zio); + + if (ub.ub_txg != 0) + spa->spa_ubsync = ub; +} + +/* + * Verify that the sum of the sizes of all blocks in the pool adds up + * to the SPA's sa_alloc total. 
+ */ +typedef struct zdb_blkstats { + uint64_t zb_asize; + uint64_t zb_lsize; + uint64_t zb_psize; + uint64_t zb_count; +} zdb_blkstats_t; + +#define DMU_OT_DEFERRED DMU_OT_NONE +#define DMU_OT_TOTAL DMU_OT_NUMTYPES + +#define ZB_TOTAL ZB_MAXLEVEL + +typedef struct zdb_cb { + zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; + uint64_t zcb_errors[256]; + traverse_blk_cache_t *zcb_cache; + int zcb_readfails; + int zcb_haderrors; +} zdb_cb_t; + +static blkptr_cb_t zdb_blkptr_cb; + +static void +zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) +{ + int i, error; + + for (i = 0; i < 4; i++) { + int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; + int t = (i & 1) ? type : DMU_OT_TOTAL; + zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; + + zb->zb_asize += BP_GET_ASIZE(bp); + zb->zb_lsize += BP_GET_LSIZE(bp); + zb->zb_psize += BP_GET_PSIZE(bp); + zb->zb_count++; + } + + if (dump_opt['L']) + return; + + error = zdb_space_map_claim(spa, bp); + + if (error == 0) + return; + + if (error == EAGAIN) + (void) fatal("double-allocation, bp=%p", bp); + + if (error == ESTALE) + (void) fatal("reference to freed block, bp=%p", bp); + + (void) fatal("fatal error %d in bp %p", error, bp); +} + +static void +zdb_log_block_cb(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t first_txg) +{ + if (bp->blk_birth < first_txg) { + zdb_cb_t *zcb = arg; + traverse_blk_cache_t bc = *zcb->zcb_cache; + zbookmark_t *zb = &bc.bc_bookmark; + + zb->zb_objset = bp->blk_cksum.zc_word[2]; + zb->zb_blkid = bp->blk_cksum.zc_word[3]; + bc.bc_blkptr = *bp; + + (void) zdb_blkptr_cb(&bc, zilog->zl_spa, arg); + } +} + +static int +zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) +{ + zbookmark_t *zb = &bc->bc_bookmark; + zdb_cb_t *zcb = arg; + blkptr_t *bp = &bc->bc_blkptr; + dmu_object_type_t type = BP_GET_TYPE(bp); + char blkbuf[300]; + int error = 0; + + if (bc->bc_errno) { + if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { + zdb_refresh_ubsync(spa); + error = EAGAIN; + } else { 
+ zcb->zcb_haderrors = 1; + zcb->zcb_errors[bc->bc_errno]++; + error = ERESTART; + } + + if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) + sprintf_blkptr(blkbuf, bp); + else + blkbuf[0] = '\0'; + + (void) printf("zdb_blkptr_cb: Got error %d reading " + "<%llu, %llu, %d, %llx> %s -- %s\n", + bc->bc_errno, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + zb->zb_level, + (u_longlong_t)zb->zb_blkid, + blkbuf, + error == EAGAIN ? "retrying" : "skipping"); + + return (error); + } + + zcb->zcb_readfails = 0; + + ASSERT(bp->blk_birth != 0); + + zdb_count_block(spa, zcb, bp, type); + + if (dump_opt['b'] >= 4) { + sprintf_blkptr(blkbuf, bp); + (void) printf("objset %llu object %llu offset 0x%llx %s\n", + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (u_longlong_t)blkid2offset(bc->bc_dnode, + zb->zb_level, zb->zb_blkid), + blkbuf); + } + + if (type == DMU_OT_OBJSET) { + objset_phys_t *osphys = bc->bc_data; + zilog_t zilog = { 0 }; + zilog.zl_header = &osphys->os_zil_header; + zilog.zl_spa = spa; + + zcb->zcb_cache = bc; + + zil_parse(&zilog, zdb_log_block_cb, NULL, zcb, + spa_first_txg(spa)); + } + + return (0); +} + +static int +dump_block_stats(spa_t *spa) +{ + traverse_handle_t *th; + zdb_cb_t zcb = { 0 }; + zdb_blkstats_t *zb, *tzb; + uint64_t alloc, space; + int leaks = 0; + int advance = zdb_advance; + int flags; + int e; + + if (dump_opt['c']) + advance |= ADVANCE_DATA; + + advance |= ADVANCE_PRUNE; + + (void) printf("\nTraversing all blocks to %sverify" + " nothing leaked ...\n", + dump_opt['c'] ? "verify checksums and " : ""); + + /* + * Load all space maps. As we traverse the pool, if we find a block + * that's not in its space map, that indicates a double-allocation, + * reference to a freed block, or an unclaimed block. Otherwise we + * remove the block from the space map. If the space maps are not + * empty when we're done, that indicates leaked blocks. 
+ */ + if (!dump_opt['L']) + zdb_space_map_load(spa); + + /* + * If there's a deferred-free bplist, process that first. + */ + if (spa->spa_sync_bplist_obj != 0) { + bplist_t *bpl = &spa->spa_sync_bplist; + blkptr_t blk; + uint64_t itor = 0; + + bplist_open(bpl, spa->spa_meta_objset, + spa->spa_sync_bplist_obj); + + while (bplist_iterate(bpl, &itor, &blk) == 0) { + zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED); + if (dump_opt['b'] >= 4) { + char blkbuf[300]; + sprintf_blkptr(blkbuf, &blk); + (void) printf("[%s] %s\n", + "deferred free", blkbuf); + } + } + + bplist_close(bpl); + } + + /* + * Now traverse the pool. If we're read all data to verify checksums, + * do a scrubbing read so that we validate all copies. + */ + flags = ZIO_FLAG_CANFAIL; + if (advance & ADVANCE_DATA) + flags |= ZIO_FLAG_SCRUB; + th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); + th->th_noread = zdb_noread; + + traverse_add_pool(th, 0, -1ULL); + + while (traverse_more(th) == EAGAIN) + continue; + + traverse_fini(th); + + if (zcb.zcb_haderrors) { + (void) printf("\nError counts:\n\n"); + (void) printf("\t%5s %s\n", "errno", "count"); + for (e = 0; e < 256; e++) { + if (zcb.zcb_errors[e] != 0) { + (void) printf("\t%5d %llu\n", + e, (u_longlong_t)zcb.zcb_errors[e]); + } + } + } + + /* + * Report any leaked segments. 
+ */ + if (!dump_opt['L']) + zdb_space_map_vacate(spa); + + if (dump_opt['L']) + (void) printf("\n\n *** Live pool traversal; " + "block counts are only approximate ***\n\n"); + + alloc = spa_get_alloc(spa); + space = spa_get_space(spa); + + tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL]; + + if (tzb->zb_asize == alloc) { + (void) printf("\n\tNo leaks (block sum matches space" + " maps exactly)\n"); + } else { + (void) printf("block traversal size %llu != alloc %llu " + "(leaked %lld)\n", + (u_longlong_t)tzb->zb_asize, + (u_longlong_t)alloc, + (u_longlong_t)(alloc - tzb->zb_asize)); + leaks = 1; + } + + if (tzb->zb_count == 0) + return (2); + + (void) printf("\n"); + (void) printf("\tbp count: %10llu\n", + (u_longlong_t)tzb->zb_count); + (void) printf("\tbp logical: %10llu\t avg: %6llu\n", + (u_longlong_t)tzb->zb_lsize, + (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); + (void) printf("\tbp physical: %10llu\t avg:" + " %6llu\tcompression: %6.2f\n", + (u_longlong_t)tzb->zb_psize, + (u_longlong_t)(tzb->zb_psize / tzb->zb_count), + (double)tzb->zb_lsize / tzb->zb_psize); + (void) printf("\tbp allocated: %10llu\t avg:" + " %6llu\tcompression: %6.2f\n", + (u_longlong_t)tzb->zb_asize, + (u_longlong_t)(tzb->zb_asize / tzb->zb_count), + (double)tzb->zb_lsize / tzb->zb_asize); + (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n", + (u_longlong_t)alloc, 100.0 * alloc / space); + + if (dump_opt['b'] >= 2) { + int l, t, level; + (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" + "\t avg\t comp\t%%Total\tType\n"); + + for (t = 0; t <= DMU_OT_NUMTYPES; t++) { + char csize[6], lsize[6], psize[6], asize[6], avg[6]; + char *typename; + + typename = t == DMU_OT_DEFERRED ? "deferred free" : + t == DMU_OT_TOTAL ? 
"Total" : dmu_ot[t].ot_name; + + if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { + (void) printf("%6s\t%5s\t%5s\t%5s" + "\t%5s\t%5s\t%6s\t%s\n", + "-", + "-", + "-", + "-", + "-", + "-", + "-", + typename); + continue; + } + + for (l = ZB_TOTAL - 1; l >= -1; l--) { + level = (l == -1 ? ZB_TOTAL : l); + zb = &zcb.zcb_type[level][t]; + + if (zb->zb_asize == 0) + continue; + + if (dump_opt['b'] < 3 && level != ZB_TOTAL) + continue; + + if (level == 0 && zb->zb_asize == + zcb.zcb_type[ZB_TOTAL][t].zb_asize) + continue; + + nicenum(zb->zb_count, csize); + nicenum(zb->zb_lsize, lsize); + nicenum(zb->zb_psize, psize); + nicenum(zb->zb_asize, asize); + nicenum(zb->zb_asize / zb->zb_count, avg); + + (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" + "\t%5.2f\t%6.2f\t", + csize, lsize, psize, asize, avg, + (double)zb->zb_lsize / zb->zb_psize, + 100.0 * zb->zb_asize / tzb->zb_asize); + + if (level == ZB_TOTAL) + (void) printf("%s\n", typename); + else + (void) printf(" L%d %s\n", + level, typename); + } + } + } + + (void) printf("\n"); + + if (leaks) + return (2); + + if (zcb.zcb_haderrors) + return (3); + + return (0); +} + +static void +dump_zpool(spa_t *spa) +{ + dsl_pool_t *dp = spa_get_dsl(spa); + int rc = 0; + + if (dump_opt['u']) + dump_uberblock(&spa->spa_uberblock); + + if (dump_opt['d'] || dump_opt['i']) { + dump_dir(dp->dp_meta_objset); + if (dump_opt['d'] >= 3) { + dump_bplist(dp->dp_meta_objset, + spa->spa_sync_bplist_obj, "Deferred frees"); + dump_dtl(spa->spa_root_vdev, 0); + dump_metaslabs(spa); + } + dmu_objset_find(spa->spa_name, dump_one_dir, NULL, + DS_FIND_SNAPSHOTS); + } + + if (dump_opt['b'] || dump_opt['c']) + rc = dump_block_stats(spa); + + if (dump_opt['s']) + show_pool_stats(spa); + + if (rc != 0) + exit(rc); +} + +int +main(int argc, char **argv) +{ + int i, c; + struct rlimit rl = { 1024, 1024 }; + spa_t *spa; + objset_t *os = NULL; + char *endstr; + int dump_all = 1; + int verbose = 0; + int error; + int flag, set; + + (void) setrlimit(RLIMIT_NOFILE, &rl); 
+ + dprintf_setup(&argc, argv); + + while ((c = getopt(argc, argv, "udibcsvCLO:B:Ul")) != -1) { + switch (c) { + case 'u': + case 'd': + case 'i': + case 'b': + case 'c': + case 's': + case 'C': + case 'l': + dump_opt[c]++; + dump_all = 0; + break; + case 'L': + dump_opt[c]++; + break; + case 'O': + endstr = optarg; + if (endstr[0] == '!') { + endstr++; + set = 0; + } else { + set = 1; + } + if (strcmp(endstr, "post") == 0) { + flag = ADVANCE_PRE; + set = !set; + } else if (strcmp(endstr, "pre") == 0) { + flag = ADVANCE_PRE; + } else if (strcmp(endstr, "prune") == 0) { + flag = ADVANCE_PRUNE; + } else if (strcmp(endstr, "data") == 0) { + flag = ADVANCE_DATA; + } else if (strcmp(endstr, "holes") == 0) { + flag = ADVANCE_HOLES; + } else { + usage(); + } + if (set) + zdb_advance |= flag; + else + zdb_advance &= ~flag; + break; + case 'B': + endstr = optarg - 1; + zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); + zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); + zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); + zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); + (void) printf("simulating bad block " + "<%llu, %llu, %d, %llx>\n", + (u_longlong_t)zdb_noread.zb_objset, + (u_longlong_t)zdb_noread.zb_object, + zdb_noread.zb_level, + (u_longlong_t)zdb_noread.zb_blkid); + break; + case 'v': + verbose++; + break; + case 'U': + spa_config_dir = "/tmp"; + break; + default: + usage(); + break; + } + } + + kernel_init(FREAD); + + for (c = 0; c < 256; c++) { + if (dump_all && c != 'L' && c != 'l') + dump_opt[c] = 1; + if (dump_opt[c]) + dump_opt[c] += verbose; + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + if (dump_opt['C']) { + dump_config(NULL); + return (0); + } + usage(); + } + + if (dump_opt['l']) { + dump_label(argv[0]); + return (0); + } + + if (dump_opt['C']) + dump_config(argv[0]); + + if (strchr(argv[0], '/') != NULL) { + error = dmu_objset_open(argv[0], DMU_OST_ANY, + DS_MODE_STANDARD | DS_MODE_READONLY, &os); + } else { + 
error = spa_open(argv[0], &spa, FTAG); + } + + if (error) + fatal("can't open %s: error %d", argv[0], error); + + argv++; + if (--argc > 0) { + zopt_objects = argc; + zopt_object = calloc(zopt_objects, sizeof (uint64_t)); + for (i = 0; i < zopt_objects; i++) { + errno = 0; + zopt_object[i] = strtoull(argv[i], NULL, 0); + if (zopt_object[i] == 0 && errno != 0) + fatal("bad object number %s: %s", + argv[i], strerror(errno)); + } + } + + if (os != NULL) { + dump_dir(os); + dmu_objset_close(os); + } else { + dump_zpool(spa); + spa_close(spa, FTAG); + } + + kernel_fini(); + + return (0); +} diff --git a/usr/src/cmd/zdb/zdb_il.c b/usr/src/cmd/zdb/zdb_il.c new file mode 100644 index 0000000000..924d4a1dec --- /dev/null +++ b/usr/src/cmd/zdb/zdb_il.c @@ -0,0 +1,337 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Print intent log header and statistics. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/dmu.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <sys/zil.h> +#include <sys/zil_impl.h> + +extern uint8_t dump_opt[256]; + +static void +print_log_bp(blkptr_t *bp, const char *prefix) +{ + char blkbuf[200]; + + sprintf_blkptr(blkbuf, bp); + (void) printf("%s%s\n", prefix, blkbuf); +} + +/* ARGSUSED */ +static void +zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr) +{ + time_t crtime = lr->lr_crtime[0]; + char *name = (char *)(lr + 1); + char *link = name + strlen(name) + 1; + + if (txtype == TX_SYMLINK) + (void) printf("\t\t\t%s -> %s\n", name, link); + else + (void) printf("\t\t\t%s\n", name); + + (void) printf("\t\t\t%s", ctime(&crtime)); + (void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n", + (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid, + (longlong_t)lr->lr_mode); + (void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n", + (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid, + (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev); +} + +/* ARGSUSED */ +static void +zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr) +{ + (void) printf("\t\t\tdoid %llu, name %s\n", + (u_longlong_t)lr->lr_doid, (char *)(lr + 1)); +} + +/* ARGSUSED */ +static void +zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr) +{ + (void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n", + (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj, + (char *)(lr + 1)); +} + +/* ARGSUSED */ +static void +zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr) +{ + char *snm = (char *)(lr + 1); + char *tnm = snm + strlen(snm) + 1; + + (void) printf("\t\t\tsdoid %llu, tdoid %llu\n", + (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid); + (void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm); +} + +/* ARGSUSED */ +static void +zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t 
*lr) +{ + char *data, *dlimit; + blkptr_t *bp = &lr->lr_blkptr; + char buf[SPA_MAXBLOCKSIZE]; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + int error; + + (void) printf("\t\t\tfoid %llu, offset 0x%llx," + " length 0x%llx, blkoff 0x%llx\n", + (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset, + (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff); + + if (verbose < 5) + return; + + if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { + (void) printf("\t\t\thas blkptr, %s\n", + bp->blk_birth >= spa_first_txg(zilog->zl_spa) ? + "will claim" : "won't claim"); + print_log_bp(bp, "\t\t\t"); + if (bp->blk_birth == 0) { + bzero(buf, sizeof (buf)); + } else { + error = zio_wait(zio_read(NULL, zilog->zl_spa, + bp, buf, BP_GET_LSIZE(bp), NULL, NULL, + ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL)); + if (error) + return; + } + data = buf + lr->lr_blkoff; + } else { + data = (char *)(lr + 1); + } + + dlimit = data + MIN(lr->lr_length, + (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)); + + (void) printf("\t\t\t"); + while (data < dlimit) { + if (isprint(*data)) + (void) printf("%c ", *data); + else + (void) printf("%2X", *data); + data++; + } + (void) printf("\n"); +} + +/* ARGSUSED */ +static void +zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr) +{ + (void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n", + (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset, + (u_longlong_t)lr->lr_length); +} + +/* ARGSUSED */ +static void +zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr) +{ + time_t atime = (time_t)lr->lr_atime[0]; + time_t mtime = (time_t)lr->lr_mtime[0]; + + (void) printf("\t\t\tfoid %llu, mask 0x%llx\n", + (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask); + + if (lr->lr_mask & AT_MODE) { + (void) printf("\t\t\tAT_MODE %llo\n", + (longlong_t)lr->lr_mode); + } + + if (lr->lr_mask & AT_UID) { + (void) printf("\t\t\tAT_UID %llu\n", + (u_longlong_t)lr->lr_uid); + } + + if (lr->lr_mask & AT_GID) { + (void) 
printf("\t\t\tAT_GID %llu\n", + (u_longlong_t)lr->lr_gid); + } + + if (lr->lr_mask & AT_SIZE) { + (void) printf("\t\t\tAT_SIZE %llu\n", + (u_longlong_t)lr->lr_size); + } + + if (lr->lr_mask & AT_ATIME) { + (void) printf("\t\t\tAT_ATIME %llu.%09llu %s", + (u_longlong_t)lr->lr_atime[0], + (u_longlong_t)lr->lr_atime[1], + ctime(&atime)); + } + + if (lr->lr_mask & AT_MTIME) { + (void) printf("\t\t\tAT_MTIME %llu.%09llu %s", + (u_longlong_t)lr->lr_mtime[0], + (u_longlong_t)lr->lr_mtime[1], + ctime(&mtime)); + } +} + +/* ARGSUSED */ +static void +zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr) +{ + (void) printf("\t\t\tfoid %llu, aclcnt %llu\n", + (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt); +} + +typedef void (*zil_prt_rec_func_t)(); +typedef struct zil_rec_info { + zil_prt_rec_func_t zri_print; + char *zri_name; + uint64_t zri_count; +} zil_rec_info_t; + +static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { + { NULL, "Total " }, + { zil_prt_rec_create, "TX_CREATE " }, + { zil_prt_rec_create, "TX_MKDIR " }, + { zil_prt_rec_create, "TX_MKXATTR " }, + { zil_prt_rec_create, "TX_SYMLINK " }, + { zil_prt_rec_remove, "TX_REMOVE " }, + { zil_prt_rec_remove, "TX_RMDIR " }, + { zil_prt_rec_link, "TX_LINK " }, + { zil_prt_rec_rename, "TX_RENAME " }, + { zil_prt_rec_write, "TX_WRITE " }, + { zil_prt_rec_truncate, "TX_TRUNCATE" }, + { zil_prt_rec_setattr, "TX_SETATTR " }, + { zil_prt_rec_acl, "TX_ACL " }, +}; + +/* ARGSUSED */ +static void +print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t first_txg) +{ + int txtype; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + + txtype = lr->lrc_txtype; + + ASSERT(txtype != 0 && (uint_t)txtype < TX_MAX_TYPE); + ASSERT(lr->lrc_txg); + + (void) printf("\t\t%s len %6llu, txg %llu, seq %llu\n", + zil_rec_info[txtype].zri_name, + (u_longlong_t)lr->lrc_reclen, + (u_longlong_t)lr->lrc_txg, + (u_longlong_t)lr->lrc_seq); + + if (txtype && verbose >= 3) + zil_rec_info[txtype].zri_print(zilog, txtype, lr); + + 
zil_rec_info[txtype].zri_count++; + zil_rec_info[0].zri_count++; +} + +/* ARGSUSED */ +static void +print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t first_txg) +{ + char blkbuf[200]; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + + if (verbose <= 3) + return; + + if (verbose >= 5) { + (void) strcpy(blkbuf, ", "); + sprintf_blkptr(blkbuf + strlen(blkbuf), bp); + } else { + blkbuf[0] = '\0'; + } + + (void) printf("\tBlock seqno %llu, %s%s\n", + (u_longlong_t)bp->blk_cksum.zc_word[3], + bp->blk_birth >= first_txg ? "will claim" : "won't claim", blkbuf); +} + +static void +print_log_stats(int verbose) +{ + int i, w, p10; + + if (verbose > 3) + (void) printf("\n"); + + if (zil_rec_info[0].zri_count == 0) + return; + + for (w = 1, p10 = 10; zil_rec_info[0].zri_count >= p10; p10 *= 10) + w++; + + for (i = 0; i < TX_MAX_TYPE; i++) + if (zil_rec_info[i].zri_count || verbose >= 3) + (void) printf("\t\t%s %*llu\n", + zil_rec_info[i].zri_name, w, + (u_longlong_t)zil_rec_info[i].zri_count); + (void) printf("\n"); +} + +/* ARGSUSED */ +void +dump_intent_log(zilog_t *zilog) +{ + zil_header_t *zh = zilog->zl_header; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + int i; + + if (zh->zh_log.blk_birth == 0 || verbose < 2) + return; + + (void) printf("\n ZIL header: claim_txg %llu, seq %llu\n", + (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_replay_seq); + + if (verbose >= 4) + print_log_bp(&zh->zh_log, "\n\tfirst block: "); + + for (i = 0; i < TX_MAX_TYPE; i++) + zil_rec_info[i].zri_count = 0; + + if (verbose >= 2) { + (void) printf("\n"); + zil_parse(zilog, print_log_block, print_log_record, NULL, + spa_first_txg(zilog->zl_spa)); + print_log_stats(verbose); + } +} diff --git a/usr/src/cmd/zfs/Makefile b/usr/src/cmd/zfs/Makefile new file mode 100644 index 0000000000..3a80b1c77e --- /dev/null +++ b/usr/src/cmd/zfs/Makefile @@ -0,0 +1,93 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development 
and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG= zfs +OBJS= zfs_main.o zfs_iter.o +SRCS= $(OBJS:%.o=%.c) +POFILES= zfs_main.po zfs_iter.po +POFILE= zfs.po + +include ../Makefile.cmd + +FSTYPE= zfs +LINKPROGS= mount umount +ROOTETCFSTYPE= $(ROOTETC)/fs/$(FSTYPE) +USRLIBFSTYPE= $(ROOTLIB)/fs/$(FSTYPE) + +LDLIBS += -lzfs -luutil -lumem + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT + +# lint complains about unused _umem_* functions +LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 +LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 + +ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%) +USRLIBFSTYPELINKS = $(LINKPROGS:%=$(USRLIBFSTYPE)/%) +ROOTETCFSTYPELINKS = $(LINKPROGS:%=$(ROOTETCFSTYPE)/%) + +.KEEP_STATE: + +.PARALLEL: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +install: all $(ROOTSBINPROG) $(ROOTUSRSBINLINKS) $(USRLIBFSTYPELINKS) \ + $(ROOTETCFSTYPELINKS) + +$(POFILE): $(POFILES) + $(RM) $@ + cat $(POFILES) > $@ + +clean: + $(RM) $(OBJS) + +lint: lint_SRCS + +# Links from /usr/sbin to /sbin +$(ROOTUSRSBINLINKS): + -$(RM) $@; $(SYMLINK) ../../sbin/$(PROG) $@ + +# Links from 
/usr/lib/fs/zfs to /sbin +$(USRLIBFSTYPELINKS): + -$(RM) $@; $(SYMLINK) ../../../../sbin/$(PROG) $@ + +# Links from /etc/fs/zfs to /sbin +$(ROOTETCFSTYPELINKS): + -$(RM) $@; $(SYMLINK) ../../../sbin/$(PROG) $@ + +FRC: + +include ../Makefile.targ diff --git a/usr/src/cmd/zfs/zfs_iter.c b/usr/src/cmd/zfs/zfs_iter.c new file mode 100644 index 0000000000..8f065c03bd --- /dev/null +++ b/usr/src/cmd/zfs/zfs_iter.c @@ -0,0 +1,247 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libintl.h> +#include <libuutil.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include <libzfs.h> + +#include "zfs_util.h" + +/* + * This is a private interface used to gather up all the datasets specified on + * the command line so that we can iterate over them in order. + * + * First, we iterate over all filesystems, gathering them together into an + * AVL tree sorted by name. For snapshots, we order them according to + * creation time. 
We report errors for any explicitly specified datasets + * that we couldn't open. + * + * When finished, we have an AVL tree of ZFS handles. We go through and execute + * the provided callback for each one, passing whatever data the user supplied. + */ + +typedef struct zfs_node { + zfs_handle_t *zn_handle; + uu_avl_node_t zn_avlnode; +} zfs_node_t; + +typedef struct callback_data { + uu_avl_t *cb_avl; + int cb_recurse; + zfs_type_t cb_types; +} callback_data_t; + +uu_avl_pool_t *avl_pool; + +/* + * Called for each dataset. If the object the object is of an appropriate type, + * add it to the avl tree and recurse over any children as necessary. + */ +int +zfs_callback(zfs_handle_t *zhp, void *data) +{ + callback_data_t *cb = data; + int dontclose = 0; + + /* + * If this object is of the appropriate type, add it to the AVL tree. + */ + if (zfs_get_type(zhp) & cb->cb_types) { + uu_avl_index_t idx; + zfs_node_t *node = safe_malloc(sizeof (zfs_node_t)); + + node->zn_handle = zhp; + uu_avl_node_init(node, &node->zn_avlnode, avl_pool); + if (uu_avl_find(cb->cb_avl, node, NULL, &idx) == NULL) { + uu_avl_insert(cb->cb_avl, node, idx); + dontclose = 1; + } else { + free(node); + } + } + + /* + * If 'recurse' is set, and the datasets can have datasets of the + * appropriate type, then recurse over its children. 
+ */ + if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM || + (cb->cb_types & ZFS_TYPE_SNAPSHOT))) + (void) zfs_iter_children(zhp, zfs_callback, data); + + if (!dontclose) + zfs_close(zhp); + + return (0); +} + +/* ARGSUSED */ +static int +zfs_compare(const void *larg, const void *rarg, void *unused) +{ + zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; + zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; + const char *lname = zfs_get_name(l); + const char *rname = zfs_get_name(r); + char *lat, *rat; + uint64_t lcreate, rcreate; + int ret; + + lat = (char *)strchr(lname, '@'); + rat = (char *)strchr(rname, '@'); + + if (lat != NULL) + *lat = '\0'; + if (rat != NULL) + *rat = '\0'; + + ret = strcmp(lname, rname); + if (ret == 0) { + /* + * If we're comparing a dataset to one of its snapshots, we + * always make the full dataset first. + */ + if (lat == NULL) { + ret = -1; + } else if (rat == NULL) { + ret = 1; + } else { + /* + * If we have two snapshots from the same dataset, then + * we want to sort them according to creation time. We + * use the hidden CREATETXG property to get an absolute + * ordering of snapshots. 
+ */ + lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); + rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); + + if (lcreate < rcreate) + ret = -1; + else if (lcreate > rcreate) + ret = 1; + } + } + + if (lat != NULL) + *lat = '@'; + if (rat != NULL) + *rat = '@'; + + return (ret); +} + +int +zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types, + zfs_iter_f callback, void *data) +{ + callback_data_t cb; + int ret = 0; + zfs_node_t *node; + uu_avl_walk_t *walk; + + avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t), + offsetof(zfs_node_t, zn_avlnode), zfs_compare, UU_DEFAULT); + + if (avl_pool == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + exit(1); + } + + cb.cb_recurse = recurse; + cb.cb_types = types; + if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + exit(1); + } + + if (argc == 0) { + /* + * If given no arguments, iterate over all datasets. + */ + cb.cb_recurse = 1; + ret = zfs_iter_root(zfs_callback, &cb); + } else { + int i; + zfs_handle_t *zhp; + zfs_type_t argtype; + + /* + * If we're recursive, then we always allow filesystems as + * arguments. If we also are interested in snapshots, then we + * can take volumes as well. + */ + argtype = types; + if (recurse) { + argtype |= ZFS_TYPE_FILESYSTEM; + if (types & ZFS_TYPE_SNAPSHOT) + argtype |= ZFS_TYPE_VOLUME; + } + + for (i = 0; i < argc; i++) { + if ((zhp = zfs_open(argv[i], argtype)) != NULL) + ret = zfs_callback(zhp, &cb); + else + ret = 1; + } + } + + /* + * At this point we've got our AVL tree full of zfs handles, so iterate + * over each one and execute the real user callback. + */ + for (node = uu_avl_first(cb.cb_avl); node != NULL; + node = uu_avl_next(cb.cb_avl, node)) + ret |= callback(node->zn_handle, data); + + /* + * Finally, clean up the AVL tree. 
+ */ + if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory")); + exit(1); + } + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(cb.cb_avl, node); + zfs_close(node->zn_handle); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(cb.cb_avl); + uu_avl_pool_destroy(avl_pool); + + return (ret); +} diff --git a/usr/src/cmd/zfs/zfs_iter.h b/usr/src/cmd/zfs/zfs_iter.h new file mode 100644 index 0000000000..03428b827b --- /dev/null +++ b/usr/src/cmd/zfs/zfs_iter.h @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef ZFS_ITER_H +#define ZFS_ITER_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +int zfs_for_each(int, char **, int, zfs_type_t, zfs_iter_f, void *); + +#ifdef __cplusplus +} +#endif + +#endif /* ZFS_ITER_H */ diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c new file mode 100644 index 0000000000..78e3fecf99 --- /dev/null +++ b/usr/src/cmd/zfs/zfs_main.c @@ -0,0 +1,2787 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <libgen.h> +#include <libintl.h> +#include <libuutil.h> +#include <locale.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <fcntl.h> +#include <zone.h> +#include <sys/mkdev.h> +#include <sys/mntent.h> +#include <sys/mnttab.h> +#include <sys/mount.h> +#include <sys/stat.h> + +#include <libzfs.h> + +#include "zfs_iter.h" + +static FILE *mnttab_file; + +static int zfs_do_clone(int argc, char **argv); +static int zfs_do_create(int argc, char **argv); +static int zfs_do_destroy(int argc, char **argv); +static int zfs_do_get(int argc, char **argv); +static int zfs_do_inherit(int argc, char **argv); +static int zfs_do_list(int argc, char **argv); +static int zfs_do_mount(int argc, char **argv); +static int zfs_do_rename(int argc, char **argv); +static int zfs_do_rollback(int argc, char **argv); +static int zfs_do_set(int argc, char **argv); +static int zfs_do_snapshot(int argc, char **argv); +static int zfs_do_unmount(int argc, char **argv); +static int zfs_do_share(int argc, char **argv); +static int zfs_do_unshare(int argc, char **argv); +static int zfs_do_backup(int argc, char **argv); +static int zfs_do_restore(int argc, char **argv); + +/* + * These libumem hooks provide a reasonable set of defaults for the allocator's + * debugging facilities. + */ +const char * +_umem_debug_init() +{ + return ("default,verbose"); /* $UMEM_DEBUG setting */ +} + +const char * +_umem_logging_init(void) +{ + return ("fail,contents"); /* $UMEM_LOGGING setting */ +} + +typedef struct zfs_command { + const char *name; + int (*func)(int argc, char **argv); + const char *usage; +} zfs_command_t; + +/* + * Master command table. Each ZFS command has a name, associated function, and + * usage message. These commands are organized according to how they are + * displayed in the usage message. 
An empty command (one with a NULL name) + * indicates an empty line in the generic usage message. A command with a NULL + * usage message indicates an alias for an existing command, and is not + * displayed in the general usage message. + */ +static zfs_command_t command_table[] = { + { "create", zfs_do_create, + "\tcreate <filesystem>\n" + "\tcreate [-s] [-b blocksize] -V <size> <volume>\n" }, + { "destroy", zfs_do_destroy, + "\tdestroy [-rRf] <filesystem|volume|snapshot>\n" }, + { NULL }, + { "snapshot", zfs_do_snapshot, + "\tsnapshot <filesystem@name|volume@name>\n" }, + { "rollback", zfs_do_rollback, + "\trollback [-rRf] <snapshot>\n" }, + { "clone", zfs_do_clone, + "\tclone <snapshot> <filesystem|volume>\n" }, + { "rename", zfs_do_rename, + "\trename <filesystems|volume|snapshot> " + "<filesystem|volume|snapshot>\n" }, + { NULL }, + { "list", zfs_do_list, + "\tlist [-rH] [-o property[,property]...] [-t type[,type]...]\n" + "\t [filesystem|volume|snapshot] ...\n" }, + { NULL }, + { "set", zfs_do_set, + "\tset <property=value> <filesystem|volume> ...\n" }, + { "get", zfs_do_get, + "\tget [-rHp] [-o field[,field]...] 
[-s source[,source]...]\n" + "\t <all | property[,property]...> " + "<filesystem|volume|snapshot> ...\n" }, + { "inherit", zfs_do_inherit, + "\tinherit [-r] <property> <filesystem|volume> ...\n" }, + { NULL }, + { "mount", zfs_do_mount, + "\tmount\n" + "\tmount [-o opts] [-O] -a\n" + "\tmount [-o opts] [-O] <filesystem>\n" }, + { NULL }, + { "unmount", zfs_do_unmount, + "\tunmount [-f] -a\n" + "\tunmount [-f] <filesystem|mountpoint>\n" }, + { NULL }, + { "share", zfs_do_share, + "\tshare -a\n" + "\tshare <filesystem>\n" }, + { NULL }, + { "unshare", zfs_do_unshare, + "\tunshare [-f] -a\n" + "\tunshare [-f] <filesystem|mountpoint>\n" }, + { NULL }, + { "backup", zfs_do_backup, + "\tbackup [-i <snapshot>] <snapshot>\n" }, + { "restore", zfs_do_restore, + "\trestore [-vn] <filesystem|volume|snapshot>\n" + "\trestore [-vn] -d <filesystem>\n" }, +}; + +#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) + +zfs_command_t *current_command; + +/* + * Utility function to guarantee malloc() success. + */ +void * +safe_malloc(size_t size) +{ + void *data; + + if ((data = calloc(1, size)) == NULL) { + (void) fprintf(stderr, "internal error: out of memory\n"); + exit(1); + } + + return (data); +} + +/* + * Display usage message. If we're inside a command, display only the usage for + * that command. Otherwise, iterate over the entire command table and display + * a complete usage message. + */ +static void +usage(int requested) +{ + int i; + int show_properties = FALSE; + FILE *fp = requested ? 
stdout : stderr; + + if (current_command == NULL) { + + (void) fprintf(fp, gettext("usage: zfs command args ...\n")); + (void) fprintf(fp, + gettext("where 'command' is one of the following:\n\n")); + + for (i = 0; i < NCOMMAND; i++) { + if (command_table[i].name == NULL) + (void) fprintf(fp, "\n"); + else + (void) fprintf(fp, "%s", + command_table[i].usage); + } + + (void) fprintf(fp, gettext("\nEach dataset is of the form: " + "pool/[dataset/]*dataset[@name]\n")); + } else { + (void) fprintf(fp, gettext("usage:\n")); + (void) fprintf(fp, current_command->usage); + } + + if (current_command == NULL || + strcmp(current_command->name, "set") == 0 || + strcmp(current_command->name, "get") == 0 || + strcmp(current_command->name, "inherit") == 0 || + strcmp(current_command->name, "list") == 0) + show_properties = TRUE; + + if (show_properties) { + + (void) fprintf(fp, + gettext("\nThe following properties are supported:\n")); + + (void) fprintf(fp, "\n\t%-13s %s %s %s\n\n", + "PROPERTY", "EDIT", "INHERIT", "VALUES"); + + for (i = 0; i < ZFS_NPROP_VISIBLE; i++) { + (void) fprintf(fp, "\t%-13s ", zfs_prop_to_name(i)); + + if (zfs_prop_readonly(i)) + (void) fprintf(fp, " NO "); + else + (void) fprintf(fp, " YES "); + + if (zfs_prop_inheritable(i)) + (void) fprintf(fp, " YES "); + else + (void) fprintf(fp, " NO "); + + if (zfs_prop_values(i) == NULL) + (void) fprintf(fp, "-\n"); + else + (void) fprintf(fp, "%s\n", zfs_prop_values(i)); + } + (void) fprintf(fp, gettext("\nSizes are specified in bytes " + "with standard units such as K, M, G, etc.\n")); + } + + exit(requested ? 0 : 2); +} + +/* + * zfs clone <fs, snap, vol> fs + * + * Given an existing dataset, create a writable copy whose initial contents + * are the same as the source. The newly created dataset maintains a + * dependency on the original; the original cannot be destroyed so long as + * the clone exists. 
+ */ +static int +zfs_do_clone(int argc, char **argv) +{ + zfs_handle_t *zhp; + int ret; + + /* check options */ + if (argc > 1 && argv[1][0] == '-') { + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + argv[1][1]); + usage(FALSE); + } + + /* check number of arguments */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing source dataset " + "argument\n")); + usage(FALSE); + } + if (argc < 3) { + (void) fprintf(stderr, gettext("missing target dataset " + "argument\n")); + usage(FALSE); + } + if (argc > 3) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + /* open the source dataset */ + if ((zhp = zfs_open(argv[1], ZFS_TYPE_SNAPSHOT)) == NULL) + return (1); + + /* pass to libzfs */ + ret = zfs_clone(zhp, argv[2]); + + /* create the mountpoint if necessary */ + if (ret == 0) { + zfs_handle_t *clone = zfs_open(argv[2], ZFS_TYPE_ANY); + if (clone != NULL) { + if ((ret = zfs_mount(clone, NULL, 0)) == 0) + ret = zfs_share(clone); + zfs_close(clone); + } + } + + zfs_close(zhp); + + return (ret == 0 ? 0 : 1); +} + +/* + * zfs create fs + * zfs create [-s] -V vol size + * + * Create a new dataset. This command can be used to create filesystems + * and volumes. Snapshot creation is handled by 'zfs snapshot'. + * For volumes, the user must specify a size to be used. + * + * The '-s' flag applies only to volumes, and indicates that we should not try + * to set the reservation for this volume. By default we set a reservation + * equal to the size for any volume. 
+ */ +static int +zfs_do_create(int argc, char **argv) +{ + zfs_type_t type = ZFS_TYPE_FILESYSTEM; + zfs_handle_t *zhp; + char *size = NULL; + char *blocksize = NULL; + int c; + int noreserve = FALSE; + int ret; + + /* check options */ + while ((c = getopt(argc, argv, ":V:b:s")) != -1) { + switch (c) { + case 'V': + type = ZFS_TYPE_VOLUME; + size = optarg; + break; + case 'b': + blocksize = optarg; + break; + case 's': + noreserve = TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing size " + "argument\n")); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + if (noreserve && type != ZFS_TYPE_VOLUME) { + (void) fprintf(stderr, gettext("'-s' can only be used when " + "creating a volume\n")); + usage(FALSE); + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc == 0) { + (void) fprintf(stderr, gettext("missing %s argument\n"), + zfs_type_to_name(type)); + usage(FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + /* pass to libzfs */ + if (zfs_create(argv[0], type, size, blocksize) != 0) + return (1); + + if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL) + return (1); + + /* + * Volume handling. By default, we try to create a reservation of equal + * size for the volume. If we can't do this, then destroy the dataset + * and report an error. + */ + if (type == ZFS_TYPE_VOLUME && !noreserve) { + if (zfs_prop_set(zhp, ZFS_PROP_RESERVATION, size) != 0) { + (void) fprintf(stderr, gettext("use '-s' to create a " + "volume without a matching reservation\n")); + (void) zfs_destroy(zhp); + return (1); + } + } + + /* + * Mount and/or share the new filesystem as appropriate. We provide a + * verbose error message to let the user know that their filesystem was + * in fact created, even if we failed to mount or share it. 
+ */ + if (zfs_mount(zhp, NULL, 0) != 0) { + (void) fprintf(stderr, gettext("filesystem successfully " + "created, but not mounted\n")); + ret = 1; + } else if (zfs_share(zhp) != 0) { + (void) fprintf(stderr, gettext("filesystem successfully " + "created, but not shared\n")); + ret = 1; + } else { + ret = 0; + } + + zfs_close(zhp); + return (ret); +} + +/* + * zfs destroy [-rf] <fs, snap, vol> + * + * -r Recursively destroy all children + * -R Recursively destroy all dependents, including clones + * -f Force unmounting of any dependents + * + * Destroys the given dataset. By default, it will unmount any filesystems, + * and refuse to destroy a dataset that has any dependents. A dependent can + * either be a child, or a clone of a child. + */ +typedef struct destroy_cbdata { + int cb_first; + int cb_force; + int cb_recurse; + int cb_error; + int cb_needforce; + int cb_doclones; + zfs_handle_t *cb_target; +} destroy_cbdata_t; + +/* + * Check for any dependents based on the '-r' or '-R' flags. + */ +static int +destroy_check_dependent(zfs_handle_t *zhp, void *data) +{ + destroy_cbdata_t *cbp = data; + const char *tname = zfs_get_name(cbp->cb_target); + const char *name = zfs_get_name(zhp); + + if (strncmp(tname, name, strlen(tname)) == 0 && + (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) { + /* + * This is a direct descendant, not a clone somewhere else in + * the hierarchy. + */ + if (cbp->cb_recurse) + goto out; + + if (cbp->cb_first) { + (void) fprintf(stderr, gettext("cannot destroy '%s': " + "%s has children\n"), + zfs_get_name(cbp->cb_target), + zfs_type_to_name(zfs_get_type(cbp->cb_target))); + (void) fprintf(stderr, gettext("use '-r' to destroy " + "the following datasets:\n")); + cbp->cb_first = 0; + cbp->cb_error = 1; + } + + (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); + } else { + /* + * This is a clone. We only want to report this if the '-r' + * wasn't specified, or the target is a snapshot. 
+ */ + if (!cbp->cb_recurse && + zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT) + goto out; + + if (cbp->cb_first) { + (void) fprintf(stderr, gettext("cannot destroy '%s': " + "%s has dependent clones\n"), + zfs_get_name(cbp->cb_target), + zfs_type_to_name(zfs_get_type(cbp->cb_target))); + (void) fprintf(stderr, gettext("use '-R' to destroy " + "the following datasets:\n")); + cbp->cb_first = 0; + cbp->cb_error = 1; + } + + (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); + } + +out: + zfs_close(zhp); + return (0); +} + +static int +destroy_callback(zfs_handle_t *zhp, void *data) +{ + destroy_cbdata_t *cbp = data; + + /* + * Ignore pools (which we've already flagged as an error before getting + * here. + */ + if (strchr(zfs_get_name(zhp), '/') == NULL && + zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { + zfs_close(zhp); + return (0); + } + + /* + * Bail out on the first error. + */ + if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 || + zfs_destroy(zhp) != 0) { + zfs_close(zhp); + return (-1); + } + + zfs_close(zhp); + return (0); +} + + +static int +zfs_do_destroy(int argc, char **argv) +{ + destroy_cbdata_t cb = { 0 }; + int c; + zfs_handle_t *zhp; + + /* check options */ + while ((c = getopt(argc, argv, "frR")) != -1) { + switch (c) { + case 'f': + cb.cb_force = 1; + break; + case 'r': + cb.cb_recurse = 1; + break; + case 'R': + cb.cb_recurse = 1; + cb.cb_doclones = 1; + break; + case '?': + default: + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc == 0) { + (void) fprintf(stderr, gettext("missing path argument\n")); + usage(FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + /* Open the given dataset */ + if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL) + return (1); + + cb.cb_target = zhp; + + /* + * Perform an explicit check for pools before going any 
further. + */ + if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && + zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { + (void) fprintf(stderr, gettext("cannot destroy '%s': " + "operation does not apply to pools\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zfs destroy -r " + "%s' to destroy all datasets in the pool\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zpool destroy %s' " + "to destroy the pool itself\n"), zfs_get_name(zhp)); + zfs_close(zhp); + return (1); + } + + + /* + * Check for any dependents and/or clones. + */ + cb.cb_first = 1; + if (!cb.cb_doclones) + (void) zfs_iter_dependents(zhp, destroy_check_dependent, &cb); + + if (cb.cb_error) { + zfs_close(zhp); + return (1); + } + + /* + * Do the real thing. + */ + if (zfs_iter_dependents(zhp, destroy_callback, &cb) == 0 && + destroy_callback(zhp, &cb) == 0) + return (0); + + return (1); +} + +/* + * zfs get [-rH] [-o field[,field]...] [-s source[,source]...] + * prop[,prop...] < fs | snap | vol > ... + * + * -r recurse over any child datasets + * -H scripted mode. Headers are stripped, and fields are separated + * by tabs instead of spaces. + * -o Set of fields to display. One of "name,property,value,source". + * Default is all four. + * -s Set of sources to allow. One of + * "local,default,inherited,temporary,none". Default is all + * five. + * -p Display values in parsable (literal) format. + * + * Prints properties for the given datasets. The user can control which + * columns to display as well as which property types to allow. + */ +typedef struct get_cbdata { + int cb_scripted; + int cb_sources; + int cb_literal; + int cb_columns[4]; + zfs_prop_t cb_prop[ZFS_NPROP_ALL]; + int cb_nprop; +} get_cbdata_t; + +#define GET_COL_NAME 1 +#define GET_COL_PROPERTY 2 +#define GET_COL_VALUE 3 +#define GET_COL_SOURCE 4 + +/* + * Display a single line of output, according to the settings in the callback + * structure. 
+ */ +static void +print_one_property(zfs_handle_t *zhp, get_cbdata_t *cbp, zfs_prop_t prop, + const char *value, zfs_source_t sourcetype, const char *source) +{ + int i; + int width; + const char *str; + char buf[128]; + + /* + * Ignore those source types that the user has chosen to ignore. + */ + if ((sourcetype & cbp->cb_sources) == 0) + return; + + for (i = 0; i < 4; i++) { + switch (cbp->cb_columns[i]) { + case GET_COL_NAME: + width = 15; + str = zfs_get_name(zhp); + break; + + case GET_COL_PROPERTY: + width = 13; + str = zfs_prop_to_name(prop); + break; + + case GET_COL_VALUE: + width = 25; + str = value; + break; + + case GET_COL_SOURCE: + width = 15; + switch (sourcetype) { + case ZFS_SRC_NONE: + str = "-"; + break; + + case ZFS_SRC_DEFAULT: + str = "default"; + break; + + case ZFS_SRC_LOCAL: + str = "local"; + break; + + case ZFS_SRC_TEMPORARY: + str = "temporary"; + break; + + case ZFS_SRC_INHERITED: + (void) snprintf(buf, sizeof (buf), + "inherited from %s", source); + str = buf; + break; + } + break; + + default: + continue; + } + + if (cbp->cb_columns[i + 1] == 0) + (void) printf("%s", str); + else if (cbp->cb_scripted) + (void) printf("%s\t", str); + else + (void) printf("%-*s ", width, str); + + } + + (void) printf("\n"); +} + +/* + * Invoked to display the properties for a single dataset. + */ +static int +get_callback(zfs_handle_t *zhp, void *data) +{ + char buf[ZFS_MAXPROPLEN]; + zfs_source_t sourcetype; + char source[ZFS_MAXNAMELEN]; + get_cbdata_t *cbp = data; + int i; + + /* + * If we've been given a list of properties, always list properties + * in the order given. Otherwise, iterate over all properties and + * determine if we should display them. 
+ */ + if (cbp->cb_nprop != 0) { + for (i = 0; i < cbp->cb_nprop; i++) { + if (zfs_prop_get(zhp, cbp->cb_prop[i], buf, + sizeof (buf), &sourcetype, source, sizeof (source), + cbp->cb_literal) != 0) { + (void) strlcpy(buf, "-", sizeof (buf)); + sourcetype = ZFS_SRC_NONE; + } + + print_one_property(zhp, cbp, cbp->cb_prop[i], + buf, sourcetype, source); + } + } else { + for (i = 0; i < ZFS_NPROP_VISIBLE; i++) { + if (zfs_prop_get(zhp, i, buf, + sizeof (buf), &sourcetype, source, sizeof (source), + cbp->cb_literal) == 0) { + print_one_property(zhp, cbp, i, + buf, sourcetype, source); + } + } + } + + return (0); +} + +static int +zfs_do_get(int argc, char **argv) +{ + get_cbdata_t cb = { 0 }; + int recurse = 0; + int c; + char **subopts = zfs_prop_column_subopts(); + char **shortsubopts = zfs_prop_column_short_subopts(); + int prop; + char *value, *fields, *save_fields; + int i; + + /* + * Set up default columns and sources. + */ + cb.cb_sources = ZFS_SRC_ALL; + cb.cb_columns[0] = GET_COL_NAME; + cb.cb_columns[1] = GET_COL_PROPERTY; + cb.cb_columns[2] = GET_COL_VALUE; + cb.cb_columns[3] = GET_COL_SOURCE; + + /* check options */ + while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) { + switch (c) { + case 'p': + cb.cb_literal = TRUE; + break; + case 'r': + recurse = TRUE; + break; + case 'H': + cb.cb_scripted = TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case 'o': + /* + * Process the set of columns to display. We zero out + * the structure to give us a blank slate. 
+ */ + bzero(&cb.cb_columns, sizeof (cb.cb_columns)); + i = 0; + while (*optarg != '\0') { + static char *col_subopts[] = + { "name", "property", "value", "source", + NULL }; + + if (i == 4) { + (void) fprintf(stderr, gettext("too " + "many fields given to -o " + "option\n")); + usage(FALSE); + } + + switch (getsubopt(&optarg, col_subopts, + &value)) { + case 0: + cb.cb_columns[i++] = GET_COL_NAME; + break; + case 1: + cb.cb_columns[i++] = GET_COL_PROPERTY; + break; + case 2: + cb.cb_columns[i++] = GET_COL_VALUE; + break; + case 3: + cb.cb_columns[i++] = GET_COL_SOURCE; + break; + default: + (void) fprintf(stderr, + gettext("invalid column name " + "'%s'\n"), value); + usage(FALSE); + } + } + break; + + case 's': + cb.cb_sources = 0; + while (*optarg != '\0') { + static char *source_subopts[] = { + "local", "default", "inherited", + "temporary", "none", NULL }; + + switch (getsubopt(&optarg, source_subopts, + &value)) { + case 0: + cb.cb_sources |= ZFS_SRC_LOCAL; + break; + case 1: + cb.cb_sources |= ZFS_SRC_DEFAULT; + break; + case 2: + cb.cb_sources |= ZFS_SRC_INHERITED; + break; + case 3: + cb.cb_sources |= ZFS_SRC_TEMPORARY; + break; + case 4: + cb.cb_sources |= ZFS_SRC_NONE; + break; + default: + (void) fprintf(stderr, + gettext("invalid source " + "'%s'\n"), value); + usage(FALSE); + } + } + break; + + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing property " + "argument\n")); + usage(FALSE); + } + + fields = argv[0]; + + /* + * Leaving 'cb_nprop' at 0 will cause the callback to iterate over all + * known properties. 
+ */ + if (strcmp(fields, "all") != 0) { + while (*fields != '\0') { + if (cb.cb_nprop == ZFS_NPROP_ALL) { + (void) fprintf(stderr, gettext("too many " + "properties given to -o option\n")); + usage(FALSE); + } + + save_fields = fields; + if ((prop = getsubopt(&fields, subopts, + &value)) == -1) { + fields = save_fields; + prop = getsubopt(&fields, shortsubopts, &value); + } + + if (prop == -1) { + (void) fprintf(stderr, + gettext("invalid property '%s'\n"), value); + usage(FALSE); + } + + /* + * The 'name' property is a one-off special for 'zfs + * list', but is not a valid property for 'zfs get'. + */ + if (zfs_prop_column_name(prop) == NULL || + prop == ZFS_PROP_NAME) { + (void) fprintf(stderr, gettext("invalid " + "property '%s'\n"), zfs_prop_to_name(prop)); + usage(FALSE); + } + + cb.cb_prop[cb.cb_nprop++] = prop; + } + } + + argc--; + argv++; + + /* check for at least one dataset name */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing dataset argument\n")); + usage(FALSE); + } + + /* + * Print out any headers + */ + if (!cb.cb_scripted) { + int i; + for (i = 0; i < 4; i++) { + switch (cb.cb_columns[i]) { + case GET_COL_NAME: + (void) printf("%-15s ", "NAME"); + break; + case GET_COL_PROPERTY: + (void) printf("%-13s ", "PROPERTY"); + break; + case GET_COL_VALUE: + (void) printf("%-25s ", "VALUE"); + break; + case GET_COL_SOURCE: + (void) printf("%s", "SOURCE"); + break; + } + } + (void) printf("\n"); + } + + free(subopts); + for (i = 0; i < ZFS_NPROP_ALL; i++) + if (shortsubopts[i][0]) + free(shortsubopts[i]); + free(shortsubopts); + + /* run for each object */ + return (zfs_for_each(argc, argv, recurse, ZFS_TYPE_ANY, + get_callback, &cb)); +} + +/* + * inherit [-r] <property> <fs|vol> ... + * + * -r Recurse over all children + * + * For each dataset specified on the command line, inherit the given property + * from its parent. Inheriting a property at the pool level will cause it to + * use the default value. 
The '-r' flag will recurse over all children, and is + * useful for setting a property on a hierarchy-wide basis, regardless of any + * local modifications for each dataset. + */ +static int +inherit_callback(zfs_handle_t *zhp, void *data) +{ + zfs_prop_t prop = (zfs_prop_t)data; + + return (zfs_prop_inherit(zhp, prop) != 0); +} + +static int +zfs_do_inherit(int argc, char **argv) +{ + int recurse = 0; + int c; + zfs_prop_t prop; + char *propname; + + /* check options */ + while ((c = getopt(argc, argv, "r")) != -1) { + switch (c) { + case 'r': + recurse = TRUE; + break; + case '?': + default: + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing property argument\n")); + usage(FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing dataset argument\n")); + usage(FALSE); + } + + propname = argv[0]; + + /* + * Get and validate the property before iterating over the datasets. We + * do this now so as to avoid printing out an error message for each and + * every dataset. + */ + if ((prop = zfs_name_to_prop(propname)) == ZFS_PROP_INVAL) { + (void) fprintf(stderr, gettext("invalid property '%s'\n"), + propname); + usage(FALSE); + } + if (zfs_prop_readonly(prop)) { + (void) fprintf(stderr, gettext("%s property is read-only\n"), + propname); + return (1); + } + if (!zfs_prop_inheritable(prop)) { + (void) fprintf(stderr, gettext("%s property cannot be " + "inherited\n"), propname); + (void) fprintf(stderr, gettext("use 'zfs set %s=none' to " + "clear\n"), propname); + return (1); + } + + return (zfs_for_each(argc - 1, argv + 1, recurse, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + inherit_callback, (void *)prop)); +} + +/* + * list [-rH] [-a | -s] [-o prop[,prop]*] [fs | vol] ... 
+ * + * -r Recurse over all children + * -H Scripted mode; elide headers and separate colums by tabs + * -a Display all datasets + * -s Display only snapshots + * -o Control which fields to display. + * + * When given no arguments, lists all filesystems in the system. + * Otherwise, list the specified datasets, optionally recursing down them if + * '-r' is specified. + * + * If '-a' is given, then all datasets (including snapshots) are displayed. If + * '-s' is given, then only snapshots are displayed. Use of these options + * change the default set of fields output, which can still be overridden with + * '-o'. + */ +typedef struct list_cbdata { + int cb_first; + int cb_scripted; + int cb_fields[ZFS_NPROP_ALL]; + int cb_fieldcount; +} list_cbdata_t; + +/* + * Given a list of columns to display, output appropriate headers for each one. + */ +static void +print_header(int *fields, size_t count) +{ + int i; + + for (i = 0; i < count; i++) { + if (i != 0) + (void) printf(" "); + if (i == count - 1) + (void) printf("%s", zfs_prop_column_name(fields[i])); + else /* LINTED - format specifier */ + (void) printf(zfs_prop_column_format(fields[i]), + zfs_prop_column_name(fields[i])); + } + + (void) printf("\n"); +} + +/* + * Given a dataset and a list of fields, print out all the properties according + * to the described layout. 
+ */ +static void +print_dataset(zfs_handle_t *zhp, int *fields, size_t count, int scripted) +{ + int i; + char property[ZFS_MAXPROPLEN]; + + for (i = 0; i < count; i++) { + if (i != 0) { + if (scripted) + (void) printf("\t"); + else + (void) printf(" "); + } + + if (zfs_prop_get(zhp, fields[i], property, + sizeof (property), NULL, NULL, 0, FALSE) != 0) + (void) strlcpy(property, "-", sizeof (property)); + + if (scripted || i == count - 1) + (void) printf("%s", property); + else /* LINTED - format specifier */ + (void) printf(zfs_prop_column_format(fields[i]), + property); + } + + (void) printf("\n"); +} + +/* + * Generic callback function to list a dataset or snapshot. + */ +static int +list_callback(zfs_handle_t *zhp, void *data) +{ + list_cbdata_t *cbp = data; + + if (cbp->cb_first) { + if (!cbp->cb_scripted) + print_header(cbp->cb_fields, cbp->cb_fieldcount); + cbp->cb_first = FALSE; + } + + print_dataset(zhp, cbp->cb_fields, cbp->cb_fieldcount, + cbp->cb_scripted); + + return (0); +} + +static int +zfs_do_list(int argc, char **argv) +{ + int c; + int recurse = 0; + int scripted = FALSE; + static char default_fields[] = + "name,used,available,referenced,mountpoint"; + int types = ZFS_TYPE_ANY; + char *fields = NULL; + char *basic_fields = default_fields; + list_cbdata_t cb = { 0 }; + char *value; + int ret; + char **subopts = zfs_prop_column_subopts(); + char **shortsubopts = zfs_prop_column_short_subopts(); + int prop; + char *type_subopts[] = { "filesystem", "volume", "snapshot", NULL }; + char *save_fields; + int i; + + /* check options */ + while ((c = getopt(argc, argv, ":o:rt:H")) != -1) { + switch (c) { + case 'o': + fields = optarg; + break; + case 'r': + recurse = TRUE; + break; + case 'H': + scripted = TRUE; + break; + case 't': + types = 0; + while (*optarg != '\0') { + switch (getsubopt(&optarg, type_subopts, + &value)) { + case 0: + types |= ZFS_TYPE_FILESYSTEM; + break; + case 1: + types |= ZFS_TYPE_VOLUME; + break; + case 2: + types |= 
ZFS_TYPE_SNAPSHOT; + break; + default: + (void) fprintf(stderr, + gettext("invalid type '%s'\n"), + value); + usage(FALSE); + } + } + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + if (fields == NULL) + fields = basic_fields; + + while (*fields != '\0') { + if (cb.cb_fieldcount == ZFS_NPROP_ALL) { + (void) fprintf(stderr, gettext("too many " + "properties given to -o option\n")); + usage(FALSE); + } + + save_fields = fields; + if ((prop = getsubopt(&fields, subopts, &value)) == -1) { + fields = save_fields; + prop = getsubopt(&fields, shortsubopts, &value); + } + + if (prop == -1) { + (void) fprintf(stderr, gettext("invalid property " + "'%s'\n"), value); + usage(FALSE); + } + + if (zfs_prop_column_name(prop) == NULL) { + (void) fprintf(stderr, gettext("invalid property " + "'%s'\n"), zfs_prop_to_name(prop)); + usage(FALSE); + } + + cb.cb_fields[cb.cb_fieldcount++] = prop; + } + + cb.cb_scripted = scripted; + cb.cb_first = TRUE; + + ret = zfs_for_each(argc, argv, recurse, types, list_callback, &cb); + + if (ret == 0 && cb.cb_first == TRUE) + (void) printf(gettext("no datasets available\n")); + + free(subopts); + for (i = 0; i < ZFS_NPROP_ALL; i++) + if (shortsubopts[i][0]) + free(shortsubopts[i]); + free(shortsubopts); + + return (ret); +} + +/* + * zfs rename <fs | snap | vol> <fs | snap | vol> + * + * Renames the given dataset to another of the same type. 
+ */ +/* ARGSUSED */ +static int +zfs_do_rename(int argc, char **argv) +{ + zfs_handle_t *zhp; + int ret = 1; + + /* check options */ + if (argc > 1 && argv[1][0] == '-') { + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + argv[1][1]); + usage(FALSE); + } + + /* check number of arguments */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing source dataset " + "argument\n")); + usage(FALSE); + } + if (argc < 3) { + (void) fprintf(stderr, gettext("missing target dataset " + "argument\n")); + usage(FALSE); + } + if (argc > 3) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + if ((zhp = zfs_open(argv[1], ZFS_TYPE_ANY)) == NULL) + return (1); + + if (zfs_rename(zhp, argv[2]) != 0) + goto error; + + ret = 0; +error: + zfs_close(zhp); + return (ret); +} + +/* + * zfs rollback [-rfR] <snapshot> + * + * -r Delete any intervening snapshots before doing rollback + * -R Delete any snapshots and their clones + * -f Force unmount filesystems, even if they are in use. + * + * Given a filesystem, rollback to a specific snapshot, discarding any changes + * since then and making it the active dataset. If more recent snapshots exist, + * the command will complain unless the '-r' flag is given. + */ +typedef struct rollback_cbdata { + uint64_t cb_create; + int cb_first; + int cb_force; + int cb_doclones; + char *cb_target; + int cb_error; + int cb_recurse; + int cb_dependent; +} rollback_cbdata_t; + +/* + * Report any snapshots more recent than the one specified. Used when '-r' is + * not specified. We reuse this same callback for the snapshot dependents - if + * 'cb_dependent' is set, then this is a dependent and we should report it + * without checking the transaction group. 
+ */ +static int +rollback_check(zfs_handle_t *zhp, void *data) +{ + rollback_cbdata_t *cbp = data; + + if (cbp->cb_doclones) + return (0); + + if (!cbp->cb_dependent) { + if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && + zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > + cbp->cb_create) { + + if (cbp->cb_first && !cbp->cb_recurse) { + (void) fprintf(stderr, gettext("cannot " + "rollback to '%s': more recent snapshots " + "exist\n"), + cbp->cb_target); + (void) fprintf(stderr, gettext("use '-r' to " + "force deletion of the following " + "snapshots:\n")); + cbp->cb_first = 0; + cbp->cb_error = 1; + } + + if (cbp->cb_recurse) { + cbp->cb_dependent = TRUE; + (void) zfs_iter_dependents(zhp, rollback_check, + cbp); + cbp->cb_dependent = FALSE; + } else { + (void) fprintf(stderr, "%s\n", + zfs_get_name(zhp)); + } + } + } else { + if (cbp->cb_first && cbp->cb_recurse) { + (void) fprintf(stderr, gettext("cannot rollback to " + "'%s': clones of previous snapshots exist\n"), + cbp->cb_target); + (void) fprintf(stderr, gettext("use '-R' to " + "force deletion of the following clones and " + "dependents:\n")); + cbp->cb_first = 0; + cbp->cb_error = 1; + } + + (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); + } + + zfs_close(zhp); + return (0); +} + +/* + * Unmount any filesystems or snapshots that will need to be destroyed as part + * of the rollback process. + */ +static int +rollback_unmount(zfs_handle_t *zhp, void *data) +{ + rollback_cbdata_t *cbp = data; + + if (!cbp->cb_dependent) { + if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && + zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > + cbp->cb_create) { + + cbp->cb_dependent = TRUE; + (void) zfs_iter_dependents(zhp, rollback_unmount, cbp); + cbp->cb_dependent = FALSE; + + if (zfs_unmount(zhp, NULL, + cbp->cb_force ? MS_FORCE: 0) != 0) + cbp->cb_error = 1; + } + } else if (zfs_unmount(zhp, NULL, cbp->cb_force ? 
MS_FORCE : 0) != 0) { + cbp->cb_error = 1; + } + + zfs_close(zhp); + return (0); +} + +/* + * Destroy any more recent snapshots. We invoke this callback on any dependents + * of the snapshot first. If the 'cb_dependent' member is non-zero, then this + * is a dependent and we should just destroy it without checking the transaction + * group. + */ +static int +rollback_destroy(zfs_handle_t *zhp, void *data) +{ + rollback_cbdata_t *cbp = data; + + if (!cbp->cb_dependent) { + if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && + zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > + cbp->cb_create) { + + cbp->cb_dependent = TRUE; + (void) zfs_iter_dependents(zhp, rollback_destroy, cbp); + cbp->cb_dependent = FALSE; + + if (zfs_destroy(zhp) != 0) + cbp->cb_error = 1; + } + } else if (zfs_destroy(zhp) != 0) { + cbp->cb_error = 1; + } + + zfs_close(zhp); + return (0); +} + +static int +zfs_do_rollback(int argc, char **argv) +{ + int ret; + int c; + rollback_cbdata_t cb = { 0 }; + int was_mounted; + zfs_handle_t *zhp, *snap; + char parentname[ZFS_MAXNAMELEN]; + char *delim; + + /* check options */ + while ((c = getopt(argc, argv, "rfR")) != -1) { + switch (c) { + case 'f': + cb.cb_force = TRUE; + break; + case 'r': + cb.cb_recurse = 1; + break; + case 'R': + cb.cb_recurse = 1; + cb.cb_doclones = 1; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing dataset argument\n")); + usage(FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + cb.cb_target = argv[0]; + + /* open the snapshot */ + if ((snap = zfs_open(cb.cb_target, ZFS_TYPE_SNAPSHOT)) == NULL) + return (1); + + (void) strlcpy(parentname, cb.cb_target, sizeof (parentname)); + verify((delim = strrchr(parentname, '@')) != NULL); + *delim = '\0'; + if ((zhp = 
zfs_open(parentname, ZFS_TYPE_ANY)) == NULL) { + zfs_close(snap); + return (1); + } + + /* See if this dataset is mounted */ + was_mounted = zfs_is_mounted(zhp, NULL); + + cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); + + /* + * Check for more recent snapshots and/or clones based on the presence + * of '-r' and '-R'. + */ + cb.cb_first = 1; + cb.cb_error = 0; + (void) zfs_iter_children(zhp, rollback_check, &cb); + + if ((ret = cb.cb_error) != 0) + goto out; + + cb.cb_error = 0; + + /* + * Unmount any snapshots as well as the dataset itself. + */ + if ((ret = zfs_iter_children(zhp, rollback_unmount, + &cb)) != 0 || (ret = zfs_unmount(zhp, NULL, + cb.cb_force ? MS_FORCE : 0)) != 0) + goto out; + + (void) zfs_iter_children(zhp, rollback_destroy, &cb); + + if ((ret = cb.cb_error) != 0) + goto out; + + /* + * Now that we have verified that the snapshot is the latest, rollback + * to the given snapshot. + */ + ret = zfs_rollback(zhp); + + /* + * We only want to re-mount the filesystem if it was mounted in the + * first place. + */ + if (was_mounted) + (void) zfs_mount(zhp, NULL, 0); + +out: + zfs_close(snap); + zfs_close(zhp); + + if (ret == 0) + return (0); + else + return (1); +} + +/* + * zfs set property=value { fs | snap | vol } ... + * + * Sets the given property for all datasets specified on the command line. + */ +typedef struct set_cbdata { + char *cb_propname; + char *cb_value; + zfs_prop_t cb_prop; +} set_cbdata_t; + +static int +set_callback(zfs_handle_t *zhp, void *data) +{ + set_cbdata_t *cbp = data; + int ret = 1; + + /* don't allow setting of properties for snapshots */ + if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { + (void) fprintf(stderr, gettext("cannot set %s property for " + "'%s': snapshot properties cannot be modified\n"), + cbp->cb_propname, zfs_get_name(zhp)); + return (1); + } + + /* + * If we're changing the volsize, and the volsize and reservation are + * the same, then change the reservation as well. 
+ */ + if (cbp->cb_prop == ZFS_PROP_VOLSIZE && + zfs_get_type(zhp) == ZFS_TYPE_VOLUME && + zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE) == + zfs_prop_get_int(zhp, ZFS_PROP_RESERVATION)) { + uint64_t volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); + uint64_t avail = zfs_prop_get_int(zhp, ZFS_PROP_AVAILABLE); + uint64_t value; + + verify(zfs_nicestrtonum(cbp->cb_value, &value) == 0); + + /* + * Warn about raising the volume size greater than the amount of + * available space. + */ + if (value > volsize && (value - volsize) > avail) { + (void) fprintf(stderr, gettext("cannot set " + "%s property for '%s': volume size exceeds " + "amount of available space\n"), + cbp->cb_propname, zfs_get_name(zhp)); + return (1); + } + + if (zfs_prop_set(zhp, ZFS_PROP_RESERVATION, + cbp->cb_value) != 0) { + (void) fprintf(stderr, gettext("volsize and " + "reservation must remain equal\n")); + return (1); + } + } + + /* + * Do not allow the reservation to be set above the volume size. We do + * this here instead of inside libzfs because libzfs violates this rule + * internally. 
+ */ + if (cbp->cb_prop == ZFS_PROP_RESERVATION && + zfs_get_type(zhp) == ZFS_TYPE_VOLUME) { + uint64_t value; + uint64_t volsize; + + volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); + if (strcmp(cbp->cb_value, "none") == 0) + value = 0; + else + verify(zfs_nicestrtonum(cbp->cb_value, &value) == 0); + + if (value > volsize) { + (void) fprintf(stderr, gettext("cannot set %s " + "for '%s': size is greater than current " + "volume size\n"), cbp->cb_propname, + zfs_get_name(zhp)); + return (-1); + } + } + + if (zfs_prop_set(zhp, cbp->cb_prop, cbp->cb_value) != 0) + return (1); + + ret = 0; +error: + return (ret); +} + +static int +zfs_do_set(int argc, char **argv) +{ + set_cbdata_t cb; + + /* check for options */ + if (argc > 1 && argv[1][0] == '-') { + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + argv[1][1]); + usage(FALSE); + } + + /* check number of arguments */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing property=value " + "argument\n")); + usage(FALSE); + } + if (argc < 3) { + (void) fprintf(stderr, gettext("missing dataset name\n")); + usage(FALSE); + } + + /* validate property=value argument */ + cb.cb_propname = argv[1]; + if ((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) { + (void) fprintf(stderr, gettext("missing value in " + "property=value argument\n")); + usage(FALSE); + } + + *cb.cb_value = '\0'; + cb.cb_value++; + + if (*cb.cb_propname == '\0') { + (void) fprintf(stderr, + gettext("missing property in property=value argument\n")); + usage(FALSE); + } + if (*cb.cb_value == '\0') { + (void) fprintf(stderr, + gettext("missing value in property=value argument\n")); + usage(FALSE); + } + + /* get the property type */ + if ((cb.cb_prop = zfs_name_to_prop(cb.cb_propname)) == + ZFS_PROP_INVAL) { + (void) fprintf(stderr, + gettext("invalid property '%s'\n"), cb.cb_propname); + usage(FALSE); + } + + /* + * Validate that the value is appropriate for this property. 
We do this + * once now so we don't generate multiple errors each time we try to + * apply it to a dataset. + */ + if (zfs_prop_validate(cb.cb_prop, cb.cb_value, NULL) != 0) + return (1); + + return (zfs_for_each(argc - 2, argv + 2, FALSE, + ZFS_TYPE_ANY, set_callback, &cb)); +} + +/* + * zfs snapshot <fs@snap> + * + * Creates a snapshot with the given name. While functionally equivalent to + * 'zfs create', it is a separate command to diffferentiate intent. + */ +static int +zfs_do_snapshot(int argc, char **argv) +{ + /* check options */ + if (argc > 1 && argv[1][0] == '-') { + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + argv[1][1]); + usage(FALSE); + } + + /* check number of arguments */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing snapshot argument\n")); + usage(FALSE); + } + if (argc > 2) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + return (zfs_snapshot(argv[1]) != 0); +} + +/* + * zfs backup [-i <fs@snap>] <fs@snap> + * + * Send a backup stream to stdout. 
+ */ +static int +zfs_do_backup(int argc, char **argv) +{ + char *fromname = NULL; + zfs_handle_t *zhp_from = NULL, *zhp_to; + int c, err; + + /* check options */ + while ((c = getopt(argc, argv, ":i:")) != -1) { + switch (c) { + case 'i': + fromname = optarg; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing snapshot argument\n")); + usage(FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + if (isatty(STDOUT_FILENO)) { + (void) fprintf(stderr, + gettext("Error: Backup stream can not be written " + "to a terminal.\n" + "You must redirect standard output.\n")); + return (1); + } + + if (fromname) { + if ((zhp_from = zfs_open(fromname, ZFS_TYPE_SNAPSHOT)) == NULL) + return (1); + } + if ((zhp_to = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) + return (1); + + err = zfs_backup(zhp_to, zhp_from); + + if (zhp_from) + zfs_close(zhp_from); + zfs_close(zhp_to); + + return (err != 0); +} + +/* + * zfs restore <fs@snap> + * + * Restore a backup stream from stdin. 
+ */ +static int +zfs_do_restore(int argc, char **argv) +{ + int c, err; + int isprefix = FALSE; + int dryrun = FALSE; + int verbose = FALSE; + + /* check options */ + while ((c = getopt(argc, argv, ":dnv")) != -1) { + switch (c) { + case 'd': + isprefix = TRUE; + break; + case 'n': + dryrun = TRUE; + break; + case 'v': + verbose = TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing snapshot argument\n")); + usage(FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + if (isatty(STDIN_FILENO)) { + (void) fprintf(stderr, + gettext("Error: Backup stream can not be read " + "from a terminal.\n" + "You must redirect standard input.\n")); + return (1); + } + + err = zfs_restore(argv[0], isprefix, verbose, dryrun); + return (err != 0); +} + + +/* + * Generic callback for sharing or mounting filesystems. Because the code is so + * similar, we have a common function with an extra parameter to determine which + * mode we are using. + */ +#define OP_SHARE 0x1 +#define OP_MOUNT 0x2 + +typedef struct share_mount_cbdata { + int cb_type; + int cb_explicit; + int cb_flags; + const char *cb_options; +} share_mount_cbdata_t; + +/* + * Share or mount the filesystem. + */ +static int +share_mount_callback(zfs_handle_t *zhp, void *data) +{ + char mountpoint[ZFS_MAXPROPLEN]; + char shareopts[ZFS_MAXPROPLEN]; + share_mount_cbdata_t *cbp = data; + const char *cmdname = cbp->cb_type == OP_SHARE ? 
"share" : "mount"; + struct mnttab mnt; + uint64_t zoned; + + if (cbp->cb_options == NULL) + mnt.mnt_mntopts = ""; + else + mnt.mnt_mntopts = (char *)cbp->cb_options; + + /* + * Check to make sure we can mount/share this dataset. If we are in the + * global zone and the filesystem is exported to a local zone, or if we + * are in a local zone and the filesystem is not exported, then it is an + * error. + */ + zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); + + if (zoned && getzoneid() == GLOBAL_ZONEID) { + if (!cbp->cb_explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': dataset is " + "exported to a local zone\n"), cmdname, zfs_get_name(zhp)); + return (1); + + } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { + if (!cbp->cb_explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': permission " + "denied\n"), cmdname, zfs_get_name(zhp)); + return (1); + } + + /* + * Inore any filesystems which don't apply to us. This includes those + * with a legacy mountpoint, or those with legacy share options. + */ + verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, + sizeof (mountpoint), NULL, NULL, 0, FALSE) == 0); + verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, + sizeof (shareopts), NULL, NULL, 0, FALSE) == 0); + + if (cbp->cb_type == OP_SHARE) { + if (strcmp(shareopts, "off") == 0) { + if (!cbp->cb_explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot share '%s': " + "legacy share\n"), zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use share(1M) to " + "share this filesystem\n")); + return (1); + } + } + + /* + * We cannot share or mount legacy filesystems. If the shareopts is + * non-legacy but the mountpoint is legacy, we treat it as a legacy + * share. 
+ */ + if (strcmp(mountpoint, "legacy") == 0) { + if (!cbp->cb_explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': " + "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use %s to " + "%s this filesystem\n"), cbp->cb_type == OP_SHARE ? + "share(1M)" : "mount(1M)", cmdname); + return (1); + } + + if (strcmp(mountpoint, "none") == 0) { + if (!cbp->cb_explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': no " + "mountpoint set\n"), cmdname, zfs_get_name(zhp)); + return (1); + } + + /* + * At this point, we have verified that the mountpoint and/or shareopts + * are appropriate for auto management. Determine if the filesystem is + * currently mounted or shared, and abort if this is an explicit + * request. + */ + switch (cbp->cb_type) { + case OP_SHARE: + if (zfs_is_shared(zhp, NULL)) { + if (cbp->cb_explicit) { + (void) fprintf(stderr, gettext("cannot share " + "'%s': filesystem already shared\n"), + zfs_get_name(zhp)); + return (1); + } else { + return (0); + } + } + break; + + case OP_MOUNT: + if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && + zfs_is_mounted(zhp, NULL)) { + if (cbp->cb_explicit) { + (void) fprintf(stderr, gettext("cannot mount " + "'%s': filesystem already mounted\n"), + zfs_get_name(zhp)); + return (1); + } else { + return (0); + } + } + break; + } + + /* + * Mount and optionally share the filesystem. + */ + switch (cbp->cb_type) { + case OP_SHARE: + { + if (!zfs_is_mounted(zhp, NULL) && + zfs_mount(zhp, NULL, 0) != 0) + return (1); + + if (zfs_share(zhp) != 0) + return (1); + } + break; + + case OP_MOUNT: + if (zfs_mount(zhp, cbp->cb_options, cbp->cb_flags) != 0) + return (1); + break; + } + + return (0); +} + +static int +share_or_mount(int type, int argc, char **argv) +{ + int do_all = 0; + int c, ret; + share_mount_cbdata_t cb = { 0 }; + + cb.cb_type = type; + + /* check options */ + while ((c = getopt(argc, argv, type == OP_MOUNT ? 
":ao:O" : "a")) + != -1) { + switch (c) { + case 'a': + do_all = 1; + break; + case 'o': + cb.cb_options = optarg; + break; + case 'O': + cb.cb_flags |= MS_OVERLAY; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (do_all) { + if (argc != 0) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + ret = zfs_for_each(argc, argv, TRUE, + ZFS_TYPE_FILESYSTEM, share_mount_callback, &cb); + } else if (argc == 0) { + struct mnttab entry; + + if (type == OP_SHARE) { + (void) fprintf(stderr, gettext("missing filesystem " + "argument\n")); + usage(FALSE); + } + + /* + * When mount is given no arguments, go through /etc/mnttab and + * display any active ZFS mounts. We hide any snapshots, since + * they are controlled automatically. + */ + rewind(mnttab_file); + while (getmntent(mnttab_file, &entry) == 0) { + if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || + strchr(entry.mnt_special, '@') != NULL) + continue; + + (void) printf("%-30s %s\n", entry.mnt_special, + entry.mnt_mountp); + } + + ret = 0; + } else { + zfs_handle_t *zhp; + + if (argc > 1) { + (void) fprintf(stderr, + gettext("too many arguments\n")); + usage(FALSE); + } + + if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) + ret = 1; + else { + cb.cb_explicit = TRUE; + ret = share_mount_callback(zhp, &cb); + zfs_close(zhp); + } + } + + return (ret); +} + +/* + * zfs mount -a + * zfs mount filesystem + * + * Mount all filesystems, or mount the given filesystem. + */ +static int +zfs_do_mount(int argc, char **argv) +{ + return (share_or_mount(OP_MOUNT, argc, argv)); +} + +/* + * zfs share -a + * zfs share filesystem + * + * Share all filesystems, or share the given filesystem. 
+ */ +static int +zfs_do_share(int argc, char **argv) +{ + return (share_or_mount(OP_SHARE, argc, argv)); +} + +typedef struct unshare_unmount_node { + zfs_handle_t *un_zhp; + char *un_mountp; + uu_avl_node_t un_avlnode; +} unshare_unmount_node_t; + +/* ARGSUSED */ +static int +unshare_unmount_compare(const void *larg, const void *rarg, void *unused) +{ + const unshare_unmount_node_t *l = larg; + const unshare_unmount_node_t *r = rarg; + + return (strcmp(l->un_mountp, r->un_mountp)); +} + +/* + * Convenience routine used by unshare_unmount() and manual_unmount(). Given an + * absolute path, find the entry in /etc/mnttab, verify that it is a ZFS + * filesystem, and unshare or unmount it appropriately. + */ +static int +unshare_unmount_path(int type, char *path, int flags, int is_manual) +{ + zfs_handle_t *zhp; + int ret; + struct stat64 statbuf; + struct extmnttab entry; + const char *cmdname = (type == OP_SHARE) ? "unshare" : "unmount"; + char property[ZFS_MAXPROPLEN]; + + /* + * Search for the path in /etc/mnttab. Rather than looking for the + * specific path, which can be fooled by non-standard paths (i.e. ".." + * or "//"), we stat() the path and search for the corresponding + * (major,minor) device pair. + */ + if (stat64(path, &statbuf) != 0) { + (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), + cmdname, path, strerror(errno)); + return (1); + } + + /* + * Search for the given (major,minor) pair in the mount table. 
+ */ + rewind(mnttab_file); + while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) { + if (entry.mnt_major == major(statbuf.st_dev) && + entry.mnt_minor == minor(statbuf.st_dev)) + break; + } + if (ret != 0) { + (void) fprintf(stderr, gettext("cannot %s '%s': not " + "currently mounted\n"), cmdname, path); + return (1); + } + + if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { + (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS " + "filesystem\n"), cmdname, path); + return (1); + } + + if ((zhp = zfs_open(entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) + return (1); + + verify(zfs_prop_get(zhp, type == OP_SHARE ? + ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property, + sizeof (property), NULL, NULL, 0, FALSE) == 0); + + if (type == OP_SHARE) { + if (strcmp(property, "off") == 0) { + (void) fprintf(stderr, gettext("cannot unshare " + "'%s': legacy share\n"), path); + (void) fprintf(stderr, gettext("use " + "unshare(1M) to unshare this filesystem\n")); + ret = 1; + } else if (!zfs_is_shared(zhp, NULL)) { + (void) fprintf(stderr, gettext("cannot unshare '%s': " + "not currently shared\n"), path); + ret = 1; + } else { + ret = zfs_unshareall(zhp); + } + } else { + if (strcmp(property, "legacy") == 0 && !is_manual) { + (void) fprintf(stderr, gettext("cannot unmount " + "'%s': legacy mountpoint\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use umount(1M) " + "to unmount this filesystem\n")); + ret = 1; + } else { + ret = zfs_unmountall(zhp, flags); + } + } + + zfs_close(zhp); + + return (ret != 0); +} + +/* + * Shared implementation of 'zfs unshare' and 'zfs unmount' (see 'type'). + */ +static int +unshare_unmount(int type, int argc, char **argv) +{ + int do_all = 0; + int flags = 0; + int ret = 0; + int c; + zfs_handle_t *zhp; + char property[ZFS_MAXPROPLEN]; + + /* check options */ + while ((c = getopt(argc, argv, type == OP_SHARE ? 
"a" : "af")) != -1) { + switch (c) { + case 'a': + do_all = 1; + break; + case 'f': + flags = MS_FORCE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* ensure correct number of arguments */ + if (do_all) { + if (argc != 0) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + } else if (argc != 1) { + if (argc == 0) + (void) fprintf(stderr, + gettext("missing filesystem argument\n")); + else + (void) fprintf(stderr, + gettext("too many arguments\n")); + usage(FALSE); + } + + if (do_all) { + /* + * We could make use of zfs_for_each() to walk all datasets in + * the system, but this would be very inefficient, especially + * since we would have to linearly search /etc/mnttab for each + * one. Instead, do one pass through /etc/mnttab looking for + * zfs entries and call zfs_unmount() for each one. + * + * Things get a little tricky if the administrator has created + * mountpoints beneath other ZFS filesystems. In this case, we + * have to unmount the deepest filesystems first. To accomplish + * this, we place all the mountpoints in an AVL tree sorted by + * mountpoint path, and walk the result in reverse so that + * nested mountpoints are handled before their parents. 
+ */ + struct mnttab entry; + uu_avl_pool_t *pool; + uu_avl_t *tree; + unshare_unmount_node_t *node; + uu_avl_index_t idx; + uu_avl_walk_t *walk; + + if ((pool = uu_avl_pool_create("unmount_pool", + sizeof (unshare_unmount_node_t), + offsetof(unshare_unmount_node_t, un_avlnode), + unshare_unmount_compare, + UU_DEFAULT)) == NULL) { + (void) fprintf(stderr, gettext("internal error: " + "out of memory\n")); + exit(1); + } + + if ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) { + (void) fprintf(stderr, gettext("internal error: " + "out of memory\n")); + exit(1); + } + + rewind(mnttab_file); + while (getmntent(mnttab_file, &entry) == 0) { + + /* ignore non-ZFS entries */ + if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) + continue; + + /* ignore snapshots */ + if (strchr(entry.mnt_special, '@') != NULL) + continue; + + if ((zhp = zfs_open(entry.mnt_special, + ZFS_TYPE_FILESYSTEM)) == NULL) { + ret = 1; + continue; + } + + verify(zfs_prop_get(zhp, type == OP_SHARE ? + ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, + property, sizeof (property), NULL, NULL, + 0, FALSE) == 0); + + /* Ignore legacy mounts and shares */ + if ((type == OP_SHARE && + strcmp(property, "off") == 0) || + (type == OP_MOUNT && + strcmp(property, "legacy") == 0)) { + zfs_close(zhp); + continue; + } + + node = safe_malloc(sizeof (unshare_unmount_node_t)); + node->un_zhp = zhp; + + if ((node->un_mountp = strdup(entry.mnt_mountp)) == + NULL) { + (void) fprintf(stderr, gettext("internal error:" + " out of memory\n")); + exit(1); + } + + uu_avl_node_init(node, &node->un_avlnode, pool); + + if (uu_avl_find(tree, node, NULL, &idx) == NULL) { + uu_avl_insert(tree, node, idx); + } else { + zfs_close(node->un_zhp); + free(node->un_mountp); + free(node); + } + } + + /* + * Walk the AVL tree in reverse, unmounting each filesystem and + * removing it from the AVL tree in the process. 
+ */ + if ((walk = uu_avl_walk_start(tree, + UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory")); + exit(1); + } + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(tree, node); + + switch (type) { + case OP_SHARE: + if (zfs_unshare(node->un_zhp, + node->un_mountp) != 0) + ret = 1; + break; + + case OP_MOUNT: + if (zfs_unmount(node->un_zhp, + node->un_mountp, flags) != 0) + ret = 1; + break; + } + + zfs_close(node->un_zhp); + free(node->un_mountp); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(tree); + uu_avl_pool_destroy(pool); + } else { + /* + * We have an argument, but it may be a full path or a ZFS + * filesystem. Pass full paths off to unmount_path() (shared by + * manual_unmount), otherwise open the filesystem and pass to + * zfs_unmount(). + */ + if (argv[0][0] == '/') + return (unshare_unmount_path(type, argv[0], + flags, FALSE)); + + if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) + return (1); + + verify(zfs_prop_get(zhp, type == OP_SHARE ? 
+ ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property, + sizeof (property), NULL, NULL, 0, FALSE) == 0); + + switch (type) { + case OP_SHARE: + if (strcmp(property, "off") == 0) { + (void) fprintf(stderr, gettext("cannot unshare " + "'%s': legacy share\n"), zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use unshare(1M)" + " to unshare this filesystem\n")); + ret = 1; + } else if (!zfs_is_shared(zhp, NULL)) { + (void) fprintf(stderr, gettext("cannot unshare " + "'%s': not currently shared\n"), + zfs_get_name(zhp)); + ret = 1; + } else if (zfs_unshareall(zhp) != 0) { + ret = 1; + } + break; + + case OP_MOUNT: + if (strcmp(property, "legacy") == 0) { + (void) fprintf(stderr, gettext("cannot unmount " + "'%s': legacy mountpoint\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use umount(1M) " + "to unmount this filesystem\n")); + ret = 1; + } else if (!zfs_is_mounted(zhp, NULL)) { + (void) fprintf(stderr, gettext("cannot unmount " + "'%s': not currently mounted\n"), + zfs_get_name(zhp)); + ret = 1; + } else if (zfs_unmountall(zhp, flags) != 0) { + ret = 1; + } + } + + zfs_close(zhp); + } + + return (ret); +} + +/* + * zfs unmount -a + * zfs unmount filesystem + * + * Unmount all filesystems, or a specific ZFS filesystem. + */ +static int +zfs_do_unmount(int argc, char **argv) +{ + return (unshare_unmount(OP_MOUNT, argc, argv)); +} + +/* + * zfs unshare -a + * zfs unshare filesystem + * + * Unshare all filesystems, or a specific ZFS filesystem. + */ +static int +zfs_do_unshare(int argc, char **argv) +{ + return (unshare_unmount(OP_SHARE, argc, argv)); +} + +/* + * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is + * 'legacy'. Otherwise, complain that the user should be using 'zfs mount'. 
+ */ +static int +manual_mount(int argc, char **argv) +{ + zfs_handle_t *zhp; + char mountpoint[ZFS_MAXPROPLEN]; + char mntopts[MNT_LINE_MAX] = { '\0' }; + int ret; + int c; + int flags = 0; + char *dataset, *path; + + /* check options */ + while ((c = getopt(argc, argv, ":o:O")) != -1) { + switch (c) { + case 'o': + (void) strlcpy(mntopts, optarg, sizeof (mntopts)); + break; + case 'O': + flags |= MS_OVERLAY; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + (void) fprintf(stderr, gettext("usage: mount [-o opts] " + "<path>\n")); + return (2); + } + } + + argc -= optind; + argv += optind; + + /* check that we only have two arguments */ + if (argc != 2) { + if (argc == 0) + (void) fprintf(stderr, gettext("missing dataset " + "argument\n")); + else if (argc == 1) + (void) fprintf(stderr, + gettext("missing mountpoint argument\n")); + else + (void) fprintf(stderr, gettext("too many arguments\n")); + (void) fprintf(stderr, "usage: mount <dataset> <mountpoint>\n"); + return (2); + } + + dataset = argv[0]; + path = argv[1]; + + /* try to open the dataset */ + if ((zhp = zfs_open(dataset, ZFS_TYPE_FILESYSTEM)) == NULL) + return (1); + + (void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, + sizeof (mountpoint), NULL, NULL, 0, FALSE); + + /* check for legacy mountpoint and complain appropriately */ + ret = 0; + if (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) { + if (mount(dataset, path, MS_OPTIONSTR | flags, MNTTYPE_ZFS, + NULL, 0, mntopts, sizeof (mntopts)) != 0) { + (void) fprintf(stderr, gettext("mount failed: %s\n"), + strerror(errno)); + ret = 1; + } + } else { + (void) fprintf(stderr, gettext("filesystem '%s' cannot be " + "mounted using 'mount -F zfs'\n"), dataset); + (void) fprintf(stderr, gettext("Use 'zfs set mountpoint=%s' " + "instead.\n"), path); + (void) fprintf(stderr, gettext("If you 
must use 'mount -F zfs' " + "or /etc/vfstab, use 'zfs set mountpoint=legacy'.\n")); + (void) fprintf(stderr, gettext("See zfs(1M) for more " + "information.\n")); + ret = 1; + } + + return (ret); +} + +/* + * Called when invoked as /etc/fs/zfs/umount. Unlike a manual mount, we allow + * unmounts of non-legacy filesystems, as this is the dominant administrative + * interface. + */ +static int +manual_unmount(int argc, char **argv) +{ + int flags = 0; + int c; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + flags = MS_FORCE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + (void) fprintf(stderr, gettext("usage: unmount [-f] " + "<path>\n")); + return (2); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + if (argc != 1) { + if (argc == 0) + (void) fprintf(stderr, gettext("missing path " + "argument\n")); + else + (void) fprintf(stderr, gettext("too many arguments\n")); + (void) fprintf(stderr, gettext("usage: unmount [-f] <path>\n")); + return (2); + } + + return (unshare_unmount_path(OP_MOUNT, argv[0], flags, TRUE)); +} + +static int +volcheck(zpool_handle_t *zhp, void *data) +{ + int isinit = (int)data; + + if (isinit) + return (zpool_create_zvol_links(zhp)); + else + return (zpool_remove_zvol_links(zhp)); +} + +/* + * Iterate over all pools in the system and either create or destroy /dev/zvol + * links, depending on the value of 'isinit'. + */ +static int +do_volcheck(int isinit) +{ + return (zpool_iter(volcheck, (void *)isinit) ? 
1 : 0); +} + +int +main(int argc, char **argv) +{ + int ret; + int i; + char *progname; + char *cmdname; + + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); + + opterr = 0; + + if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) { + (void) fprintf(stderr, gettext("internal error: unable to " + "open %s\n"), MNTTAB); + return (1); + } + + /* + * This command also doubles as the /etc/fs mount and unmount program. + * Determine if we should take this behavior based on argv[0]. + */ + progname = basename(argv[0]); + if (strcmp(progname, "mount") == 0) { + ret = manual_mount(argc, argv); + } else if (strcmp(progname, "umount") == 0) { + ret = manual_unmount(argc, argv); + } else { + /* + * Make sure the user has specified some command. + */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing command\n")); + usage(FALSE); + } + + cmdname = argv[1]; + + /* + * The 'umount' command is an alias for 'unmount' + */ + if (strcmp(cmdname, "umount") == 0) + cmdname = "unmount"; + + /* + * Special case '-?' + */ + if (strcmp(cmdname, "-?") == 0) + usage(TRUE); + + /* + * 'volinit' and 'volfini' do not appear in the usage message, + * so we have to special case them here. + */ + if (strcmp(cmdname, "volinit") == 0) + return (do_volcheck(TRUE)); + else if (strcmp(cmdname, "volfini") == 0) + return (do_volcheck(FALSE)); + + /* + * Run the appropriate command. + */ + for (i = 0; i < NCOMMAND; i++) { + if (command_table[i].name == NULL) + continue; + + if (strcmp(cmdname, command_table[i].name) == 0) { + current_command = &command_table[i]; + ret = command_table[i].func(argc - 1, argv + 1); + break; + } + } + + if (i == NCOMMAND) { + (void) fprintf(stderr, gettext("unrecognized " + "command '%s'\n"), cmdname); + usage(FALSE); + } + } + + (void) fclose(mnttab_file); + + /* + * The 'ZFS_ABORT' environment variable causes us to dump core on exit + * for the purposes of running ::findleaks. 
+ */ + if (getenv("ZFS_ABORT") != NULL) { + (void) printf("dumping core by request\n"); + abort(); + } + + return (ret); +} diff --git a/usr/src/cmd/zfs/zfs_util.h b/usr/src/cmd/zfs/zfs_util.h new file mode 100644 index 0000000000..5b2fcfa9f3 --- /dev/null +++ b/usr/src/cmd/zfs/zfs_util.h @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _ZFS_UTIL_H +#define _ZFS_UTIL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +void * safe_malloc(size_t size); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_UTIL_H */ diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile index cdf36e185e..15d5233ab7 100644 --- a/usr/src/cmd/zoneadm/Makefile +++ b/usr/src/cmd/zoneadm/Makefile @@ -38,7 +38,7 @@ $(ROOTMANIFEST) := FILEMODE= 444 $(ROOTMANIFESTDIR)/%: % $(INS.file) -LDLIBS += -lzonecfg -lsocket -lgen -lpool -lbsm +LDLIBS += -lzonecfg -lsocket -lgen -lpool -lbsm -lzfs lint := LINTFLAGS += -ux diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index 287aa4ca95..53bf2848bc 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -65,6 +65,7 @@ #include <sys/sockio.h> #include <sys/mntent.h> #include <limits.h> +#include <libzfs.h> #include <fcntl.h> #include <door.h> @@ -1899,6 +1900,117 @@ next_fs: return (return_code); } +const char *current_dataset; + +/* + * Custom error handler for errors incurred as part of the checks below. We + * want to trim off the leading 'cannot open ...' to create a better error + * message. The only other way this can fail is if we fail to set the 'zoned' + * property. In this case we just pass the error on verbatim. 
+ */ +static void +zfs_error_handler(const char *fmt, va_list ap) +{ + char buf[1024]; + + (void) vsnprintf(buf, sizeof (buf), fmt, ap); + + if (strncmp(gettext("cannot open "), buf, + strlen(gettext("cannot open "))) == 0) + (void) fprintf(stderr, gettext("cannot verify zfs " + "dataset %s%s\n"), current_dataset, strchr(buf, ':')); + else + (void) fprintf(stderr, gettext("cannot verify zfs dataset " + "%s: %s\n"), current_dataset, buf); +} + +/* ARGSUSED */ +static int +check_zvol(zfs_handle_t *zhp, void *unused) +{ + int ret; + + if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) { + (void) fprintf(stderr, gettext("cannot verify zfs dataset %s: " + "volumes cannot be specified as a zone dataset resource\n"), + zfs_get_name(zhp)); + ret = -1; + } else { + ret = zfs_iter_children(zhp, check_zvol, NULL); + } + + zfs_close(zhp); + + return (ret); +} + +/* + * Validate that the given dataset exists on the system, and that neither it nor + * its children are zvols. + * + * Note that we don't do anything with the 'zoned' property here. All + * management is done in zoneadmd when the zone is actually rebooted. This + * allows us to automatically set the zoned property even when a zone is + * rebooted by the administrator. 
+ */ +static int +verify_datasets(zone_dochandle_t handle) +{ + int return_code = Z_OK; + struct zone_dstab dstab; + zfs_handle_t *zhp; + char propbuf[ZFS_MAXPROPLEN]; + char source[ZFS_MAXNAMELEN]; + zfs_source_t srctype; + + if (zonecfg_setdsent(handle) != Z_OK) { + (void) fprintf(stderr, gettext("cannot verify zfs datasets: " + "unable to enumerate datasets\n")); + return (Z_ERR); + } + + zfs_set_error_handler(zfs_error_handler); + + while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + + current_dataset = dstab.zone_dataset_name; + + if ((zhp = zfs_open(dstab.zone_dataset_name, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { + return_code = Z_ERR; + continue; + } + + if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, + sizeof (propbuf), &srctype, source, + sizeof (source), 0) == 0 && + (srctype == ZFS_SRC_INHERITED)) { + (void) fprintf(stderr, gettext("cannot verify zfs " + "dataset %s: mountpoint cannot be inherited\n"), + dstab.zone_dataset_name); + return_code = Z_ERR; + zfs_close(zhp); + continue; + } + + if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) { + (void) fprintf(stderr, gettext("cannot verify zfs " + "dataset %s: volumes cannot be specified as a " + "zone dataset resource\n"), + dstab.zone_dataset_name); + return_code = Z_ERR; + } + + if (zfs_iter_children(zhp, check_zvol, NULL) != 0) + return_code = Z_ERR; + + zfs_close(zhp); + } + (void) zonecfg_enddsent(handle); + + return (return_code); +} + static int verify_details(int cmd_num) { @@ -2009,6 +2121,8 @@ no_net: return_code = Z_ERR; if (!in_alt_root && verify_pool(handle) != Z_OK) return_code = Z_ERR; + if (!in_alt_root && verify_datasets(handle) != Z_OK) + return_code = Z_ERR; zonecfg_fini_handle(handle); if (return_code == Z_ERR) (void) fprintf(stderr, diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile index faf58fd7a8..b561ecb3ec 100644 --- a/usr/src/cmd/zoneadmd/Makefile +++ b/usr/src/cmd/zoneadmd/Makefile @@ -39,7 +39,7 @@ POFILES= $(OBJS:%.o=%.po) CFLAGS += 
$(CCVERBOSE) LINTFLAGS += -ux LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair -lpool \ - -lgen -lbsm -lcontract + -lgen -lbsm -lcontract -lzfs XGETFLAGS += -a -x zoneadmd.xcl .KEEP_STATE: diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index 75eca58961..98dd9e67bc 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -91,6 +91,7 @@ #include <wait.h> #include <limits.h> #include <libgen.h> +#include <libzfs.h> #include <zone.h> #include <assert.h> @@ -98,6 +99,7 @@ #include <sys/mnttab.h> #include <sys/fs/autofs.h> /* for _autofssys() */ #include <sys/fs/lofs_info.h> +#include <sys/fs/zfs.h> #include <pool.h> #include <sys/pool.h> @@ -1418,6 +1420,14 @@ mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) goto bad; } while (zonecfg_getfsent(handle, &fstab) == Z_OK) { + /* + * ZFS filesystems will not be accessible under an alternate + * root, since the pool will not be known. Ignore them in this + * case. + */ + if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0) + continue; + num_fs++; if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { @@ -2439,6 +2449,150 @@ get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz) } static int +get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) +{ + zone_dochandle_t handle; + struct zone_dstab dstab; + size_t total, offset, len; + int error = -1; + char *str; + + *bufp = NULL; + *bufsizep = 0; + + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + return (-1); + } + if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + + if (zonecfg_setdsent(handle) != Z_OK) { + zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); + goto out; + } + + total = 0; + while (zonecfg_getdsent(handle, &dstab) == Z_OK) + total += strlen(dstab.zone_dataset_name) + 1; + (void) 
zonecfg_enddsent(handle); + + if (total == 0) { + error = 0; + goto out; + } + + if ((str = malloc(total)) == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + goto out; + } + + if (zonecfg_setdsent(handle) != Z_OK) { + zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); + goto out; + } + offset = 0; + while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + len = strlen(dstab.zone_dataset_name); + (void) strlcpy(str + offset, dstab.zone_dataset_name, + total - offset); /* room left in str, not the source field size */ + offset += len; + if (offset != total - 1) + str[offset++] = ','; + } + (void) zonecfg_enddsent(handle); + + error = 0; + *bufp = str; + *bufsizep = total; + +out: + if (error != 0 && str != NULL) + free(str); + if (handle != NULL) + zonecfg_fini_handle(handle); + + return (error); +} + +/* ARGSUSED */ +static void +zfs_error_handler(const char *fmt, va_list ap) +{ + /* + * Do nothing - we interpret the failures from each libzfs call below. + */ +} + +static int +validate_datasets(zlog_t *zlogp) +{ + zone_dochandle_t handle; + struct zone_dstab dstab; + zfs_handle_t *zhp; + + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + return (-1); + } + if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + + if (zonecfg_setdsent(handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + zonecfg_fini_handle(handle); + return (-1); + } + + zfs_set_error_handler(zfs_error_handler); + + /* + * libzfs opens /dev/zfs during its .init routine. + * zoneadmd automatically closes these files when it daemonizes, + * so we cheat by re-calling the init routine. 
+ */ + zfs_init(); + + while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + + if ((zhp = zfs_open(dstab.zone_dataset_name, + ZFS_TYPE_FILESYSTEM)) == NULL) { + zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", + dstab.zone_dataset_name); + zonecfg_fini_handle(handle); + return (-1); + } + + /* + * Automatically set the 'zoned' property. We check the value + * first because we'll get EPERM if it is already set. + */ + if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && + zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) { + zerror(zlogp, B_FALSE, "cannot set 'zoned' " + "property for ZFS dataset '%s'\n", + dstab.zone_dataset_name); + zonecfg_fini_handle(handle); + zfs_close(zhp); + return (-1); + } + + zfs_close(zhp); + } + (void) zonecfg_enddsent(handle); + + zonecfg_fini_handle(handle); + + return (0); +} + +static int bind_to_pool(zlog_t *zlogp, zoneid_t zoneid) { pool_conf_t *poolconf; @@ -2611,6 +2765,8 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) char rootpath[MAXPATHLEN]; char *rctlbuf = NULL; size_t rctlbufsz = 0; + char *zfsbuf = NULL; + size_t zfsbufsz = 0; zoneid_t zoneid = -1; int xerr; char *kzone; @@ -2636,6 +2792,10 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) zerror(zlogp, B_FALSE, "Unable to get list of rctls"); goto error; } + if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) { + zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets"); + goto error; + } kzone = zone_name; @@ -2706,7 +2866,7 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) xerr = 0; if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, - rctlbufsz, &xerr)) == -1) { + rctlbufsz, zfsbuf, zfsbufsz, &xerr)) == -1) { if (xerr == ZE_AREMOUNTS) { if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { zerror(zlogp, B_FALSE, @@ -2762,6 +2922,11 @@ error: int vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd) { + if (!mount_cmd && validate_datasets(zlogp) != 0) { + lofs_discard_mnttab(); + return (-1); + } + if (create_dev_files(zlogp) != 0 || mount_filesystems(zlogp, 
mount_cmd) != 0) { lofs_discard_mnttab(); diff --git a/usr/src/cmd/zonecfg/Makefile b/usr/src/cmd/zonecfg/Makefile index 606f4bf40d..ac0ce57047 100644 --- a/usr/src/cmd/zonecfg/Makefile +++ b/usr/src/cmd/zonecfg/Makefile @@ -20,7 +20,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -33,7 +33,7 @@ include ../Makefile.cmd LFLAGS = -t YFLAGS = -d -b zonecfg_grammar -LDLIBS += -lzonecfg -ll -lnsl -ltecla +LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs CPPFLAGS += -I. CLEANFILES += zonecfg_lex.c zonecfg_grammar.tab.c zonecfg_grammar.tab.h diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c index 69e931e1a9..8c7c421fad 100644 --- a/usr/src/cmd/zonecfg/zonecfg.c +++ b/usr/src/cmd/zonecfg/zonecfg.c @@ -72,6 +72,7 @@ #include <regex.h> #include <signal.h> #include <libtecla.h> +#include <libzfs.h> #include <libzonecfg.h> #include "zonecfg.h" @@ -153,6 +154,7 @@ static char *res_types[] = { "device", "rctl", "attr", + "dataset", NULL }; @@ -219,6 +221,7 @@ static const char *add_cmds[] = { "add device", "add rctl", "add attr", + "add dataset", NULL }; @@ -229,6 +232,7 @@ static const char *select_cmds[] = { "select device ", "select rctl ", "select attr ", + "select dataset ", NULL }; @@ -308,6 +312,16 @@ static const char *rctl_res_scope_cmds[] = { NULL }; +static const char *dataset_res_scope_cmds[] = { + "cancel", + "end", + "exit", + "help", + "info", + "set name=", + NULL +}; + /* Global variables */ /* set early in main(), never modified thereafter, used all over the place */ @@ -365,6 +379,7 @@ static struct zone_nwiftab old_nwiftab, in_progress_nwiftab; static struct zone_devtab old_devtab, in_progress_devtab; static struct zone_rctltab old_rctltab, in_progress_rctltab; static struct zone_attrtab old_attrtab, in_progress_attrtab; +static struct zone_dstab old_dstab, 
in_progress_dstab; static GetLine *gl; /* The gl_get_line() resource object */ @@ -426,6 +441,8 @@ CPL_MATCH_FN(cmd_cpl_fn) return (add_stuff(cpl, line, rctl_res_scope_cmds, word_end)); case RT_ATTR: return (add_stuff(cpl, line, attr_res_scope_cmds, word_end)); + case RT_DATASET: + return (add_stuff(cpl, line, dataset_res_scope_cmds, word_end)); } return (0); } @@ -784,6 +801,14 @@ usage(bool verbose, uint_t flags) (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET), pt_to_str(PT_VALUE), gettext("<unsigned integer>")); break; + case RT_DATASET: + (void) fprintf(fp, gettext("The '%s' resource scope is " + "used to export ZFS datasets.\n"), + rt_to_str(resource_scope)); + (void) fprintf(fp, gettext("Valid commands:\n")); + (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET), + pt_to_str(PT_NAME), gettext("<name>")); + break; } (void) fprintf(fp, gettext("And from any resource scope, you " "can:\n")); @@ -872,6 +897,8 @@ usage(bool verbose, uint_t flags) (void) fprintf(fp, "\t%s\t\t%s, %s, %s\n", rt_to_str(RT_ATTR), pt_to_str(PT_NAME), pt_to_str(PT_TYPE), pt_to_str(PT_VALUE)); + (void) fprintf(fp, "\t%s\t\t%s\n", rt_to_str(RT_DATASET), + pt_to_str(PT_NAME)); } if (need_to_close) (void) pclose(fp); @@ -1242,6 +1269,7 @@ export_func(cmd_t *cmd) struct zone_devtab devtab; struct zone_attrtab attrtab; struct zone_rctltab rctltab; + struct zone_dstab dstab; struct zone_rctlvaltab *valptr; int err, arg; char zonepath[MAXPATHLEN], outfile[MAXPATHLEN], pool[MAXNAMELEN]; @@ -1411,6 +1439,18 @@ export_func(cmd_t *cmd) } (void) zonecfg_endattrent(handle); + if ((err = zonecfg_setdsent(handle)) != Z_OK) { + zone_perror(zone, err, FALSE); + goto done; + } + while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + (void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD), + rt_to_str(RT_DATASET)); + export_prop(of, PT_NAME, dstab.zone_dataset_name); + (void) fprintf(of, "%s\n", cmd_to_str(CMD_END)); + } + (void) zonecfg_enddsent(handle); + done: if (need_to_close) (void) fclose(of); @@ 
-1507,6 +1547,9 @@ add_resource(cmd_t *cmd) case RT_ATTR: bzero(&in_progress_attrtab, sizeof (in_progress_attrtab)); return; + case RT_DATASET: + bzero(&in_progress_dstab, sizeof (in_progress_dstab)); + return; default: zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, TRUE); long_usage(CMD_ADD, TRUE); @@ -2077,6 +2120,39 @@ fill_in_attrtab(cmd_t *cmd, struct zone_attrtab *attrtab, bool fill_in_only) return (err); } +static int +fill_in_dstab(cmd_t *cmd, struct zone_dstab *dstab, bool fill_in_only) +{ + int err, i; + property_value_ptr_t pp; + + if ((err = initialize(TRUE)) != Z_OK) + return (err); + + dstab->zone_dataset_name[0] = '\0'; + for (i = 0; i < cmd->cmd_prop_nv_pairs; i++) { + pp = cmd->cmd_property_ptr[i]; + if (pp->pv_type != PROP_VAL_SIMPLE || pp->pv_simple == NULL) { + zerr(gettext("A simple value was expected here.")); + saw_error = TRUE; + return (Z_INSUFFICIENT_SPEC); + } + switch (cmd->cmd_prop_name[i]) { + case PT_NAME: + (void) strlcpy(dstab->zone_dataset_name, pp->pv_simple, + sizeof (dstab->zone_dataset_name)); + break; + default: + zone_perror(pt_to_str(cmd->cmd_prop_name[i]), + Z_NO_PROPERTY_TYPE, TRUE); + return (Z_INSUFFICIENT_SPEC); + } + } + if (fill_in_only) + return (Z_OK); + return (zonecfg_lookup_ds(handle, dstab)); +} + static void remove_resource(cmd_t *cmd) { @@ -2086,6 +2162,7 @@ remove_resource(cmd_t *cmd) struct zone_devtab devtab; struct zone_attrtab attrtab; struct zone_rctltab rctltab; + struct zone_dstab dstab; if ((type = cmd->cmd_res_type) == RT_UNKNOWN) { long_usage(CMD_REMOVE, TRUE); @@ -2164,6 +2241,16 @@ remove_resource(cmd_t *cmd) else need_to_commit = TRUE; return; + case RT_DATASET: + if ((err = fill_in_dstab(cmd, &dstab, FALSE)) != Z_OK) { + z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, TRUE); + return; + } + if ((err = zonecfg_delete_ds(handle, &dstab)) != Z_OK) + z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, TRUE); + else + need_to_commit = TRUE; + return; default: zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, 
TRUE); long_usage(CMD_REMOVE, TRUE); @@ -2398,6 +2485,14 @@ select_func(cmd_t *cmd) bcopy(&old_attrtab, &in_progress_attrtab, sizeof (struct zone_attrtab)); return; + case RT_DATASET: + if ((err = fill_in_dstab(cmd, &old_dstab, FALSE)) != Z_OK) { + z_cmd_rt_perror(CMD_SELECT, RT_DATASET, err, TRUE); + global_scope = TRUE; + } + bcopy(&old_dstab, &in_progress_dstab, + sizeof (struct zone_dstab)); + return; default: zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, TRUE); long_usage(CMD_SELECT, TRUE); @@ -2801,6 +2896,20 @@ set_func(cmd_t *cmd) return; } return; + case RT_DATASET: + switch (prop_type) { + case PT_NAME: + (void) strlcpy(in_progress_dstab.zone_dataset_name, + prop_id, + sizeof (in_progress_dstab.zone_dataset_name)); + return; + default: + break; + } + zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE, TRUE); + long_usage(CMD_SET, TRUE); + usage(FALSE, HELP_PROPS); + return; default: zone_perror(rt_to_str(res_type), Z_NO_RESOURCE_TYPE, TRUE); long_usage(CMD_SET, TRUE); @@ -3149,6 +3258,46 @@ info_attr(zone_dochandle_t handle, FILE *fp, cmd_t *cmd) rt_to_str(RT_ATTR)); } +static void +output_ds(FILE *fp, struct zone_dstab *dstab) +{ + (void) fprintf(fp, "%s:\n", rt_to_str(RT_DATASET)); + output_prop(fp, PT_NAME, dstab->zone_dataset_name, B_TRUE); +} + +static void +info_ds(zone_dochandle_t handle, FILE *fp, cmd_t *cmd) +{ + struct zone_dstab lookup, user; + bool output = FALSE; + + if (zonecfg_setdsent(handle) != Z_OK) /* dataset walk; ended below */ + return; + while (zonecfg_getdsent(handle, &lookup) == Z_OK) { + if (cmd->cmd_prop_nv_pairs == 0) { + output_ds(fp, &lookup); + continue; + } + if (fill_in_dstab(cmd, &user, TRUE) != Z_OK) + continue; + if (strlen(user.zone_dataset_name) > 0 && + strcmp(user.zone_dataset_name, + lookup.zone_dataset_name) != 0) + continue; /* no match */ + output_ds(fp, &lookup); + output = TRUE; + } + (void) zonecfg_enddsent(handle); + /* + * If a property n/v pair was specified, warn the user if there was + * nothing to output. 
+ */ + if (!output && cmd->cmd_prop_nv_pairs > 0) + (void) printf(gettext("No such %s resource.\n"), + rt_to_str(RT_DATASET)); +} + + void info_func(cmd_t *cmd) { @@ -3192,6 +3341,9 @@ info_func(cmd_t *cmd) case RT_ATTR: output_attr(fp, &in_progress_attrtab); break; + case RT_DATASET: + output_ds(fp, &in_progress_dstab); + break; } goto cleanup; } @@ -3208,6 +3360,7 @@ info_func(cmd_t *cmd) info_dev(handle, fp, cmd); info_rctl(handle, fp, cmd); info_attr(handle, fp, cmd); + info_ds(handle, fp, cmd); break; case RT_ZONENAME: info_zonename(handle, fp); @@ -3239,6 +3392,9 @@ info_func(cmd_t *cmd) case RT_ATTR: info_attr(handle, fp, cmd); break; + case RT_DATASET: + info_ds(handle, fp, cmd); + break; default: zone_perror(rt_to_str(cmd->cmd_res_type), Z_NO_RESOURCE_TYPE, TRUE); @@ -3281,6 +3437,7 @@ verify_func(cmd_t *cmd) struct zone_fstab fstab; struct zone_attrtab attrtab; struct zone_rctltab rctltab; + struct zone_dstab dstab; char zonepath[MAXPATHLEN]; int err, ret_val = Z_OK, arg; bool save = FALSE; @@ -3391,6 +3548,29 @@ verify_func(cmd_t *cmd) } (void) zonecfg_endattrent(handle); + if ((err = zonecfg_setdsent(handle)) != Z_OK) { + zone_perror(zone, err, TRUE); + return; + } + while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + if (strlen(dstab.zone_dataset_name) == 0) { + zerr("%s: %s %s", rt_to_str(RT_DATASET), + pt_to_str(PT_NAME), gettext("not specified")); + saw_error = TRUE; + if (ret_val == Z_OK) + ret_val = Z_REQD_PROPERTY_MISSING; + } else if (!zfs_name_valid(dstab.zone_dataset_name, + ZFS_TYPE_FILESYSTEM)) { + zerr("%s: %s %s", rt_to_str(RT_DATASET), + pt_to_str(PT_NAME), gettext("invalid")); + saw_error = TRUE; + if (ret_val == Z_OK) + ret_val = Z_BAD_PROPERTY; + } + + } + (void) zonecfg_enddsent(handle); + if (!global_scope) { zerr(gettext("resource specification incomplete")); saw_error = TRUE; @@ -3442,10 +3622,12 @@ cancel_func(cmd_t *cmd) zonecfg_free_fs_option_list(in_progress_fstab.zone_fs_options); bzero(&in_progress_fstab, sizeof 
(in_progress_fstab)); bzero(&in_progress_nwiftab, sizeof (in_progress_nwiftab)); + bzero(&in_progress_ipdtab, sizeof (in_progress_ipdtab)); bzero(&in_progress_devtab, sizeof (in_progress_devtab)); zonecfg_free_rctl_value_list(in_progress_rctltab.zone_rctl_valptr); bzero(&in_progress_rctltab, sizeof (in_progress_rctltab)); bzero(&in_progress_attrtab, sizeof (in_progress_attrtab)); + bzero(&in_progress_dstab, sizeof (in_progress_dstab)); } static int @@ -3539,6 +3721,7 @@ end_func(cmd_t *cmd) struct zone_devtab tmp_devtab; struct zone_rctltab tmp_rctltab; struct zone_attrtab tmp_attrtab; + struct zone_dstab tmp_dstab; int err, arg; assert(cmd != NULL); @@ -3808,6 +3991,37 @@ end_func(cmd_t *cmd) &in_progress_attrtab); } break; + case RT_DATASET: + /* First make sure everything was filled in. */ + if (strlen(in_progress_dstab.zone_dataset_name) == 0) { + zerr("%s %s", pt_to_str(PT_NAME), + gettext("not specified")); + saw_error = TRUE; + validation_failed = TRUE; + } + if (validation_failed) + return; + if (end_op == CMD_ADD) { + /* Make sure there isn't already one like this. 
*/ + bzero(&tmp_dstab, sizeof (tmp_dstab)); + (void) strlcpy(tmp_dstab.zone_dataset_name, + in_progress_dstab.zone_dataset_name, + sizeof (tmp_dstab.zone_dataset_name)); + err = zonecfg_lookup_ds(handle, &tmp_dstab); + if (err == Z_OK) { + zerr(gettext("A %s resource " + "with the %s '%s' already exists."), + rt_to_str(RT_DATASET), pt_to_str(PT_NAME), + in_progress_dstab.zone_dataset_name); + saw_error = TRUE; + return; + } + err = zonecfg_add_ds(handle, &in_progress_dstab); + } else { + err = zonecfg_modify_ds(handle, &old_dstab, + &in_progress_dstab); + } + break; default: zone_perror(rt_to_str(resource_scope), Z_NO_RESOURCE_TYPE, TRUE); diff --git a/usr/src/cmd/zonecfg/zonecfg.h b/usr/src/cmd/zonecfg/zonecfg.h index e0fca7a02d..2c37de8a19 100644 --- a/usr/src/cmd/zonecfg/zonecfg.h +++ b/usr/src/cmd/zonecfg/zonecfg.h @@ -79,9 +79,10 @@ typedef int bool; #define RT_DEVICE 8 #define RT_RCTL 9 #define RT_ATTR 10 +#define RT_DATASET 11 #define RT_MIN RT_UNKNOWN -#define RT_MAX RT_ATTR +#define RT_MAX RT_DATASET /* property types: increment PT_MAX when expanding this list */ #define PT_UNKNOWN 0 diff --git a/usr/src/cmd/zonecfg/zonecfg_grammar.y b/usr/src/cmd/zonecfg/zonecfg_grammar.y index abca323bed..4f7f2d6c23 100644 --- a/usr/src/cmd/zonecfg/zonecfg_grammar.y +++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y @@ -61,7 +61,7 @@ extern void yyerror(char *s); %token COMMIT REVERT EXIT SEMICOLON TOKEN ZONENAME ZONEPATH AUTOBOOT POOL NET %token FS IPD ATTR DEVICE RCTL SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL %token NAME MATCH PRIV LIMIT ACTION VALUE EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET -%token OPEN_PAREN CLOSE_PAREN COMMA +%token OPEN_PAREN CLOSE_PAREN COMMA DATASET %type <strval> TOKEN EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET property_value OPEN_PAREN CLOSE_PAREN COMMA simple_prop_val @@ -668,6 +668,7 @@ resource_type: NET { $$ = RT_NET; } | DEVICE { $$ = RT_DEVICE; } | RCTL { $$ = RT_RCTL; } | ATTR { $$ = RT_ATTR; } + | DATASET { $$ = RT_DATASET; } property_name: 
SPECIAL { $$ = PT_SPECIAL; } | RAW { $$ = PT_RAW; } diff --git a/usr/src/cmd/zonecfg/zonecfg_lex.l b/usr/src/cmd/zonecfg/zonecfg_lex.l index 1a5de3659e..3c3f1c0da0 100644 --- a/usr/src/cmd/zonecfg/zonecfg_lex.l +++ b/usr/src/cmd/zonecfg/zonecfg_lex.l @@ -159,6 +159,8 @@ char *safe_strdup(char *s); <TSTATE>zonename { return ZONENAME; } <CSTATE>zonename { return ZONENAME; } +<TSTATE>dataset { return DATASET; } + <TSTATE>zonepath { return ZONEPATH; } <CSTATE>zonepath { return ZONEPATH; } diff --git a/usr/src/cmd/zpool/Makefile b/usr/src/cmd/zpool/Makefile new file mode 100644 index 0000000000..818c7b7fe8 --- /dev/null +++ b/usr/src/cmd/zpool/Makefile @@ -0,0 +1,80 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG= zpool +OBJS= zpool_main.o zpool_vdev.o zpool_iter.o zpool_util.o zpool_dataset.o +SRCS= $(OBJS:%.o=%.c) +POFILES= zpool_main.po zpool_vdev.po zpool_iter.po zpool_util.po \ + zpool_dataset.po +POFILE= zpool.po + +include ../Makefile.cmd + +LDLIBS += -lzfs -lnvpair -ldevid -lefi -ldiskmgt -luutil -lumem + +CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT + +# lint complains about unused _umem_* functions +LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 +LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 + +CACHEDIR= $(ROOTETC)/zfs + +ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%) + +.KEEP_STATE: + +.PARALLEL: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +install: all $(ROOTSBINPROG) $(CACHEDIR) $(ROOTUSRSBINLINKS) + +$(CACHEDIR): + $(INS.dir) + +$(POFILE): $(POFILES) + $(RM) $@ + cat $(POFILES) > $@ + +clean: + $(RM) $(OBJS) + +lint: lint_SRCS + +# Links from /usr/sbin to /sbin +$(ROOTUSRSBINLINKS): + -$(RM) $@; $(SYMLINK) ../../sbin/$(@F) $@ + +FRC: + +include ../Makefile.targ diff --git a/usr/src/cmd/zpool/zpool_dataset.c b/usr/src/cmd/zpool/zpool_dataset.c new file mode 100644 index 0000000000..0432f53652 --- /dev/null +++ b/usr/src/cmd/zpool/zpool_dataset.c @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libintl.h> +#include <libzfs.h> +#include <sys/mount.h> + +#include "zpool_util.h" + +/* + * For export and destroy, we have to support iterating over all datasets and + * unmounting and/or destroying them. This file contains the routines to + * support this. + */ +typedef struct cbdata { + int cb_force; + int cb_failed; + const char *cb_mntopts; +} cbdata_t; + +/* + * Unmount a single ZFS dataset. + */ +int +do_unmount(zfs_handle_t *zfsp, void *data) +{ + cbdata_t *cbp = data; + + if (zfs_unmount(zfsp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0) + cbp->cb_failed = 1; + + return (0); +} + +/* + * Unmount all datasets within the given pool. + * + * XXZFS it would be much more efficient, and correct, to iterate over + * mountpoints based on /etc/mnttab. 
+ */ +int +unmount_datasets(zpool_handle_t *zhp, int force) +{ + cbdata_t cb = { 0 }; + zfs_handle_t *zfsp; + + /* For unavailable pools, we don't do anything */ + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) + return (0); + + if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL) + return (-1); + + cb.cb_force = force; + + if (zfs_iter_dependents(zfsp, do_unmount, &cb) != 0 || + cb.cb_failed != 0) { + zfs_close(zfsp); + return (-1); + } + + if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0) { + zfs_close(zfsp); + return (-1); + } + + zfs_close(zfsp); + + return (0); +} + +/* + * Mount a single dataset + */ +static int +do_mount(zfs_handle_t *zfsp, void *data) +{ + cbdata_t *cbp = data; + int ret; + + if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM) + return (0); + + if (zfs_mount(zfsp, cbp->cb_mntopts, 0) != 0) + cbp->cb_failed = 1; + + ret = zfs_iter_children(zfsp, do_mount, data); + + return (ret); +} + + +/* + * Go through and mount all datasets within a pool. We need to mount all + * datasets in order, so that we mount parents before any children. A complete + * fix would gather all mountpoints, sort them, and mount them in lexical order. + * There are many more problems if you start to have nested filesystems - we + * just want to get inherited filesystems right. 
+ */ +int +mount_datasets(zpool_handle_t *zhp, const char *options) +{ + cbdata_t cb = { 0 }; + zfs_handle_t *zfsp; + + cb.cb_mntopts = options; + + /* For unavailable pools, we don't do anything */ + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) + return (0); + + if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL) + return (-1); + + if (do_mount(zfsp, &cb) != 0 || cb.cb_failed != 0) { + zfs_close(zfsp); + return (-1); + } + + zfs_close(zfsp); + + return (0); +} diff --git a/usr/src/cmd/zpool/zpool_iter.c b/usr/src/cmd/zpool/zpool_iter.c new file mode 100644 index 0000000000..f99396da81 --- /dev/null +++ b/usr/src/cmd/zpool/zpool_iter.c @@ -0,0 +1,241 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libintl.h> +#include <libuutil.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include <libzfs.h> + +#include "zpool_util.h" + +/* + * Private interface for iterating over pools specified on the command line. 
+ * Most consumers will call for_each_pool, but in order to support iostat, we + * allow fine-grained control through the zpool_list_t interface. + */ + +typedef struct zpool_node { + zpool_handle_t *zn_handle; + uu_avl_node_t zn_avlnode; + int zn_mark; +} zpool_node_t; + +struct zpool_list { + int zl_findall; + uu_avl_t *zl_avl; + uu_avl_pool_t *zl_pool; +}; + +/* ARGSUSED */ +static int +zpool_compare(const void *larg, const void *rarg, void *unused) +{ + zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle; + zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle; + const char *lname = zpool_get_name(l); + const char *rname = zpool_get_name(r); + + return (strcmp(lname, rname)); +} + +/* + * Callback function for pool_list_get(). Adds the given pool to the AVL tree + * of known pools. + */ +static int +add_pool(zpool_handle_t *zhp, void *data) +{ + zpool_list_t *zlp = data; + zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); + uu_avl_index_t idx; + + node->zn_handle = zhp; + uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); + if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { + uu_avl_insert(zlp->zl_avl, node, idx); + } else { + zpool_close(zhp); + free(node); + } + + return (0); +} + +/* + * Create a list of pools based on the given arguments. If we're given no + * arguments, then iterate over all pools in the system and add them to the AVL + * tree. Otherwise, add only those pools explicitly specified on the command + * line. 
+ */ +zpool_list_t * +pool_list_get(int argc, char **argv, int *err) +{ + zpool_list_t *zlp; + + zlp = safe_malloc(sizeof (zpool_list_t)); + + zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t), + offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT); + + if (zlp->zl_pool == NULL) + no_memory(); + + if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL, + UU_DEFAULT)) == NULL) + no_memory(); + + if (argc == 0) { + (void) zpool_iter(add_pool, zlp); + zlp->zl_findall = TRUE; + } else { + int i; + + for (i = 0; i < argc; i++) { + zpool_handle_t *zhp; + + if ((zhp = zpool_open_canfail(argv[i])) != NULL) + (void) add_pool(zhp, zlp); + else + *err = TRUE; + } + } + + return (zlp); +} + +/* + * Search for any new pools, adding them to the list. We only add pools when no + * options were given on the command line. Otherwise, we keep the list fixed as + * those that were explicitly specified. + */ +void +pool_list_update(zpool_list_t *zlp) +{ + if (zlp->zl_findall) + (void) zpool_iter(add_pool, zlp); +} + +/* + * Iterate over all pools in the list, executing the callback for each + */ +int +pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, + void *data) +{ + zpool_node_t *node, *next_node; + int ret = 0; + + for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) { + next_node = uu_avl_next(zlp->zl_avl, node); + if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL || + unavail) + ret |= func(node->zn_handle, data); + } + + return (ret); +} + +/* + * Remove the given pool from the list. When running iostat, we want to remove + * those pools that no longer exist. + */ +void +pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) +{ + zpool_node_t search, *node; + + search.zn_handle = zhp; + if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { + uu_avl_remove(zlp->zl_avl, node); + zpool_close(node->zn_handle); + free(node); + } +} + +/* + * Free all the handles associated with this list. 
+ */ +void +pool_list_free(zpool_list_t *zlp) +{ + uu_avl_walk_t *walk; + zpool_node_t *node; + + if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory")); + exit(1); + } + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(zlp->zl_avl, node); + zpool_close(node->zn_handle); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(zlp->zl_avl); + uu_avl_pool_destroy(zlp->zl_pool); + + free(zlp); +} + +/* + * Returns the number of elements in the pool list. + */ +int +pool_list_count(zpool_list_t *zlp) +{ + return (uu_avl_numnodes(zlp->zl_avl)); +} + +/* + * High level function which iterates over all pools given on the command line, + * using the pool_list_* interfaces. + */ +int +for_each_pool(int argc, char **argv, int unavail, zpool_iter_f func, + void *data) +{ + zpool_list_t *list; + int ret = 0; + + if ((list = pool_list_get(argc, argv, &ret)) == NULL) + return (1); + + if (pool_list_iter(list, unavail, func, data) != 0) + ret = 1; + + pool_list_free(list); + + return (ret); +} diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c new file mode 100644 index 0000000000..0a2f2d6cd3 --- /dev/null +++ b/usr/src/cmd/zpool/zpool_main.c @@ -0,0 +1,2471 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <libintl.h> +#include <libuutil.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> +#include <priv.h> + +#include <sys/stat.h> + +#include <libzfs.h> + +#include "zpool_util.h" + +static int zpool_do_create(int, char **); +static int zpool_do_destroy(int, char **); + +static int zpool_do_add(int, char **); + +static int zpool_do_list(int, char **); +static int zpool_do_iostat(int, char **); +static int zpool_do_status(int, char **); + +static int zpool_do_online(int, char **); +static int zpool_do_offline(int, char **); + +static int zpool_do_attach(int, char **); +static int zpool_do_detach(int, char **); +static int zpool_do_replace(int, char **); + +static int zpool_do_scrub(int, char **); + +static int zpool_do_import(int, char **); +static int zpool_do_export(int, char **); + +/* + * These libumem hooks provide a reasonable set of defaults for the allocator's + * debugging facilities. + */ +const char * +_umem_debug_init() +{ + return ("default,verbose"); /* $UMEM_DEBUG setting */ +} + +const char * +_umem_logging_init(void) +{ + return ("fail,contents"); /* $UMEM_LOGGING setting */ +} + +typedef struct zpool_command { + const char *name; + int (*func)(int, char **); + const char *usage; +} zpool_command_t; + +/* + * Master command table. Each ZFS command has a name, associated function, and + * usage message. 
These commands are organized according to how they are + * displayed in the usage message. An empty command (one with a NULL name) + * indicates an empty line in the generic usage message. + */ +static zpool_command_t command_table[] = { + { "create", zpool_do_create, + "\tcreate [-fn] [-R root] [-m mountpoint] <pool> <vdev> ...\n" }, + { "destroy", zpool_do_destroy, + "\tdestroy [-f] <pool>\n" }, + + + { NULL }, + + { "add", zpool_do_add, + "\tadd [-fn] <pool> <vdev> ...\n" }, + + { NULL }, + + { "list", zpool_do_list, + "\tlist [-H] [-o field[,field]*] [pool] ...\n" }, + { "iostat", zpool_do_iostat, + "\tiostat [-v] [pool] ... [interval [count]]\n" }, + { "status", zpool_do_status, + "\tstatus [-vx] [pool] ...\n" }, + + { NULL }, + + { "online", zpool_do_online, + "\tonline <pool> <device>\n" }, + { "offline", zpool_do_offline, + "\toffline <pool> <device>\n" }, + + { NULL }, + + { "attach", zpool_do_attach, + "\tattach [-f] <pool> <device> <new_device>\n" }, + { "detach", zpool_do_detach, + "\tdetach <pool> <device>\n" }, + { "replace", zpool_do_replace, + "\treplace [-f] <pool> <device> [new_device]\n" }, + + { NULL }, + + { "scrub", zpool_do_scrub, + "\tscrub [-s] <pool> ...\n" }, + + { NULL }, + + { "import", zpool_do_import, + "\timport [-d dir]\n" + "\timport [-d dir] [-f] [-o opts] [-R root] -a\n" + "\timport [-d dir] [-f] [-o opts] [-R root ]<pool | id> " + "[newpool]\n" }, + { "export", zpool_do_export, + "\texport [-f] <pool> ...\n" }, + + { NULL } +}; + +#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) + +zpool_command_t *current_command; + +/* + * Fields available for 'zpool list'. 
+ */ +typedef enum { + ZPOOL_FIELD_NAME, + ZPOOL_FIELD_SIZE, + ZPOOL_FIELD_USED, + ZPOOL_FIELD_AVAILABLE, + ZPOOL_FIELD_CAPACITY, + ZPOOL_FIELD_HEALTH, + ZPOOL_FIELD_ROOT +} zpool_field_t; + +#define MAX_FIELDS 10 + +typedef struct column_def { + const char *cd_title; + size_t cd_width; + enum { + left_justify, + right_justify + } cd_justify; +} column_def_t; + +static column_def_t column_table[] = { + { "NAME", 20, left_justify }, + { "SIZE", 6, right_justify }, + { "USED", 6, right_justify }, + { "AVAIL", 6, right_justify }, + { "CAP", 5, right_justify }, + { "HEALTH", 9, left_justify }, + { "ALTROOT", 15, left_justify } +}; + +static char *column_subopts[] = { + "name", + "size", + "used", + "available", + "capacity", + "health", + "root", + NULL +}; + +/* + * Display usage message. If we're inside a command, display only the usage for + * that command. Otherwise, iterate over the entire command table and display + * a complete usage message. + */ +void +usage(int requested) +{ + int i; + FILE *fp = requested ? stdout : stderr; + + if (current_command == NULL) { + int i; + + (void) fprintf(fp, gettext("usage: zpool command args ...\n")); + (void) fprintf(fp, + gettext("where 'command' is one of the following:\n\n")); + + for (i = 0; i < NCOMMAND; i++) { + if (command_table[i].name == NULL) + (void) fprintf(fp, "\n"); + else + (void) fprintf(fp, "%s", + command_table[i].usage); + } + } else { + (void) fprintf(fp, gettext("usage:\n")); + (void) fprintf(fp, current_command->usage); + + if (strcmp(current_command->name, "list") == 0) { + (void) fprintf(fp, gettext("\nwhere 'field' is one " + "of the following:\n\n")); + + for (i = 0; column_subopts[i] != NULL; i++) + (void) fprintf(fp, "\t%s\n", column_subopts[i]); + } + } + + exit(requested ? 
0 : 2); +} + +const char * +state_to_name(int state) +{ + switch (state) { + case VDEV_STATE_CLOSED: + case VDEV_STATE_CANT_OPEN: + return (gettext("FAULTED")); + case VDEV_STATE_OFFLINE: + return (gettext("OFFLINE")); + case VDEV_STATE_DEGRADED: + return (gettext("DEGRADED")); + case VDEV_STATE_HEALTHY: + return (gettext("ONLINE")); + } + + return (gettext("UNKNOWN")); +} + +void +print_vdev_tree(const char *name, nvlist_t *nv, int indent) +{ + nvlist_t **child; + uint_t c, children; + + if (name != NULL) + (void) printf("\t%*s%s\n", indent, "", name); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) + print_vdev_tree(vdev_get_name(child[c]), child[c], indent + 2); +} + +/* + * zpool add [-fn] <pool> <vdev> ... + * + * -f Force addition of devices, even if they appear in use + * -n Do not add the devices, but display the resulting layout if + * they were to be added. + * + * Adds the given vdevs to 'pool'. As with create, the bulk of this work is + * handled by get_vdev_spec(), which constructs the nvlist needed to pass to + * libzfs. 
+ */ +int +zpool_do_add(int argc, char **argv) +{ + int force = FALSE; + int dryrun = FALSE; + int c; + nvlist_t *nvroot; + char *poolname; + int ret; + zpool_handle_t *zhp; + nvlist_t *config; + + /* check options */ + while ((c = getopt(argc, argv, "fn")) != -1) { + switch (c) { + case 'f': + force = TRUE; + break; + case 'n': + dryrun = TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing vdev specification\n")); + usage(FALSE); + } + + poolname = argv[0]; + + argc--; + argv++; + + if ((zhp = zpool_open(poolname)) == NULL) + return (1); + + if ((config = zpool_get_config(zhp)) == NULL) { + (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), + poolname); + zpool_close(zhp); + return (1); + } + + /* pass off to get_vdev_spec for processing */ + nvroot = make_root_vdev(config, force, !force, argc, argv); + if (nvroot == NULL) { + zpool_close(zhp); + return (1); + } + + if (dryrun) { + nvlist_t *poolnvroot; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &poolnvroot) == 0); + + (void) printf(gettext("would update '%s' to the following " + "configuration:\n"), zpool_get_name(zhp)); + + print_vdev_tree(poolname, poolnvroot, 0); + print_vdev_tree(NULL, nvroot, 0); + + ret = 0; + } else { + ret = (zpool_add(zhp, nvroot) != 0); + } + + return (ret); +} + +/* + * zpool create [-fn] [-R root] [-m mountpoint] <pool> <dev> ... + * + * -f Force creation, even if devices appear in use + * -n Do not create the pool, but display the resulting layout if it + * were to be created. + * -R Create a pool under an alternate root + * -m Set default mountpoint for the root dataset. 
By default it's + * '/<pool>' + * + * Creates the the named pool according to the given vdev specification. The + * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once + * we get the nvlist back from get_vdev_spec(), we either print out the contents + * (if '-n' was specified), or pass it to libzfs to do the creation. + */ +int +zpool_do_create(int argc, char **argv) +{ + int force = FALSE; + int dryrun = FALSE; + int c; + nvlist_t *nvroot; + char *poolname; + int ret; + char *altroot = NULL; + char *mountpoint = NULL; + + /* check options */ + while ((c = getopt(argc, argv, ":fnR:m:")) != -1) { + switch (c) { + case 'f': + force = TRUE; + break; + case 'n': + dryrun = TRUE; + break; + case 'R': + altroot = optarg; + break; + case 'm': + mountpoint = optarg; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing vdev specification\n")); + usage(FALSE); + } + + poolname = argv[0]; + + /* + * As a special case, check for use of '/' in the name, and direct the + * user to use 'zfs create' instead. 
+ */ + if (strchr(poolname, '/') != NULL) { + (void) fprintf(stderr, gettext("cannot create '%s': invalid " + "character '/' in pool name\n"), poolname); + (void) fprintf(stderr, gettext("use 'zfs create' to " + "create a dataset\n")); + return (1); + } + + /* pass off to get_vdev_spec for bulk processing */ + nvroot = make_root_vdev(NULL, force, !force, argc - 1, argv + 1); + if (nvroot == NULL) + return (1); + + if (altroot != NULL && altroot[0] != '/') { + (void) fprintf(stderr, gettext("invalid alternate root '%s': " + "must be an absolute path\n")); + return (1); + } + + /* + * Check the validity of the mountpoint and direct the user to use the + * '-m' mountpoint option if it looks like its in use. + */ + if (mountpoint == NULL || + (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && + strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { + char buf[MAXPATHLEN]; + struct stat64 statbuf; + + if (mountpoint && mountpoint[0] != '/') { + (void) fprintf(stderr, gettext("invalid mountpoint " + "'%s': must be an absolute path, 'legacy', or " + "'none'\n"), mountpoint); + return (1); + } + + if (mountpoint == NULL) { + if (altroot != NULL) + (void) snprintf(buf, sizeof (buf), "%s/%s", + altroot, poolname); + else + (void) snprintf(buf, sizeof (buf), "/%s", + poolname); + } else { + if (altroot != NULL) + (void) snprintf(buf, sizeof (buf), "%s%s", + altroot, mountpoint); + else + (void) snprintf(buf, sizeof (buf), "%s", + mountpoint); + } + + if (stat64(buf, &statbuf) == 0 && + statbuf.st_nlink != 2) { + if (mountpoint == NULL) + (void) fprintf(stderr, gettext("default " + "mountpoint '%s' exists and is not " + "empty\n"), buf); + else + (void) fprintf(stderr, gettext("mountpoint " + "'%s' exists and is not empty\n"), buf); + (void) fprintf(stderr, gettext("use '-m' " + "option to provide a different default\n")); + return (1); + } + } + + + if (dryrun) { + /* + * For a dry run invocation, print out a basic message and run + * through all the vdevs in the list and print out 
in an + * appropriate hierarchy. + * + * XXZFS find out of we can create the pool? + */ + (void) printf(gettext("would create '%s' with the " + "following layout:\n\n"), poolname); + + print_vdev_tree(poolname, nvroot, 0); + + ret = 0; + } else { + ret = 1; + /* + * Hand off to libzfs. + */ + if (zpool_create(poolname, nvroot, altroot) == 0) { + zfs_handle_t *pool = zfs_open(poolname, + ZFS_TYPE_FILESYSTEM); + if (pool != NULL) { + if (mountpoint != NULL) + verify(zfs_prop_set(pool, + ZFS_PROP_MOUNTPOINT, + mountpoint) == 0); + if (zfs_mount(pool, NULL, 0) == 0) + ret = zfs_share(pool); + zfs_close(pool); + } + } + + } + + nvlist_free(nvroot); + + return (ret); +} + +/* + * zpool destroy <pool> + * + * -f Forcefully unmount any datasets + * + * Destroy the given pool. Automatically unmounts any datasets in the pool. + */ +int +zpool_do_destroy(int argc, char **argv) +{ + int force = FALSE; + int c; + char *pool; + zpool_handle_t *zhp; + int ret; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool argument\n")); + usage(FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + pool = argv[0]; + + if ((zhp = zpool_open_canfail(pool)) == NULL) { + /* + * As a special case, check for use of '/' in the name, and + * direct the user to use 'zfs destroy' instead. 
+ */ + if (strchr(pool, '/') != NULL) + (void) fprintf(stderr, gettext("use 'zfs destroy' to " + "destroy a dataset\n")); + return (1); + } + + if (unmount_datasets(zhp, force) != 0) { + (void) fprintf(stderr, gettext("could not destroy '%s': " + "could not unmount datasets\n"), zpool_get_name(zhp)); + return (1); + } + + ret = (zpool_destroy(zhp) != 0); + + zpool_close(zhp); + + return (ret); +} + +/* + * zpool export [-f] <pool> ... + * + * -f Forcefully unmount datasets + * + * Export the the given pools. By default, the command will attempt to cleanly + * unmount any active datasets within the pool. If the '-f' flag is specified, + * then the datasets will be forcefully unmounted. + */ +int +zpool_do_export(int argc, char **argv) +{ + int force = FALSE; + int c; + zpool_handle_t *zhp; + int ret; + int i; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool argument\n")); + usage(FALSE); + } + + ret = 0; + for (i = 0; i < argc; i++) { + if ((zhp = zpool_open_canfail(argv[i])) == NULL) { + ret = 1; + continue; + } + + if (unmount_datasets(zhp, force) != 0) { + ret = 1; + zpool_close(zhp); + continue; + } + + if (zpool_export(zhp) != 0) + ret = 1; + + zpool_close(zhp); + } + + return (ret); +} + +/* + * Given a vdev configuration, determine the maximum width needed for the device + * name column. 
+ */ +static int +max_width(nvlist_t *nv, int depth, int max) +{ + const char *name = vdev_get_name(nv); + nvlist_t **child; + uint_t c, children; + int ret; + + if (strlen(name) + depth > max) + max = strlen(name) + depth; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return (max); + + for (c = 0; c < children; c++) + if ((ret = max_width(child[c], depth + 2, max)) > max) + max = ret; + + return (max); +} + + +/* + * Print the configuration of an exported pool. Iterate over all vdevs in the + * pool, printing out the name and status for each one. + */ +void +print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) +{ + nvlist_t **child; + uint_t c, children; + vdev_stat_t *vs; + char *type; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + if (strcmp(type, VDEV_TYPE_MISSING) == 0) + return; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + (uint64_t **)&vs, &c) == 0); + + (void) printf("\t%*s%-*s", depth, "", namewidth - depth, name); + + if (vs->vs_aux != 0) { + (void) printf(" %-8s ", state_to_name(vs->vs_state)); + + switch (vs->vs_aux) { + case VDEV_AUX_OPEN_FAILED: + (void) printf(gettext("cannot open")); + break; + + case VDEV_AUX_BAD_GUID_SUM: + (void) printf(gettext("missing device")); + break; + + case VDEV_AUX_NO_REPLICAS: + (void) printf(gettext("insufficient replicas")); + break; + + default: + (void) printf(gettext("corrupted data")); + break; + } + } else { + (void) printf(" %s", state_to_name(vs->vs_state)); + } + (void) printf("\n"); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) + print_import_config(vdev_get_name(child[c]), child[c], + namewidth, depth + 2); +} + +/* + * Display the status for the given pool. 
+ */ +static void +show_import(nvlist_t *config) +{ + uint64_t pool_state; + vdev_stat_t *vs; + char *name; + uint64_t guid; + char *msgid; + nvlist_t *nvroot; + int reason; + char *health; + uint_t vsc; + int namewidth; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &pool_state) == 0); + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_HEALTH, + &health) == 0); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, + (uint64_t **)&vs, &vsc) == 0); + + reason = zpool_import_status(config, &msgid); + + (void) printf(" pool: %s\n", name); + (void) printf(" id: %llu\n", guid); + (void) printf(" state: %s\n", health); + + switch (reason) { + case ZPOOL_STATUS_MISSING_DEV_R: + case ZPOOL_STATUS_MISSING_DEV_NR: + case ZPOOL_STATUS_BAD_GUID_SUM: + (void) printf(gettext("status: One or more devices are missing " + "from the system.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_LABEL_R: + case ZPOOL_STATUS_CORRUPT_LABEL_NR: + (void) printf(gettext("status: One or more devices contains " + "corrupted data.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_DATA: + (void) printf(gettext("status: The pool data is corrupted.\n")); + break; + + default: + /* + * No other status can be seen when importing pools. + */ + assert(reason == ZPOOL_STATUS_OK); + } + + /* + * Print out an action according to the overall state of the pool. 
+ */ + if (strcmp(health, gettext("ONLINE")) == 0) { + (void) printf(gettext("action: The pool can be imported" + " using its name or numeric identifier.")); + if (pool_state != POOL_STATE_EXPORTED) + (void) printf(gettext(" The\n\tpool may be active on " + "on another system, but can be imported using\n\t" + "the '-f' flag.\n")); + else + (void) printf("\n"); + } else if (strcmp(health, gettext("DEGRADED")) == 0) { + (void) printf(gettext("action: The pool can be imported " + "despite missing or damaged devices. The\n\tfault " + "tolerance of the pool may be compromised if imported.")); + if (pool_state != POOL_STATE_EXPORTED) + (void) printf(gettext(" The\n\tpool may be active on " + "on another system, but can be imported using\n\t" + "the '-f' flag.\n")); + else + (void) printf("\n"); + } else { + if (reason == ZPOOL_STATUS_MISSING_DEV_R || + reason == ZPOOL_STATUS_MISSING_DEV_NR || + reason == ZPOOL_STATUS_BAD_GUID_SUM) + (void) printf(gettext("action: The pool cannot be " + "imported. Attach the missing\n\tdevices and try " + "again.\n")); + else + (void) printf(gettext("action: The pool cannot be " + "imported due to damaged devices or data.\n")); + } + + if (msgid != NULL) + (void) printf(gettext(" see: http://www.sun.com/msg/%s\n"), + msgid); + + (void) printf(gettext("config:\n\n")); + + namewidth = max_width(nvroot, 0, 0); + if (namewidth < 10) + namewidth = 10; + print_import_config(name, nvroot, namewidth, 0); + + if (reason == ZPOOL_STATUS_BAD_GUID_SUM) { + (void) printf("\n\tAdditional devices are known to " + "be part of this pool, though their\n\texact " + "configuration cannot be determined.\n"); + } +} + +/* + * Perform the import for the given configuration. This passes the heavy + * lifting off to zpool_import(), and then mounts the datasets contained within + * the pool. 
+ */ +static int +do_import(nvlist_t *config, const char *newname, const char *mntopts, + const char *altroot, int force) +{ + zpool_handle_t *zhp; + char *name; + uint64_t state; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_STATE, &state) == 0); + if (state != POOL_STATE_EXPORTED && !force) { + (void) fprintf(stderr, gettext("cannot import '%s': pool " + "may be in use from other system\n"), name); + (void) fprintf(stderr, gettext("use '-f' to import anyway\n")); + return (1); + } + + if (zpool_import(config, newname, altroot) != 0) + return (1); + + if (newname != NULL) + name = (char *)newname; + + verify((zhp = zpool_open(name)) != NULL); + + if (mount_datasets(zhp, mntopts) != 0) { + zpool_close(zhp); + return (1); + } + + zpool_close(zhp); + return (0); +} + +/* + * zpool import [-d dir] + * import [-R root] [-d dir] [-f] -a + * import [-R root] [-d dir] [-f] <pool | id> [newpool] + * + * -d Scan in a specific directory, other than /dev/dsk. More than + * one directory can be specified using multiple '-d' options. + * + * -R Temporarily import the pool, with all mountpoints relative to + * the given root. The pool will remain exported when the machine + * is rebooted. + * + * -f Force import, even if it appears that the pool is active. + * + * -a Import all pools found. + * + * The import command scans for pools to import, and import pools based on pool + * name and GUID. The pool can also be renamed as part of the import process. 
+ */ +int +zpool_do_import(int argc, char **argv) +{ + char **searchdirs = NULL; + int nsearch = 0; + int c; + int err; + nvlist_t *pools; + int do_all = FALSE; + char *altroot = NULL; + char *mntopts = NULL; + int do_force = FALSE; + nvpair_t *elem; + nvlist_t *config; + uint64_t searchguid; + char *searchname; + nvlist_t *found_config; + int first; + + /* check options */ + while ((c = getopt(argc, argv, ":fd:R:ao:")) != -1) { + switch (c) { + case 'a': + do_all = TRUE; + break; + case 'd': + if (searchdirs == NULL) { + searchdirs = safe_malloc(sizeof (char *)); + } else { + char **tmp = safe_malloc((nsearch + 1) * + sizeof (char *)); + bcopy(searchdirs, tmp, nsearch * + sizeof (char *)); + free(searchdirs); + searchdirs = tmp; + } + searchdirs[nsearch++] = optarg; + break; + case 'f': + do_force = TRUE; + break; + case 'o': + mntopts = optarg; + break; + case 'R': + altroot = optarg; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + if (searchdirs == NULL) { + searchdirs = safe_malloc(sizeof (char *)); + searchdirs[0] = "/dev/dsk"; + nsearch = 1; + } + + /* check argument count */ + if (do_all) { + if (argc != 0) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + } else { + if (argc > 2) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + /* + * Check for the SYS_CONFIG privilege. We do this explicitly + * here because otherwise any attempt to discover pools will + * silently fail. 
+ */ + if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { + (void) fprintf(stderr, gettext("cannot " + "discover pools: permission denied\n")); + return (1); + } + } + + if ((pools = zpool_find_import(nsearch, searchdirs)) == NULL) + return (1); + + /* + * We now have a list of all available pools in the given directories. + * Depending on the arguments given, we do one of the following: + * + * <none> Iterate through all pools and display information about + * each one. + * + * -a Iterate through all pools and try to import each one. + * + * <id> Find the pool that corresponds to the given GUID/pool + * name and import that one. + */ + if (argc != 0) { + char *endptr; + + errno = 0; + searchguid = strtoull(argv[0], &endptr, 10); + if (errno != 0 || *endptr != '\0') + searchname = argv[0]; + else + searchname = NULL; + found_config = NULL; + } + + err = 0; + elem = NULL; + first = TRUE; + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { + + verify(nvpair_value_nvlist(elem, &config) == 0); + + if (argc == 0) { + if (first) + first = FALSE; + else + (void) printf("\n"); + + if (do_all) + err |= do_import(config, NULL, mntopts, + altroot, do_force); + else + show_import(config); + } else if (searchname != NULL) { + char *name; + + /* + * We are searching for a pool based on name. + */ + verify(nvlist_lookup_string(config, + ZPOOL_CONFIG_POOL_NAME, &name) == 0); + + if (strcmp(name, searchname) == 0) { + if (found_config != NULL) { + (void) fprintf(stderr, gettext( + "cannot import '%s': more than " + "one matching pool\n"), searchname); + (void) fprintf(stderr, gettext( + "import by numeric ID instead\n")); + err = TRUE; + } + found_config = config; + } + } else { + uint64_t guid; + + /* + * Search for a pool by guid. 
+ */ + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_GUID, &guid) == 0); + + if (guid == searchguid) + found_config = config; + } + } + + /* + * If we were searching for a specific pool, verify that we found a + * pool, and then do the import. + */ + if (argc != 0 && err == 0) { + if (found_config == NULL) { + (void) fprintf(stderr, gettext("cannot import '%s': " + "no such pool available\n"), argv[0]); + err = TRUE; + } else { + err |= do_import(found_config, argc == 1 ? NULL : + argv[1], mntopts, altroot, do_force); + } + } + + /* + * If we were just looking for pools, report an error if none were + * found. + */ + if (argc == 0 && first) + (void) fprintf(stderr, + gettext("no pools available to import\n")); + + nvlist_free(pools); + + return (err ? 1 : 0); +} + +typedef struct iostat_cbdata { + zpool_list_t *cb_list; + int cb_verbose; + int cb_iteration; + int cb_namewidth; +} iostat_cbdata_t; + +static void +print_iostat_separator(iostat_cbdata_t *cb) +{ + int i = 0; + + for (i = 0; i < cb->cb_namewidth; i++) + (void) printf("-"); + (void) printf(" ----- ----- ----- ----- ----- -----\n"); +} + +static void +print_iostat_header(iostat_cbdata_t *cb) +{ + (void) printf("%*s capacity operations bandwidth\n", + cb->cb_namewidth, ""); + (void) printf("%-*s used avail read write read write\n", + cb->cb_namewidth, "pool"); + print_iostat_separator(cb); +} + +/* + * Display a single statistic. + */ +void +print_one_stat(uint64_t value) +{ + char buf[64]; + + zfs_nicenum(value, buf, sizeof (buf)); + (void) printf(" %5s", buf); +} + +/* + * Print out all the statistics for the given vdev. This can either be the + * toplevel configuration, or called recursively. If 'name' is NULL, then this + * is a verbose output, and we don't want to display the toplevel pool stats. 
+ */ +void +print_vdev_stats(const char *name, nvlist_t *oldnv, nvlist_t *newnv, + iostat_cbdata_t *cb, int depth) +{ + nvlist_t **oldchild, **newchild; + uint_t c, children; + vdev_stat_t *oldvs, *newvs; + vdev_stat_t zerovs = { 0 }; + uint64_t tdelta; + double scale; + + if (oldnv != NULL) { + verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS, + (uint64_t **)&oldvs, &c) == 0); + } else { + oldvs = &zerovs; + } + + verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS, + (uint64_t **)&newvs, &c) == 0); + + if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - depth), ""); + + tdelta = newvs->vs_timestamp - oldvs->vs_timestamp; + + if (tdelta == 0) + scale = 1.0; + else + scale = (double)NANOSEC / tdelta; + + /* only toplevel vdevs have capacity stats */ + if (newvs->vs_space == 0) { + (void) printf(" - -"); + } else { + print_one_stat(newvs->vs_alloc); + print_one_stat(newvs->vs_space - newvs->vs_alloc); + } + + print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] - + oldvs->vs_ops[ZIO_TYPE_READ]))); + + print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] - + oldvs->vs_ops[ZIO_TYPE_WRITE]))); + + print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] - + oldvs->vs_bytes[ZIO_TYPE_READ]))); + + print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] - + oldvs->vs_bytes[ZIO_TYPE_WRITE]))); + + (void) printf("\n"); + + if (!cb->cb_verbose) + return; + + if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN, + &newchild, &children) != 0) + return; + + if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN, + &oldchild, &c) != 0) + return; + + for (c = 0; c < children; c++) + print_vdev_stats(vdev_get_name(newchild[c]), + oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); +} + +/* + * Callback to print out the iostats for the given pool. 
+ */ +int +print_iostat(zpool_handle_t *zhp, void *data) +{ + iostat_cbdata_t *cb = data; + nvlist_t *oldconfig, *newconfig; + nvlist_t *oldnvroot, *newnvroot; + uint64_t oldtxg, newtxg; + + if (zpool_refresh_stats(zhp, &oldconfig, &newconfig) != 0) { + /* + * This pool has disappeared, so remove it + * from the list and continue. + */ + pool_list_remove(cb->cb_list, zhp); + return (0); + } + + if (cb->cb_iteration == 1) { + if (oldconfig != NULL) + nvlist_free(oldconfig); + oldconfig = NULL; + } + + verify(nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_POOL_TXG, + &newtxg) == 0); + verify(nvlist_lookup_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE, + &newnvroot) == 0); + + if (oldconfig == NULL || + nvlist_lookup_uint64(oldconfig, ZPOOL_CONFIG_POOL_TXG, &oldtxg) || + oldtxg != newtxg || + nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE, &oldnvroot)) + oldnvroot = NULL; + + /* + * Print out the statistics for the pool. + */ + print_vdev_stats(zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); + + if (cb->cb_verbose) + print_iostat_separator(cb); + + if (oldconfig != NULL) + nvlist_free(oldconfig); + + return (0); +} + +int +get_namewidth(zpool_handle_t *zhp, void *data) +{ + iostat_cbdata_t *cb = data; + nvlist_t *config, *nvroot; + + if ((config = zpool_get_config(zhp)) != NULL) { + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (!cb->cb_verbose) + cb->cb_namewidth = strlen(zpool_get_name(zhp)); + else + cb->cb_namewidth = max_width(nvroot, 0, 0); + } + + /* + * The width must fall into the range [10,38]. The upper limit is the + * maximum we can have and still fit in 80 columns. + */ + if (cb->cb_namewidth < 10) + cb->cb_namewidth = 10; + if (cb->cb_namewidth > 38) + cb->cb_namewidth = 38; + + return (0); +} + +/* + * zpool iostat [-v] [pool] ... 
[interval [count]] + * + * -v Display statistics for individual vdevs + * + * This command can be tricky because we want to be able to deal with pool + * creation/destruction as well as vdev configuration changes. The bulk of this + * processing is handled by the pool_list_* routines in zpool_iter.c. We rely + * on pool_list_update() to detect the addition of new pools. Configuration + * changes are all handled within libzfs. + */ +int +zpool_do_iostat(int argc, char **argv) +{ + int c; + int ret; + int npools; + unsigned long interval = 0, count = 0; + zpool_list_t *list; + int verbose = FALSE; + iostat_cbdata_t cb; + + /* check options */ + while ((c = getopt(argc, argv, "v")) != -1) { + switch (c) { + case 'v': + verbose = TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* + * Determine if the last argument is an integer or a pool name + */ + if (argc > 0 && isdigit(argv[argc - 1][0])) { + char *end; + + errno = 0; + interval = strtoul(argv[argc - 1], &end, 10); + + if (*end == '\0' && errno == 0) { + if (interval == 0) { + (void) fprintf(stderr, gettext("interval " + "cannot be zero\n")); + usage(FALSE); + } + + /* + * Ignore the last parameter + */ + argc--; + } else { + /* + * If this is not a valid number, just plow on. The + * user will get a more informative error message later + * on. + */ + interval = 0; + } + } + + /* + * If the last argument is also an integer, then we have both a count + * and an integer. + */ + if (argc > 0 && isdigit(argv[argc - 1][0])) { + char *end; + + errno = 0; + count = interval; + interval = strtoul(argv[argc - 1], &end, 10); + + if (*end == '\0' && errno == 0) { + if (interval == 0) { + (void) fprintf(stderr, gettext("interval " + "cannot be zero\n")); + usage(FALSE); + } + + /* + * Ignore the last parameter + */ + argc--; + } else { + interval = 0; + } + } + + /* + * Construct the list of all interesting pools. 
+ */ + ret = 0; + if ((list = pool_list_get(argc, argv, &ret)) == NULL) + return (1); + + if (pool_list_count(list) == 0 && argc != 0) + return (1); + + if (pool_list_count(list) == 0 && interval == 0) { + (void) fprintf(stderr, gettext("no pools available\n")); + return (1); + } + + /* + * Enter the main iostat loop. + */ + cb.cb_list = list; + cb.cb_verbose = verbose; + cb.cb_iteration = 0; + cb.cb_namewidth = 0; + + for (;;) { + pool_list_update(list); + + if ((npools = pool_list_count(list)) == 0) + break; + + /* + * Iterate over all pools to determine the maximum width + * for the pool / device name column across all pools. + */ + cb.cb_namewidth = 0; + (void) pool_list_iter(list, FALSE, get_namewidth, &cb); + + /* + * If it's the first time, or verbose mode, print the header. + */ + if (++cb.cb_iteration == 1 || verbose) + print_iostat_header(&cb); + + (void) pool_list_iter(list, FALSE, print_iostat, &cb); + + /* + * If there's more than one pool, and we're not in verbose mode + * (which prints a separator for us), then print a separator. + */ + if (npools > 1 && !verbose) + print_iostat_separator(&cb); + + if (verbose) + (void) printf("\n"); + + if (interval == 0) + break; + + if (count != 0 && --count == 0) + break; + + (void) sleep(interval); + } + + pool_list_free(list); + + return (ret); +} + +typedef struct list_cbdata { + int cb_scripted; + int cb_first; + int cb_fields[MAX_FIELDS]; + int cb_fieldcount; +} list_cbdata_t; + +/* + * Given a list of columns to display, output appropriate headers for each one. + */ +void +print_header(int *fields, size_t count) +{ + int i; + column_def_t *col; + const char *fmt; + + for (i = 0; i < count; i++) { + col = &column_table[fields[i]]; + if (i != 0) + (void) printf(" "); + if (col->cd_justify == left_justify) + fmt = "%-*s"; + else + fmt = "%*s"; + + (void) printf(fmt, i == count - 1 ? 
strlen(col->cd_title) : + col->cd_width, col->cd_title); + } + + (void) printf("\n"); +} + +int +list_callback(zpool_handle_t *zhp, void *data) +{ + list_cbdata_t *cbp = data; + nvlist_t *config; + int i; + char buf[ZPOOL_MAXNAMELEN]; + uint64_t total; + uint64_t used; + const char *fmt; + column_def_t *col; + + if (cbp->cb_first) { + if (!cbp->cb_scripted) + print_header(cbp->cb_fields, cbp->cb_fieldcount); + cbp->cb_first = FALSE; + } + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + config = NULL; + } else { + config = zpool_get_config(zhp); + total = zpool_get_space_total(zhp); + used = zpool_get_space_used(zhp); + } + + for (i = 0; i < cbp->cb_fieldcount; i++) { + if (i != 0) { + if (cbp->cb_scripted) + (void) printf("\t"); + else + (void) printf(" "); + } + + col = &column_table[cbp->cb_fields[i]]; + + switch (cbp->cb_fields[i]) { + case ZPOOL_FIELD_NAME: + (void) strlcpy(buf, zpool_get_name(zhp), sizeof (buf)); + break; + + case ZPOOL_FIELD_SIZE: + if (config == NULL) + (void) strlcpy(buf, "-", sizeof (buf)); + else + zfs_nicenum(total, buf, sizeof (buf)); + break; + + case ZPOOL_FIELD_USED: + if (config == NULL) + (void) strlcpy(buf, "-", sizeof (buf)); + else + zfs_nicenum(used, buf, sizeof (buf)); + break; + + case ZPOOL_FIELD_AVAILABLE: + if (config == NULL) + (void) strlcpy(buf, "-", sizeof (buf)); + else + zfs_nicenum(total - used, buf, sizeof (buf)); + break; + + case ZPOOL_FIELD_CAPACITY: + if (config == NULL) { + (void) strlcpy(buf, "-", sizeof (buf)); + } else { + uint64_t capacity = (total == 0 ? 
0 : + (used * 100 / total)); + (void) snprintf(buf, sizeof (buf), "%llu%%", + capacity); + } + break; + + case ZPOOL_FIELD_HEALTH: + if (config == NULL) { + (void) strlcpy(buf, "FAULTED", sizeof (buf)); + } else { + nvlist_t *nvroot; + vdev_stat_t *vs; + uint_t vsc; + + verify(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + verify(nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_STATS, (uint64_t **)&vs, + &vsc) == 0); + (void) strlcpy(buf, state_to_name(vs->vs_state), + sizeof (buf)); + } + break; + + case ZPOOL_FIELD_ROOT: + if (config == NULL) + (void) strlcpy(buf, "-", sizeof (buf)); + else if (zpool_get_root(zhp, buf, sizeof (buf)) != 0) + (void) strlcpy(buf, "-", sizeof (buf)); + break; + } + + if (cbp->cb_scripted) + (void) printf("%s", buf); + else { + if (col->cd_justify == left_justify) + fmt = "%-*s"; + else + fmt = "%*s"; + + (void) printf(fmt, i == cbp->cb_fieldcount - 1 ? + strlen(buf) : col->cd_width, buf); + } + } + + (void) printf("\n"); + + return (0); +} + +/* + * zpool list [-H] [-o field[,field]*] [pool] ... + * + * -H Scripted mode. Don't display headers, and separate fields by + * a single tab. + * -o List of fields to display. Defaults to all fields, or + * "name,size,used,available,capacity,health,root" + * + * List all pools in the system, whether or not they're healthy. Output space + * statistics for each one, as well as health status summary. 
+ */ +int +zpool_do_list(int argc, char **argv) +{ + int c; + int ret; + list_cbdata_t cb = { 0 }; + static char default_fields[] = + "name,size,used,available,capacity,health,root"; + char *fields = default_fields; + char *value; + + /* check options */ + while ((c = getopt(argc, argv, ":Ho:")) != -1) { + switch (c) { + case 'H': + cb.cb_scripted = TRUE; + break; + case 'o': + fields = optarg; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + while (*fields != '\0') { + if (cb.cb_fieldcount == MAX_FIELDS) { + (void) fprintf(stderr, gettext("too many " + "properties given to -o option\n")); + usage(FALSE); + } + + if ((cb.cb_fields[cb.cb_fieldcount] = getsubopt(&fields, + column_subopts, &value)) == -1) { + (void) fprintf(stderr, gettext("invalid property " + "'%s'\n"), value); + usage(FALSE); + } + + cb.cb_fieldcount++; + } + + + cb.cb_first = TRUE; + + ret = for_each_pool(argc, argv, TRUE, list_callback, &cb); + + if (argc == 0 && cb.cb_first) { + (void) printf(gettext("no pools available\n")); + return (0); + } + + return (ret); +} + +static nvlist_t * +zpool_get_vdev_by_name(nvlist_t *nv, char *name) +{ + nvlist_t **child; + uint_t c, children; + nvlist_t *match; + char *path; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + if (strncmp(name, "/dev/dsk/", 9) == 0) + name += 9; + if (strncmp(path, "/dev/dsk/", 9) == 0) + path += 9; + if (strcmp(name, path) == 0) + return (nv); + return (NULL); + } + + for (c = 0; c < children; c++) + if ((match = zpool_get_vdev_by_name(child[c], name)) != NULL) + return (match); + + return (NULL); +} + +static int +zpool_do_attach_or_replace(int argc, char **argv, int replacing) +{ + int force 
= FALSE; + int c; + nvlist_t *nvroot; + char *poolname, *old_disk, *new_disk; + zpool_handle_t *zhp; + nvlist_t *config; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(FALSE); + } + + poolname = argv[0]; + + if (argc < 2) { + (void) fprintf(stderr, + gettext("missing <device> specification\n")); + usage(FALSE); + } + + old_disk = argv[1]; + + if (argc < 3) { + if (!replacing) { + (void) fprintf(stderr, + gettext("missing <new_device> specification\n")); + usage(FALSE); + } + new_disk = old_disk; + argc -= 1; + argv += 1; + } else { + new_disk = argv[2]; + argc -= 2; + argv += 2; + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(FALSE); + } + + if ((zhp = zpool_open(poolname)) == NULL) + return (1); + + if ((config = zpool_get_config(zhp)) == NULL) { + (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), + poolname); + zpool_close(zhp); + return (1); + } + + nvroot = make_root_vdev(config, force, B_FALSE, argc, argv); + if (nvroot == NULL) { + zpool_close(zhp); + return (1); + } + + return (zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing)); +} + +/* + * zpool replace [-f] <pool> <device> <new_device> + * + * -f Force attach, even if <new_device> appears to be in use. + * + * Replace <device> with <new_device>. + */ +/* ARGSUSED */ +int +zpool_do_replace(int argc, char **argv) +{ + return (zpool_do_attach_or_replace(argc, argv, B_TRUE)); +} + +/* + * zpool attach [-f] <pool> <device> <new_device> + * + * -f Force attach, even if <new_device> appears to be in use. + * + * Attach <new_device> to the mirror containing <device>. 
If <device> is not
+ * part of a mirror, then <device> will be transformed into a mirror of
+ * <device> and <new_device>. In either case, <new_device> will begin life
+ * with a DTL of [0, now], and will immediately begin to resilver itself.
+ */
+int
+zpool_do_attach(int argc, char **argv)
+{
+	return (zpool_do_attach_or_replace(argc, argv, B_FALSE));
+}
+
+/*
+ * zpool detach [-f] <pool> <device>
+ *
+ *	-f	Force detach of <device>, even if DTLs argue against it
+ *		(not supported yet)
+ *
+ * Detach a device from a mirror. The operation will be refused if <device>
+ * is the last device in the mirror, or if the DTLs indicate that this device
+ * has the only valid copy of some data.
+ *
+ * Returns 1 if the pool cannot be opened, otherwise the result of
+ * zpool_vdev_detach().
+ */
+int
+zpool_do_detach(int argc, char **argv)
+{
+	int c;
+	int ret;
+	char *poolname, *path;
+	zpool_handle_t *zhp;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+			/* recognized but not supported yet: reject it */
+			/* FALLTHROUGH */
+		case '?':
+			/*
+			 * getopt() only sets optopt for unknown options, so
+			 * report the option character itself for '-f'.
+			 */
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    c == '?' ? optopt : c);
+			usage(FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(FALSE);
+	}
+
+	if (argc < 2) {
+		(void) fprintf(stderr,
+		    gettext("missing <device> specification\n"));
+		usage(FALSE);
+	}
+
+	poolname = argv[0];
+	path = argv[1];
+
+	if ((zhp = zpool_open(poolname)) == NULL)
+		return (1);
+
+	ret = zpool_vdev_detach(zhp, path);
+
+	/* release the handle instead of leaking it */
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool online [-t] <pool> <device>
+ *
+ *	-t	Only bring the device on-line temporarily. The online
+ *		state will not be persistent across reboots. 
+ */
+int
+zpool_do_online(int argc, char **argv)
+{
+	int c, i;
+	char *poolname;
+	zpool_handle_t *zhp;
+	int ret = 0;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "t")) != -1) {
+		switch (c) {
+		case 't':
+			/* recognized but currently rejected */
+			/* FALLTHROUGH */
+		case '?':
+			/*
+			 * getopt() only sets optopt for unknown options, so
+			 * report the option character itself for '-t'.
+			 */
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    c == '?' ? optopt : c);
+			usage(FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing device name\n"));
+		usage(FALSE);
+	}
+
+	poolname = argv[0];
+
+	if ((zhp = zpool_open(poolname)) == NULL)
+		return (1);
+
+	/* every remaining argument is a device; keep going after a failure */
+	for (i = 1; i < argc; i++) {
+		if (zpool_vdev_online(zhp, argv[i]) == 0)
+			(void) printf(gettext("Bringing device %s online\n"),
+			    argv[i]);
+		else
+			ret = 1;
+	}
+
+	/* release the handle instead of leaking it */
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool offline [-ft] <pool> <device> ...
+ *
+ *	-f	Force the device into the offline state, even if doing
+ *		so would appear to compromise pool availability.
+ *		(not supported yet)
+ *
+ *	-t	Only take the device off-line temporarily. The offline
+ *		state will not be persistent across reboots. 
+ * (not supported yet) + */ +/* ARGSUSED */ +int +zpool_do_offline(int argc, char **argv) +{ + int c, i; + char *poolname; + zpool_handle_t *zhp; + int ret = 0; + + /* check options */ + while ((c = getopt(argc, argv, "ft")) != -1) { + switch (c) { + case 'f': + case 't': + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing device name\n")); + usage(FALSE); + } + + poolname = argv[0]; + + if ((zhp = zpool_open(poolname)) == NULL) + return (1); + + for (i = 1; i < argc; i++) + if (zpool_vdev_offline(zhp, argv[i]) == 0) + (void) printf(gettext("Bringing device %s offline\n"), + argv[i]); + else + ret = 1; + + return (ret); +} + +typedef struct scrub_cbdata { + int cb_type; +} scrub_cbdata_t; + +int +scrub_callback(zpool_handle_t *zhp, void *data) +{ + scrub_cbdata_t *cb = data; + + return (zpool_scrub(zhp, cb->cb_type) != 0); +} + +/* + * zpool scrub [-s] <pool> ... + * + * -s Stop. Stops any in-progress scrub. + */ +int +zpool_do_scrub(int argc, char **argv) +{ + int c; + scrub_cbdata_t cb; + + cb.cb_type = POOL_SCRUB_EVERYTHING; + + /* check options */ + while ((c = getopt(argc, argv, "s")) != -1) { + switch (c) { + case 's': + cb.cb_type = POOL_SCRUB_NONE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(FALSE); + } + + return (for_each_pool(argc, argv, TRUE, scrub_callback, &cb)); +} + +typedef struct status_cbdata { + int cb_verbose; + int cb_explain; + int cb_count; + int cb_first; +} status_cbdata_t; + +/* + * Print out detailed scrub status. 
+ * Reads the vdev_stat_t stored under ZPOOL_CONFIG_STATS in 'nvroot' and
+ * prints one line: "none requested", a completed/stopped summary, or an
+ * in-progress estimate of the time remaining.
+ */
+void
+print_scrub_status(nvlist_t *nvroot)
+{
+	vdev_stat_t *stats;
+	uint_t nstats;
+	time_t began, finished, current;
+	double done;
+	uint64_t seen, all, remaining;
+	char *kind;
+
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&stats, &nstats) == 0);
+
+	/*
+	 * If there's never been a scrub, there's not much to say.
+	 */
+	if (stats->vs_scrub_end == 0 &&
+	    stats->vs_scrub_type == POOL_SCRUB_NONE) {
+		(void) printf(gettext("none requested\n"));
+		return;
+	}
+
+	if (stats->vs_scrub_type == POOL_SCRUB_RESILVER)
+		kind = "resilver";
+	else
+		kind = "scrub";
+
+	began = stats->vs_scrub_start;
+	finished = stats->vs_scrub_end;
+	current = time(NULL);
+	seen = stats->vs_scrub_examined;
+	all = stats->vs_alloc;
+
+	/* a non-zero end time means the scrub is no longer running */
+	if (finished != 0) {
+		const char *outcome;
+
+		outcome = stats->vs_scrub_complete ? "completed" : "stopped";
+		(void) printf(gettext("%s %s with %llu errors on %s"),
+		    kind, outcome, (u_longlong_t)stats->vs_scrub_errors,
+		    ctime(&finished));
+		return;
+	}
+
+	/* clamp so that the fraction is in (0, 1] and never divides by zero */
+	if (seen == 0)
+		seen = 1;
+	if (seen > all)
+		all = seen;
+
+	done = (double)seen / all;
+	remaining = (uint64_t)((current - began) *
+	    (1 - done) / done / 60);
+
+	(void) printf(gettext("%s in progress, %.2f%% done, %lluh%um to go\n"),
+	    kind, 100 * done,
+	    (u_longlong_t)(remaining / 60), (uint_t)(remaining % 60));
+}
+
+/*
+ * Print out configuration state as requested by status_callback. 
+ * Prints one row for the vdev 'nv' (name, state and read/write/checksum
+ * error counts, followed by either the auxiliary failure reason or the amount
+ * of data repaired on a leaf) and then recurses into each child with the
+ * indentation increased by two.
+ */
+void
+print_status_config(const char *name, nvlist_t *nv, int namewidth, int depth)
+{
+	nvlist_t **kids;
+	uint_t nstats, nkids, k;
+	vdev_stat_t *stats;
+	char reads[6], writes[6], cksums[6], fixed[6];
+
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&stats, &nstats) == 0);
+
+	/* a vdev without a children array is a leaf */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		nkids = 0;
+
+	(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
+	    name, state_to_name(stats->vs_state));
+
+	zfs_nicenum(stats->vs_read_errors, reads, sizeof (reads));
+	zfs_nicenum(stats->vs_write_errors, writes, sizeof (writes));
+	zfs_nicenum(stats->vs_checksum_errors, cksums, sizeof (cksums));
+	(void) printf(" %5s %5s %5s", reads, writes, cksums);
+
+	if (stats->vs_aux != 0) {
+		const char *why;
+
+		if (stats->vs_aux == VDEV_AUX_OPEN_FAILED)
+			why = gettext("cannot open");
+		else if (stats->vs_aux == VDEV_AUX_BAD_GUID_SUM)
+			why = gettext("missing device");
+		else if (stats->vs_aux == VDEV_AUX_NO_REPLICAS)
+			why = gettext("insufficient replicas");
+		else
+			why = gettext("corrupted data");
+
+		(void) printf(" ");
+		(void) printf("%s", why);
+	} else if (nkids == 0 && stats->vs_scrub_repaired != 0) {
+		const char *verb;
+
+		/*
+		 * Report bytes resilvered/repaired on leaf devices.
+		 */
+		if (stats->vs_scrub_type == POOL_SCRUB_RESILVER)
+			verb = "resilvered";
+		else
+			verb = "repaired";
+
+		zfs_nicenum(stats->vs_scrub_repaired, fixed, sizeof (fixed));
+		(void) printf(gettext(" %s %s"), fixed, verb);
+	}
+
+	(void) printf("\n");
+
+	for (k = 0; k < nkids; k++) {
+		nvlist_t *kid = kids[k];
+
+		print_status_config(vdev_get_name(kid), kid, namewidth,
+		    depth + 2);
+	}
+}
+
+/*
+ * Display a summary of pool status. Displays a summary such as:
+ *
+ *	  pool: tank
+ *	status: DEGRADED
+ *	reason: One or more devices ...
+ *	   see: http://www.sun.com/msg/ZFS-xxxx-01
+ *	config:
+ *		mirror		DEGRADED
+ *		  c1t0d0	OK
+ *		  c2t0d0	FAULTED
+ *
+ * When given the '-v' option, we print out the complete config. 
If the '-e' + * option is specified, then we print out error rate information as well. + */ +int +status_callback(zpool_handle_t *zhp, void *data) +{ + status_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + char *msgid; + int reason; + char *health; + + config = zpool_get_config(zhp); + reason = zpool_get_status(zhp, &msgid); + + cbp->cb_count++; + + /* + * If we were given 'zpool status -x', only report those pools with + * problems. + */ + if (reason == ZPOOL_STATUS_OK && cbp->cb_explain) + return (0); + + if (cbp->cb_first) + cbp->cb_first = FALSE; + else + (void) printf("\n"); + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_HEALTH, + &health) == 0); + + (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); + (void) printf(gettext(" state: %s\n"), health); + + switch (reason) { + case ZPOOL_STATUS_MISSING_DEV_R: + (void) printf(gettext("status: One or more devices could not " + "be opened. Sufficient replicas exist for\n\tthe pool to " + "continue functioning in a degraded state.\n")); + (void) printf(gettext("action: Attach the missing device and " + "online it using 'zpool online'.\n")); + break; + + case ZPOOL_STATUS_MISSING_DEV_NR: + (void) printf(gettext("status: One or more devices could not " + "be opened. There are insufficient\n\treplicas for the " + "pool to continue functioning.\n")); + (void) printf(gettext("action: Attach the missing device and " + "online it using 'zpool online'.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_LABEL_R: + (void) printf(gettext("status: One or more devices could not " + "be used because the label is missing or\n\tinvalid. " + "Sufficient replicas exist for the pool to continue\n\t" + "functioning in a degraded state.\n")); + (void) printf(gettext("action: Replace the device using " + "'zpool replace'.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_LABEL_NR: + (void) printf(gettext("status: One or more devices could not " + "be used because the the label is missing \n\tor invalid. 
" + "There are insufficient replicas for the pool to " + "continue\n\tfunctioning.\n")); + (void) printf(gettext("action: Destroy and re-create the pool " + "from a backup source.\n")); + break; + + case ZPOOL_STATUS_FAILING_DEV: + (void) printf(gettext("status: One or more devices has " + "experienced an unrecoverable error. An\n\tattempt was " + "made to correct the error. Applications are " + "unaffected.\n")); + (void) printf(gettext("action: Determine if the device needs " + "to be replaced, and clear the errors\n\tusing " + "'zpool online' or replace the device with 'zpool " + "replace'.\n")); + break; + + case ZPOOL_STATUS_OFFLINE_DEV: + (void) printf(gettext("status: One or more devices has " + "been taken offline by the adminstrator.\n\tSufficient " + "replicas exist for the pool to continue functioning in " + "a\n\tdegraded state.\n")); + (void) printf(gettext("action: Online the device using " + "'zpool online' or replace the device with\n\t'zpool " + "replace'.\n")); + break; + + case ZPOOL_STATUS_RESILVERING: + (void) printf(gettext("status: One or more devices is " + "currently being resilvered. The pool will\n\tcontinue " + "to function, possibly in a degraded state.\n")); + (void) printf(gettext("action: Wait for the resilver to " + "complete.\n")); + break; + + default: + /* + * The remaining errors can't actually be generated, yet. 
+ */ + assert(reason == ZPOOL_STATUS_OK); + } + + if (msgid != NULL) + (void) printf(gettext(" see: http://www.sun.com/msg/%s\n"), + msgid); + + if (config != NULL) { + int namewidth; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + (void) printf(gettext(" scrub: ")); + print_scrub_status(nvroot); + + namewidth = max_width(nvroot, 0, 0); + if (namewidth < 10) + namewidth = 10; + + (void) printf(gettext("config:\n\n")); + (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth, + "NAME", "STATE", "READ", "WRITE", "CKSUM"); + print_status_config(zpool_get_name(zhp), nvroot, namewidth, 0); + } else { + (void) printf(gettext("config: The configuration cannot be " + "determined.\n")); + } + + return (0); +} + +/* + * zpool status [-vx] [pool] ... + * + * -v Display complete error logs + * -x Display only pools with potential problems + * + * Describes the health status of all pools or some subset. + */ +int +zpool_do_status(int argc, char **argv) +{ + int c; + int ret; + status_cbdata_t cb = { 0 }; + + /* check options */ + while ((c = getopt(argc, argv, "vx")) != -1) { + switch (c) { + case 'v': + cb.cb_verbose = TRUE; + break; + case 'x': + cb.cb_explain = TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(FALSE); + } + } + + argc -= optind; + argv += optind; + + cb.cb_first = TRUE; + + ret = for_each_pool(argc, argv, TRUE, status_callback, &cb); + + if (argc == 0 && cb.cb_count == 0) + (void) printf(gettext("no pools available\n")); + else if (cb.cb_explain && cb.cb_first) { + if (argc == 0) { + (void) printf(gettext("all pools are healthy\n")); + } else { + int i; + for (i = 0; i < argc; i++) + (void) printf(gettext("pool '%s' is healthy\n"), + argv[i]); + } + } + + return (ret); +} + +int +main(int argc, char **argv) +{ + int ret; + int i; + char *cmdname; + + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); + + opterr = 0; + + /* + * Make sure the user 
has specified some command. + */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing command\n")); + usage(FALSE); + } + + cmdname = argv[1]; + + /* + * Special case '-?' + */ + if (strcmp(cmdname, "-?") == 0) + usage(TRUE); + + /* + * Run the appropriate command. + */ + for (i = 0; i < NCOMMAND; i++) { + if (command_table[i].name == NULL) + continue; + + if (strcmp(cmdname, command_table[i].name) == 0) { + current_command = &command_table[i]; + ret = command_table[i].func(argc - 1, argv + 1); + break; + } + } + + /* + * 'freeze' is a vile debugging abomination, so we treat it as such. + */ + if (strcmp(cmdname, "freeze") == 0 && argc == 3) { + char buf[8192]; + int fd = open("/dev/zpoolctl", O_RDWR); + (void) strcpy((void *)buf, argv[2]); + return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf)); + } + + if (i == NCOMMAND) { + (void) fprintf(stderr, gettext("unrecognized " + "command '%s'\n"), cmdname); + usage(FALSE); + } + + /* + * The 'ZFS_ABORT' environment variable causes us to dump core on exit + * for the purposes of running ::findleaks. + */ + if (getenv("ZFS_ABORT") != NULL) { + (void) printf("dumping core by request\n"); + abort(); + } + + return (ret); +} diff --git a/usr/src/cmd/zpool/zpool_util.c b/usr/src/cmd/zpool/zpool_util.c new file mode 100644 index 0000000000..a9e7bb600c --- /dev/null +++ b/usr/src/cmd/zpool/zpool_util.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <libgen.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include "zpool_util.h" + +/* + * Utility function to guarantee malloc() success. + */ +void * +safe_malloc(size_t size) +{ + void *data; + + if ((data = calloc(1, size)) == NULL) { + (void) fprintf(stderr, "internal error: out of memory\n"); + exit(1); + } + + return (data); +} + +/* + * Same as above, but for strdup() + */ +char * +safe_strdup(const char *str) +{ + char *ret; + + if ((ret = strdup(str)) == NULL) { + (void) fprintf(stderr, "internal error: out of memory\n"); + exit(1); + } + + return (ret); +} + +/* + * Display an out of memory error message and abort the current program. + */ +void +no_memory(void) +{ + assert(errno == ENOMEM); + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + exit(1); +} + +/* + * Given a vdev, return the name to display in iostat. If the vdev has a path, + * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type. 
+ */ +const char * +vdev_get_name(nvlist_t *nv) +{ + char *path, *type; + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { + if (strncmp(path, "/dev/dsk/", 9) == 0) + return (path + 9); + return (path); + } + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + return (type); +} diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h new file mode 100644 index 0000000000..7287a96cb3 --- /dev/null +++ b/usr/src/cmd/zpool/zpool_util.h @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef ZPOOL_UTIL_H +#define ZPOOL_UTIL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libnvpair.h> +#include <libzfs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Basic utility functions + */ +void *safe_malloc(size_t); +char *safe_strdup(const char *); +void no_memory(void); + +const char *vdev_get_name(nvlist_t *nv); + +/* + * Virtual device functions + */ +nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep, + int argc, char **argv); + +/* + * Pool list functions + */ +int for_each_pool(int, char **, int unavail, zpool_iter_f, void *); + +typedef struct zpool_list zpool_list_t; + +zpool_list_t *pool_list_get(int, char **, int *); +void pool_list_update(zpool_list_t *); +int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); +void pool_list_free(zpool_list_t *); +int pool_list_count(zpool_list_t *); +void pool_list_remove(zpool_list_t *, zpool_handle_t *); + +/* + * Dataset functions + */ +int unmount_datasets(zpool_handle_t *, int); +int mount_datasets(zpool_handle_t *, const char *); + +#ifdef __cplusplus +} +#endif + +#endif /* ZPOOL_UTIL_H */ diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c new file mode 100644 index 0000000000..669807285d --- /dev/null +++ b/usr/src/cmd/zpool/zpool_vdev.c @@ -0,0 +1,1395 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Functions to convert between a list of vdevs and an nvlist representing the + * configuration. Each entry in the list can be one of: + * + * Device vdevs + * disk=(path=..., devid=...) + * file=(path=...) + * + * Group vdevs + * raidz=(...) + * mirror=(...) + * + * While the underlying implementation supports it, group vdevs cannot contain + * other group vdevs. All userland verification of devices is contained within + * this file. If successful, the nvlist returned can be passed directly to the + * kernel; we've done as much verification as possible in userland. + * + * The only function exported by this file is 'get_vdev_spec'. The function + * performs several passes: + * + * 1. Construct the vdev specification. Performs syntax validation and + * makes sure each device is valid. + * 2. Check for devices in use. Using libdiskmgt, makes sure that no + * devices are also in use. Some can be overridden using the 'force' + * flag, others cannot. + * 3. Check for replication errors if the 'force' flag is not specified. + * validates that the replication level is consistent across the + * entire pool. + * 4. Label any whole disks with an EFI label. 
+ */ + +#include <assert.h> +#include <devid.h> +#include <errno.h> +#include <fcntl.h> +#include <libdiskmgt.h> +#include <libintl.h> +#include <libnvpair.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/efi_partition.h> +#include <sys/stat.h> +#include <sys/vtoc.h> +#include <sys/mntent.h> + +#include <libzfs.h> + +#include "zpool_util.h" + +#define DISK_ROOT "/dev/dsk" +#define RDISK_ROOT "/dev/rdsk" +#define BACKUP_SLICE "s2" + +/* + * For any given vdev specification, we can have multiple errors. The + * vdev_error() function keeps track of whether we have seen an error yet, and + * prints out a header if its the first error we've seen. + */ +int error_seen; +int is_force; + +void +vdev_error(const char *fmt, ...) +{ + va_list ap; + + if (!error_seen) { + (void) fprintf(stderr, gettext("invalid vdev specification\n")); + if (!is_force) + (void) fprintf(stderr, gettext("use '-f' to override " + "the following errors:\n")); + else + (void) fprintf(stderr, gettext("the following errors " + "must be manually repaired:\n")); + error_seen = TRUE; + } + + va_start(ap, fmt); + (void) vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void +_libdskmgt_error(int err, const char *file, int line) +{ + if (err == 0) + no_memory(); + + /* + * Some of the libdiskmgt stuff requires root privileges in order to + * examine devices. Bail out gracefully in this case. + */ + if (err == EACCES) { + (void) fprintf(stderr, gettext("cannot determine disk " + "configuration: permission denied\n")); + exit(1); + } + + (void) fprintf(stderr, gettext("internal error: disk configuration " + "error %d at line %d of file %s\n"), err, line, file); + abort(); +} + +#define libdskmgt_error(err) (_libdskmgt_error((err), __FILE__, __LINE__)) + +/* + * Checks whether a single slice overlaps with any of the slices in the provided + * list. Called by check_overlapping(). 
+ */ +int +is_overlapping(dm_descriptor_t slice, dm_descriptor_t media, + dm_descriptor_t *slice_list, int *error, char **overlaps_with) +{ + int i = 0; + uint32_t in_snum; + uint64_t start_block = 0; + uint64_t end_block = 0; + uint64_t media_size = 0; + uint64_t size = 0; + nvlist_t *media_attrs; + nvlist_t *slice_attrs; + + media_attrs = dm_get_attributes(media, error); + if (*error != 0) { + return (-1); + } + + if (media_attrs == NULL) { + return (0); + } + + *error = nvlist_lookup_uint64(media_attrs, DM_NACCESSIBLE, &media_size); + if (*error != 0) { + nvlist_free(media_attrs); + return (-1); + } + + slice_attrs = dm_get_attributes(slice, error); + if (*error != 0) { + nvlist_free(media_attrs); + return (-1); + } + /* + * Not really possible, but the error above would catch any system + * errors. + */ + if (slice_attrs == NULL) { + nvlist_free(media_attrs); + return (0); + } + + *error = nvlist_lookup_uint64(slice_attrs, DM_START, &start_block); + if (*error != 0) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + return (-1); + } + + *error = nvlist_lookup_uint64(slice_attrs, DM_SIZE, &size); + if (*error != 0) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + return (-1); + } + *error = nvlist_lookup_uint32(slice_attrs, DM_INDEX, &in_snum); + if (*error != 0) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + return (-1); + } + + end_block = (start_block + size) - 1; + + for (i = 0; slice_list[i]; i ++) { + uint64_t other_start; + uint64_t other_end; + uint64_t other_size; + uint32_t snum; + + nvlist_t *other_attrs = dm_get_attributes(slice_list[i], error); + if (*error != 0) { + return (-1); + } + + if (other_attrs == NULL) + continue; + + *error = nvlist_lookup_uint64(other_attrs, DM_START, + &other_start); + if (*error) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (-1); + } + + *error = nvlist_lookup_uint64(other_attrs, DM_SIZE, + &other_size); + + if (*error) { + 
nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (-1); + } + + other_end = (other_size + other_start) - 1; + + *error = nvlist_lookup_uint32(other_attrs, DM_INDEX, + &snum); + + if (*error) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (-1); + } + + /* + * Check to see if there are > 2 overlapping regions + * on this media in the same region as this slice. + * This is done by assuming the following: + * Slice 2 is the backup slice if it is the size + * of the whole disk + * If slice 2 is the overlap and slice 2 is the size of + * the whole disk, continue. If another slice is found + * that overlaps with our slice, return it. + * There is the potential that there is more than one slice + * that our slice overlaps with, however, we only return + * the first overlapping slice we find. + * + */ + + if (start_block >= other_start && start_block <= other_end) { + if ((snum == 2 && (other_size == media_size)) || + snum == in_snum) { + continue; + } else { + char *str = dm_get_name(slice_list[i], error); + if (*error != 0) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (-1); + } + *overlaps_with = strdup(str); + dm_free_name(str); + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (1); + } + } else if (other_start >= start_block && + other_start <= end_block) { + if ((snum == 2 && (other_size == media_size)) || + snum == in_snum) { + continue; + } else { + char *str = dm_get_name(slice_list[i], error); + if (*error != 0) { + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (-1); + } + *overlaps_with = strdup(str); + dm_free_name(str); + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + nvlist_free(other_attrs); + return (1); + } + } + nvlist_free(other_attrs); + } + nvlist_free(media_attrs); + nvlist_free(slice_attrs); + return (0); +} + +/* + * 
Check to see whether the given slice overlaps with any other slices. Get the + * associated slice information and pass on to is_overlapping(). + */ +int +check_overlapping(const char *slicename, dm_descriptor_t slice) +{ + dm_descriptor_t *media; + dm_descriptor_t *slices; + int error; + char *overlaps; + int ret = 0; + + /* + * Get the list of slices be fetching the associated media, and then all + * associated slices. + */ + media = dm_get_associated_descriptors(slice, DM_MEDIA, &error); + if (media == NULL || *media == NULL || error != 0) + libdskmgt_error(error); + + slices = dm_get_associated_descriptors(*media, DM_SLICE, &error); + if (slices == NULL || *slices == NULL || error != 0) + libdskmgt_error(error); + + + overlaps = NULL; + if (is_overlapping(slice, *media, slices, &error, &overlaps)) { + vdev_error(gettext("device '%s' overlaps with '%s'\n"), + slicename, overlaps); + ret = -1; + } + + if (overlaps != NULL) + free(overlaps); + dm_free_descriptors(slices); + dm_free_descriptors(media); + + return (ret); +} + +/* + * Validate the given slice. If 'diskname' is non-NULL, then this is a single + * slice on a complete disk. If 'force' is set, then the user specified '-f' + * and we only want to report error for completely forbidden uses. + */ +int +check_slice(const char *slicename, dm_descriptor_t slice, int force, + int overlap) +{ + nvlist_t *stats; + int err; + nvpair_t *nvwhat, *nvdesc; + char *what, *desc, *name; + int found = FALSE; + int found_zfs = FALSE; + int fd; + + if ((stats = dm_get_stats(slice, DM_SLICE_STAT_USE, &err)) == NULL) + libdskmgt_error(err); + + /* + * Always check to see if this is used by an active ZFS pool. + */ + if ((fd = open(slicename, O_RDONLY)) > 0) { + if (zpool_in_use(fd, &desc, &name)) { + + if (!force) { + vdev_error(gettext("%s is part of %s pool " + "'%s'\n"), slicename, desc, name); + found = found_zfs = TRUE; + } + + free(desc); + free(name); + } + + (void) close(fd); + } + + /* + * This slice is in use. 
Print out a descriptive message describing who + * is using it. The 'used_by' nvlist is formatted as: + * + * (used_by=what, used_name=desc, ...) + * + * Each 'used_by' must be accompanied by a 'used_name'. + */ + nvdesc = NULL; + for (;;) { + nvwhat = nvlist_next_nvpair(stats, nvdesc); + nvdesc = nvlist_next_nvpair(stats, nvwhat); + + if (nvwhat == NULL || nvdesc == NULL) + break; + + assert(strcmp(nvpair_name(nvwhat), DM_USED_BY) == 0); + assert(strcmp(nvpair_name(nvdesc), DM_USED_NAME) == 0); + + verify(nvpair_value_string(nvwhat, &what) == 0); + verify(nvpair_value_string(nvdesc, &desc) == 0); + + /* + * For currently mounted filesystems, filesystems in + * /etc/vfstab, or dedicated dump devices, we can never use + * them, even if '-f' is specified. The rest of the errors + * indicate that a filesystem was detected on disk, which can be + * overridden with '-f'. + */ + if (strcmp(what, DM_USE_MOUNT) == 0 || + strcmp(what, DM_USE_VFSTAB) == 0 || + strcmp(what, DM_USE_DUMP) == 0) { + found = TRUE; + if (strcmp(what, DM_USE_MOUNT) == 0) { + vdev_error(gettext("%s is " + "currently mounted on %s\n"), + slicename, desc); + } else if (strcmp(what, DM_USE_VFSTAB) == 0) { + vdev_error(gettext("%s is usually " + "mounted at %s in /etc/vfstab\n"), + slicename, desc); + } else if (strcmp(what, DM_USE_DUMP) == 0) { + vdev_error(gettext("%s is the " + "dedicated dump device\n"), slicename); + } + } else if (!force) { + found = TRUE; + if (strcmp(what, DM_USE_SVM) == 0) { + vdev_error(gettext("%s is part of " + "SVM volume %s\n"), slicename, desc); + } else if (strcmp(what, DM_USE_LU) == 0) { + vdev_error(gettext("%s is in use " + "for live upgrade %s\n"), slicename, desc); + } else if (strcmp(what, DM_USE_VXVM) == 0) { + vdev_error(gettext("%s is part of " + "VxVM volume %s\n"), slicename, desc); + } else if (strcmp(what, DM_USE_FS) == 0) { + /* + * We should have already caught ZFS in-use + * filesystems above. 
If the ZFS version is + * different, or there was some other critical + * failure, it's possible for fstyp to report it + * as in-use, but zpool_open_by_dev() to fail. + */ + if (strcmp(desc, MNTTYPE_ZFS) != 0) + vdev_error(gettext("%s contains a %s " + "filesystem\n"), slicename, desc); + else if (!found_zfs) + vdev_error(gettext("%s is part of an " + "outdated or damaged ZFS " + "pool\n"), slicename); + } else { + vdev_error(gettext("is used by %s as %s\n"), + slicename, what, desc); + } + } else { + found = FALSE; + } + } + + /* + * Perform any overlap checking if requested to do so. + */ + if (overlap && !force) + found |= (check_overlapping(slicename, slice) != 0); + + return (found ? -1 : 0); +} + +/* + * Validate a whole disk. Iterate over all slices on the disk and make sure + * that none is in use by calling check_slice(). + */ +/* ARGSUSED */ +int +check_disk(const char *name, dm_descriptor_t disk, int force) +{ + dm_descriptor_t *drive, *media, *slice; + int err = 0; + int i; + int ret; + + /* + * Get the drive associated with this disk. This should never fail, + * because we already have an alias handle open for the device. + */ + if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, + &err)) == NULL || *drive == NULL) + libdskmgt_error(err); + + if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, + &err)) == NULL) + libdskmgt_error(err); + + dm_free_descriptors(drive); + + /* + * It is possible that the user has specified a removable media drive, + * and the media is not present. + */ + if (*media == NULL) { + vdev_error(gettext("'%s' has no media in drive\n"), name); + dm_free_descriptors(media); + return (-1); + } + + if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, + &err)) == NULL) + libdskmgt_error(err); + + dm_free_descriptors(media); + + ret = 0; + + /* + * Iterate over all slices and report any errors. We don't care about + * overlapping slices because we are using the whole disk. 
+ */ + for (i = 0; slice[i] != NULL; i++) { + if (check_slice(dm_get_name(slice[i], &err), slice[i], + force, FALSE) != 0) + ret = -1; + } + + dm_free_descriptors(slice); + return (ret); +} + + +/* + * Validate a device. Determines whether the device is a disk, slice, or + * partition, and passes it off to an appropriate function. + */ +int +check_device(const char *path, int force) +{ + dm_descriptor_t desc; + int err; + char *dev, rpath[MAXPATHLEN]; + + /* + * For whole disks, libdiskmgt does not include the leading dev path. + */ + dev = strrchr(path, '/'); + assert(dev != NULL); + dev++; + if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) + return (check_disk(path, desc, force)); + + /* + * If 'err' is not ENODEV, then we've had an unexpected error from + * libdiskmgt. The only explanation is that we ran out of memory. + */ + if (err != ENODEV) + libdskmgt_error(err); + + /* + * Determine if this is a slice. + */ + if ((desc = dm_get_descriptor_by_name(DM_SLICE, (char *)path, &err)) + != NULL) + return (check_slice(path, desc, force, TRUE)); + + if (err != ENODEV) + libdskmgt_error(err); + + /* + * Check for a partition. libdiskmgt expects path of /dev/rdsk when + * dealing with partitions, so convert it. + */ + (void) snprintf(rpath, sizeof (rpath), "/dev/rdsk/%s", dev); + if ((desc = dm_get_descriptor_by_name(DM_PARTITION, rpath, &err)) + != NULL) { + /* XXZFS perform checking on partitions */ + return (0); + } + + if (err != ENODEV) + libdskmgt_error(err); + + /* + * At this point, libdiskmgt failed to find the device as either a whole + * disk or a slice. Ignore these errors, as we know that it at least a + * block device. The user may have provided us with some unknown device + * that libdiskmgt doesn't know about. + */ + return (0); +} + +/* + * Check that a file is valid. All we can do in this case is check that it's + * not in use by another pool. 
+ */ +int +check_file(const char *file, int force) +{ + char *desc, *name; + int fd; + int ret = 0; + + if ((fd = open(file, O_RDONLY)) < 0) + return (0); + + if (zpool_in_use(fd, &desc, &name)) { + if (strcmp(desc, gettext("active")) == 0 || + !force) { + vdev_error(gettext("%s is part of %s pool '%s'\n"), + file, desc, name); + ret = -1; + } + + free(desc); + free(name); + } + + (void) close(fd); + return (ret); +} + +static int +is_whole_disk(const char *arg, struct stat64 *statbuf) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "%s%s", arg, BACKUP_SLICE); + if (stat64(path, statbuf) == 0) + return (TRUE); + + return (FALSE); +} + +/* + * Create a leaf vdev. Determine if this is a file or a device. If it's a + * device, fill in the device id to make a complete nvlist. Valid forms for a + * leaf vdev are: + * + * /dev/dsk/xxx Complete disk path + * /xxx Full path to file + * xxx Shorthand for /dev/dsk/xxx + */ +nvlist_t * +make_leaf_vdev(const char *arg) +{ + char path[MAXPATHLEN]; + struct stat64 statbuf; + nvlist_t *vdev = NULL; + char *type = NULL; + int wholedisk = FALSE; + + /* + * Determine what type of vdev this is, and put the full path into + * 'path'. We detect whether this is a device of file afterwards by + * checking the st_mode of the file. + */ + if (arg[0] == '/') { + /* + * Complete device or file path. Exact type is determined by + * examining the file descriptor afterwards. + */ + if (is_whole_disk(arg, &statbuf)) { + wholedisk = TRUE; + } else if (stat64(arg, &statbuf) != 0) { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + arg, strerror(errno)); + return (NULL); + } + + (void) strlcpy(path, arg, sizeof (path)); + } else { + /* + * This may be a short path for a device, or it could be total + * gibberish. Check to see if it's a known device in + * /dev/dsk/. As part of this check, see if we've been given a + * an entire disk (minus the slice number). 
+ */ + (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, + arg); + if (is_whole_disk(path, &statbuf)) { + wholedisk = TRUE; + } else if (stat64(path, &statbuf) != 0) { + /* + * If we got ENOENT, then the user gave us + * gibberish, so try to direct them with a + * reasonable error message. Otherwise, + * regurgitate strerror() since it's the best we + * can do. + */ + if (errno == ENOENT) { + (void) fprintf(stderr, + gettext("cannot open '%s': no such " + "device in %s\n"), arg, DISK_ROOT); + (void) fprintf(stderr, + gettext("must be a full path or " + "shorthand device name\n")); + return (NULL); + } else { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (NULL); + } + } + } + + /* + * Determine whether this is a device or a file. + */ + if (S_ISBLK(statbuf.st_mode)) { + type = VDEV_TYPE_DISK; + } else if (S_ISREG(statbuf.st_mode)) { + type = VDEV_TYPE_FILE; + } else { + (void) fprintf(stderr, gettext("cannot use '%s': must be a " + "block device or regular file\n"), path); + return (NULL); + } + + /* + * Finally, we have the complete device or file, and we know that it is + * acceptable to use. Construct the nvlist to describe this vdev. All + * vdevs have a 'path' element, and devices also have a 'devid' element. + */ + verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); + verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); + verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); + + /* + * For a whole disk, defer getting its devid until after labeling it. + */ + if (S_ISBLK(statbuf.st_mode) && !wholedisk) { + /* + * Get the devid for the device. 
+ */ + int fd; + ddi_devid_t devid; + char *minor = NULL, *devid_str = NULL; + + if ((fd = open(path, O_RDONLY)) < 0) { + (void) fprintf(stderr, gettext("cannot open '%s': " + "%s\n"), path, strerror(errno)); + nvlist_free(vdev); + return (NULL); + } + + if (devid_get(fd, &devid) == 0) { + if (devid_get_minor_name(fd, &minor) == 0 && + (devid_str = devid_str_encode(devid, minor)) != + NULL) { + verify(nvlist_add_string(vdev, + ZPOOL_CONFIG_DEVID, devid_str) == 0); + } + if (devid_str != NULL) + devid_str_free(devid_str); + if (minor != NULL) + devid_str_free(minor); + devid_free(devid); + } + + (void) close(fd); + } + + return (vdev); +} + +/* + * Go through and verify the replication level of the pool is consistent. + * Performs the following checks: + * + * For the new spec, verifies that devices in mirrors and raidz are the + * same size. + * + * If the current configuration already has inconsistent replication + * levels, ignore any other potential problems in the new spec. + * + * Otherwise, make sure that the current spec (if there is one) and the new + * spec have consistent replication levels. + */ +typedef struct replication_level { + char *type; + int level; +} replication_level_t; + +/* + * Given a list of toplevel vdevs, return the current replication level. If + * the config is inconsistent, then NULL is returned. If 'fatal' is set, then + * an error message will be displayed for each self-inconsistent vdev. 
+ */ +replication_level_t * +get_replication(nvlist_t *nvroot, int fatal) +{ + nvlist_t **top; + uint_t t, toplevels; + nvlist_t **child; + uint_t c, children; + nvlist_t *nv; + char *type; + replication_level_t lastrep, rep, *ret; + int dontreport; + + ret = safe_malloc(sizeof (replication_level_t)); + + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &top, &toplevels) == 0); + + lastrep.type = NULL; + for (t = 0; t < toplevels; t++) { + nv = top[t]; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + /* + * This is a 'file' or 'disk' vdev. + */ + rep.type = type; + rep.level = 1; + } else { + uint64_t vdev_size; + + /* + * This is a mirror or RAID-Z vdev. Go through and make + * sure the contents are all the same (files vs. disks), + * keeping track of the number of elements in the + * process. + * + * We also check that the size of each vdev (if it can + * be determined) is the same. + */ + rep.type = type; + rep.level = 0; + + /* + * The 'dontreport' variable indicatest that we've + * already reported an error for this spec, so don't + * bother doing it again. + */ + type = NULL; + dontreport = 0; + vdev_size = -1ULL; + for (c = 0; c < children; c++) { + nvlist_t *cnv = child[c]; + char *path; + struct stat64 statbuf; + uint64_t size = -1ULL; + char *childtype; + int fd, err; + + rep.level++; + + verify(nvlist_lookup_string(cnv, + ZPOOL_CONFIG_TYPE, &childtype) == 0); + verify(nvlist_lookup_string(cnv, + ZPOOL_CONFIG_PATH, &path) == 0); + + /* + * If we have a raidz/mirror that combines disks + * with files, report it as an error. 
+ */ + if (!dontreport && type != NULL && + strcmp(type, childtype) != 0) { + if (ret != NULL) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication " + "level: %s contains both " + "files and devices\n"), + rep.type); + else + return (NULL); + dontreport = TRUE; + } + + /* + * According to stat(2), the value of 'st_size' + * is undefined for block devices and character + * devices. But there is no effective way to + * determine the real size in userland. + * + * Instead, we'll take advantage of an + * implementation detail of spec_size(). If the + * device is currently open, then we (should) + * return a valid size. + * + * If we still don't get a valid size (indicated + * by a size of 0 or MAXOFFSET_T), then ignore + * this device altogether. + */ + if ((fd = open(path, O_RDONLY)) >= 0) { + err = fstat64(fd, &statbuf); + (void) close(fd); + } else { + err = stat64(path, &statbuf); + } + + if (err != 0 || + statbuf.st_size == 0 || + statbuf.st_size == MAXOFFSET_T) + continue; + + size = statbuf.st_size; + + /* + * Also check the size of each device. If they + * differ, then report an error. + */ + if (!dontreport && vdev_size != -1ULL && + size != vdev_size) { + if (ret != NULL) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "%s contains devices of " + "different sizes\n"), + rep.type); + else + return (NULL); + dontreport = TRUE; + } + + type = childtype; + vdev_size = size; + } + } + + /* + * At this point, we have the replication of the last toplevel + * vdev in 'rep'. Compare it to 'lastrep' to see if its + * different. 
+ */ + if (lastrep.type != NULL) { + if (strcmp(lastrep.type, rep.type) != 0) { + if (ret != NULL) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication " + "level: both %s and %s vdevs are " + "present\n"), + lastrep.type, rep.type); + else + return (NULL); + } else if (lastrep.level != rep.level) { + if (ret) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication " + "level: %d-way %s and %d-way %s " + "vdevs are present\n"), + lastrep.level, lastrep.type, + rep.level, rep.type); + else + return (NULL); + } + } + lastrep = rep; + } + + if (ret != NULL) { + ret->type = rep.type; + ret->level = rep.level; + } + + return (ret); +} + +/* + * Check the replication level of the vdev spec against the current pool. Calls + * get_replication() to make sure the new spec is self-consistent. If the pool + * has a consistent replication level, then we ignore any errors. Otherwise, + * report any difference between the two. + */ +int +check_replication(nvlist_t *config, nvlist_t *newroot) +{ + replication_level_t *current = NULL, *new; + int ret; + + /* + * If we have a current pool configuration, check to see if it's + * self-consistent. If not, simply return success. + */ + if (config != NULL) { + nvlist_t *nvroot; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if ((current = get_replication(nvroot, FALSE)) == NULL) + return (0); + } + + /* + * Get the replication level of the new vdev spec, reporting any + * inconsistencies found. + */ + if ((new = get_replication(newroot, TRUE)) == NULL) { + free(current); + return (-1); + } + + /* + * Check to see if the new vdev spec matches the replication level of + * the current pool. 
+ */ + ret = 0; + if (current != NULL) { + if (strcmp(current->type, new->type) != 0 || + current->level != new->level) { + vdev_error(gettext( + "mismatched replication level: pool uses %d-way %s " + "and new vdev uses %d-way %s\n"), + current->level, current->type, new->level, + new->type); + ret = -1; + } + } + + free(new); + if (current != NULL) + free(current); + + return (ret); +} + +/* + * Label an individual disk. The name provided is the short name, stripped of + * any leading /dev path. + */ +int +label_disk(char *name) +{ + char path[MAXPATHLEN]; + struct dk_gpt *vtoc; + int fd; + size_t resv = 16384; + + (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name, + BACKUP_SLICE); + + if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + /* + * This shouldn't happen. We've long since verified that this + * is a valid device. + */ + (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (-1); + } + + + if (efi_alloc_and_init(fd, 9, &vtoc) != 0) { + /* + * The only way this can fail is if we run out of memory, or we + * were unable to read the disk geometry. + */ + if (errno == ENOMEM) + no_memory(); + + (void) fprintf(stderr, gettext("cannot label '%s': unable to " + "read disk geometry\n"), name); + (void) close(fd); + return (-1); + } + + vtoc->efi_parts[0].p_start = vtoc->efi_first_u_lba; + vtoc->efi_parts[0].p_size = vtoc->efi_last_u_lba + 1 - + vtoc->efi_first_u_lba - resv; + + /* + * Why we use V_USR: V_BACKUP confuses users, and is considered + * disposable by some EFI utilities (since EFI doesn't have a backup + * slice). V_UNASSIGNED is supposed to be used only for zero size + * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, + * etc. were all pretty specific. V_USR is as close to reality as we + * can get, in the absence of V_OTHER. 
+ */ + vtoc->efi_parts[0].p_tag = V_USR; + (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); + + vtoc->efi_parts[8].p_start = vtoc->efi_last_u_lba + 1 - resv; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + + if (efi_write(fd, vtoc) != 0) { + /* + * Currently, EFI labels are not supported for IDE disks, and it + * is likely that they will not be supported on other drives for + * some time. Print out a helpful error message directing the + * user to manually label the disk and give a specific slice. + */ + (void) fprintf(stderr, gettext("cannot label '%s': failed to " + "write EFI label\n"), name); + (void) fprintf(stderr, gettext("use fdisk(1M) to partition " + "the disk, and provide a specific slice\n")); + (void) close(fd); + return (-1); + } + + (void) close(fd); + return (0); +} + +/* + * Go through and find any whole disks in the vdev specification, labelling them + * as appropriate. When constructing the vdev spec, we were unable to open this + * device in order to provide a devid. Now that we have labelled the disk and + * know that slice 0 is valid, we can construct the devid now. + * + * If the disk was already labelled with an EFI label, we will have gotten the + * devid already (because we were able to open the whole disk). Otherwise, we + * need to get the devid after we label the disk. + */ +int +make_disks(nvlist_t *nv) +{ + nvlist_t **child; + uint_t c, children; + char *type, *path, *diskname; + char buf[MAXPATHLEN]; + struct stat64 statbuf; + int fd; + int ret; + ddi_devid_t devid; + char *minor = NULL, *devid_str = NULL; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + + if (strcmp(type, VDEV_TYPE_DISK) != 0) + return (0); + + /* + * We have a disk device. Get the path to the device + * and see if its a whole disk by appending the backup + * slice and stat()ing the device. 
+ */ + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + + if (!is_whole_disk(path, &statbuf)) + return (0); + + diskname = strrchr(path, '/'); + assert(diskname != NULL); + diskname++; + if (label_disk(diskname) != 0) + return (-1); + + /* + * Fill in the devid, now that we've labeled the disk. + */ + (void) snprintf(buf, sizeof (buf), "%ss0", path); + if ((fd = open(buf, O_RDONLY)) < 0) { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + buf, strerror(errno)); + return (-1); + } + + if (devid_get(fd, &devid) == 0) { + if (devid_get_minor_name(fd, &minor) == 0 && + (devid_str = devid_str_encode(devid, minor)) != + NULL) { + verify(nvlist_add_string(nv, + ZPOOL_CONFIG_DEVID, devid_str) == 0); + } + if (devid_str != NULL) + devid_str_free(devid_str); + if (minor != NULL) + devid_str_free(minor); + devid_free(devid); + } + + (void) close(fd); + + return (0); + } + + for (c = 0; c < children; c++) + if ((ret = make_disks(child[c])) != 0) + return (ret); + + return (0); +} + +/* + * Go through and find any devices that are in use. We rely on libdiskmgt for + * the majority of this task. + */ +int +check_in_use(nvlist_t *nv, int force) +{ + nvlist_t **child; + uint_t c, children; + char *type, *path; + int ret; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) + ret = check_device(path, force); + + if (strcmp(type, VDEV_TYPE_FILE) == 0) + ret = check_file(path, force); + + return (ret); + } + + for (c = 0; c < children; c++) + if ((ret = check_in_use(child[c], force)) != 0) + return (ret); + + return (0); +} + +/* + * Construct a syntactically valid vdev specification, + * and ensure that all devices and files exist and can be opened. 
+ * Note: we don't bother freeing anything in the error paths + * because the program is just going to exit anyway. + */ +nvlist_t * +construct_spec(int argc, char **argv) +{ + nvlist_t *nvroot, *nv, **top; + int t, toplevels; + + top = NULL; + toplevels = 0; + + while (argc > 0) { + nv = NULL; + + /* + * If it's a mirror or raidz, the subsequent arguments are + * its leaves -- until we encounter the next mirror or raidz. + */ + if (strcmp(argv[0], VDEV_TYPE_MIRROR) == 0 || + strcmp(argv[0], VDEV_TYPE_RAIDZ) == 0) { + + char *type = argv[0]; + nvlist_t **child = NULL; + int children = 0; + int c; + + for (c = 1; c < argc; c++) { + if (strcmp(argv[c], VDEV_TYPE_MIRROR) == 0 || + strcmp(argv[c], VDEV_TYPE_RAIDZ) == 0) + break; + children++; + child = realloc(child, + children * sizeof (nvlist_t *)); + if (child == NULL) + no_memory(); + if ((nv = make_leaf_vdev(argv[c])) == NULL) + return (NULL); + child[children - 1] = nv; + } + + argc -= c; + argv += c; + + /* + * Mirrors and RAID-Z devices require at least + * two components. + */ + if (children < 2) { + (void) fprintf(stderr, + gettext("invalid vdev specification: " + "%s requires at least 2 devices\n"), type); + return (NULL); + } + + verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0); + verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, + type) == 0); + verify(nvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, children) == 0); + + for (c = 0; c < children; c++) + nvlist_free(child[c]); + free(child); + } else { + /* + * We have a device. Pass off to make_leaf_vdev() to + * construct the appropriate nvlist describing the vdev. + */ + if ((nv = make_leaf_vdev(argv[0])) == NULL) + return (NULL); + argc--; + argv++; + } + + toplevels++; + top = realloc(top, toplevels * sizeof (nvlist_t *)); + if (top == NULL) + no_memory(); + top[toplevels - 1] = nv; + } + + /* + * Finally, create nvroot and add all top-level vdevs to it. 
+ */ + verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); + verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) == 0); + verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + top, toplevels) == 0); + + for (t = 0; t < toplevels; t++) + nvlist_free(top[t]); + free(top); + + return (nvroot); +} + +/* + * Get and validate the contents of the given vdev specification. This ensures + * that the nvlist returned is well-formed, that all the devices exist, and that + * they are not currently in use by any other known consumer. The 'poolconfig' + * parameter is the current configuration of the pool when adding devices + * existing pool, and is used to perform additional checks, such as changing the + * replication level of the pool. It can be 'NULL' to indicate that this is a + * new pool. The 'force' flag controls whether devices should be forcefully + * added, even if they appear in use. + */ +nvlist_t * +make_root_vdev(nvlist_t *poolconfig, int force, int check_rep, + int argc, char **argv) +{ + nvlist_t *newroot; + + is_force = force; + + /* + * Construct the vdev specification. If this is successful, we know + * that we have a valid specification, and that all devices can be + * opened. + */ + if ((newroot = construct_spec(argc, argv)) == NULL) + return (NULL); + + /* + * Validate each device to make sure that its not shared with another + * subsystem. We do this even if 'force' is set, because there are some + * uses (such as a dedicated dump device) that even '-f' cannot + * override. + */ + if (check_in_use(newroot, force) != 0) { + nvlist_free(newroot); + return (NULL); + } + + /* + * Check the replication level of the given vdevs and report any errors + * found. We include the existing pool spec, if any, as we need to + * catch changes against the existing replication level. 
+ */ + if (check_rep && check_replication(poolconfig, newroot) != 0) { + nvlist_free(newroot); + return (NULL); + } + + /* + * Run through the vdev specification and label any whole disks found. + */ + if (make_disks(newroot) != 0) { + nvlist_free(newroot); + return (NULL); + } + + return (newroot); +} diff --git a/usr/src/cmd/ztest/Makefile b/usr/src/cmd/ztest/Makefile new file mode 100644 index 0000000000..52e17eb413 --- /dev/null +++ b/usr/src/cmd/ztest/Makefile @@ -0,0 +1,59 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG:sh= basename `pwd` + +include ../Makefile.cmd + +$(INTEL_BLD)SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +install := TARGET = install +clean := TARGET = clean +clobber := TARGET = clobber +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber lint: $(SUBDIRS) + +# +# This should really be $(LN), but protocmp detects link inconsistencies +# between isaexec (which we ship) and ztest (which we do not ship). 
+# +install: $(SUBDIRS) + -$(RM) $(ROOTPROG) + -$(CP) $(ISAEXEC) $(ROOTPROG) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/cmd/ztest/Makefile.com b/usr/src/cmd/ztest/Makefile.com new file mode 100644 index 0000000000..c5695f0037 --- /dev/null +++ b/usr/src/cmd/ztest/Makefile.com @@ -0,0 +1,61 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +PROG= ztest +SRCS= ../$(PROG).c + +include ../../Makefile.cmd + +INCS += -I../../../lib/libzpool/common +INCS += -I../../../uts/common/fs/zfs + +LDLIBS += -lumem -lzpool -lm -lnvpair + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all +CFLAGS += -g $(CCVERBOSE) +CFLAGS64 += -g $(CCVERBOSE) +CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS) + +# lint complains about unused _umem_* functions +LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 +LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 + +.KEEP_STATE: + +all: $(PROG) + +$(PROG): $(SRCS) + $(LINK.c) -o $(PROG) $(SRCS) $(LDLIBS) + $(POST_PROCESS) + +clean: + +lint: lint_SRCS + +include ../../Makefile.targ diff --git a/usr/src/cmd/ztest/amd64/Makefile b/usr/src/cmd/ztest/amd64/Makefile new file mode 100644 index 0000000000..f259815d6e --- /dev/null +++ b/usr/src/cmd/ztest/amd64/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include ../../Makefile.cmd.64 + +install: all $(ROOTPROG64) diff --git a/usr/src/cmd/ztest/i386/Makefile b/usr/src/cmd/ztest/i386/Makefile new file mode 100644 index 0000000000..8ca4d0b763 --- /dev/null +++ b/usr/src/cmd/ztest/i386/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +install: all $(ROOTPROG32) diff --git a/usr/src/cmd/ztest/inc.flg b/usr/src/cmd/ztest/inc.flg new file mode 100644 index 0000000000..bb65300cca --- /dev/null +++ b/usr/src/cmd/ztest/inc.flg @@ -0,0 +1,30 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +find_files "s.*" usr/src/uts/common/fs/zfs/sys +echo_file usr/src/uts/common/sys/fs/zfs.h diff --git a/usr/src/cmd/ztest/sparc/Makefile b/usr/src/cmd/ztest/sparc/Makefile new file mode 100644 index 0000000000..8ca4d0b763 --- /dev/null +++ b/usr/src/cmd/ztest/sparc/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +install: all $(ROOTPROG32) diff --git a/usr/src/cmd/ztest/sparcv9/Makefile b/usr/src/cmd/ztest/sparcv9/Makefile new file mode 100644 index 0000000000..f259815d6e --- /dev/null +++ b/usr/src/cmd/ztest/sparcv9/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include ../../Makefile.cmd.64 + +install: all $(ROOTPROG64) diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c new file mode 100644 index 0000000000..ce870c5a11 --- /dev/null +++ b/usr/src/cmd/ztest/ztest.c @@ -0,0 +1,3303 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The objective of this program is to provide a DMU/ZAP/SPA stress test + * that runs entirely in userland, is easy to use, and easy to extend. + * + * The overall design of the ztest program is as follows: + * + * (1) For each major functional area (e.g. adding vdevs to a pool, + * creating and destroying datasets, reading and writing objects, etc) + * we have a simple routine to test that functionality. These + * individual routines do not have to do anything "stressful". + * + * (2) We turn these simple functionality tests into a stress test by + * running them all in parallel, with as many threads as desired, + * and spread across as many datasets, objects, and vdevs as desired. + * + * (3) While all this is happening, we inject faults into the pool to + * verify that self-healing data really works. + * + * (4) Every time we open a dataset, we change its checksum and compression + * functions. Thus even individual objects vary from block to block + * in which checksum they use and whether they're compressed. + * + * (5) To verify that we never lose on-disk consistency after a crash, + * we run the entire test in a child of the main process. + * At random times, the child self-immolates with a SIGKILL. + * This is the software equivalent of pulling the power cord. 
+ * The parent then runs the test again, using the existing + * storage pool, as many times as desired. + * + * (6) To verify that we don't have future leaks or temporal incursions, + * many of the functional tests record the transaction group number + * as part of their data. When reading old data, they verify that + * the transaction group number is less than the current, open txg. + * If you add a new test, please do this if applicable. + * + * When run with no arguments, ztest runs for about five minutes and + * produces no output if successful. To get a little bit of information, + * specify -V. To get more information, specify -VV, and so on. + * + * To turn this into an overnight stress test, use -T to specify run time. + * + * You can ask for more vdevs [-v], datasets [-d], or threads [-t] + * to increase the pool capacity, fanout, and overall stress level. + * + * The -N(okill) option will suppress kills, so each child runs to completion. + * This can be useful when you're trying to distinguish temporal incursions + * from plain old race conditions. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/spa.h> +#include <sys/dmu.h> +#include <sys/txg.h> +#include <sys/zap.h> +#include <sys/dmu_traverse.h> +#include <sys/dmu_objset.h> +#include <sys/poll.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/zio.h> +#include <sys/zio_checksum.h> +#include <sys/zio_compress.h> +#include <sys/zil.h> +#include <sys/vdev_impl.h> +#include <sys/spa_impl.h> +#include <sys/dsl_prop.h> +#include <sys/refcount.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <umem.h> +#include <dlfcn.h> +#include <ctype.h> +#include <math.h> +#include <sys/fs/zfs.h> + +static char cmdname[] = "ztest"; +static char *zopt_pool = cmdname; + +static uint64_t zopt_vdevs = 5; +static uint64_t zopt_vdevtime; +static int zopt_mirrors = 2; +static int zopt_raidz = 4; +static size_t zopt_vdev_size = SPA_MINDEVSIZE; +static int zopt_dirs = 7; +static int zopt_threads = 23; +static uint64_t zopt_passtime = 60; /* 60 seconds */ +static uint64_t zopt_killrate = 70; /* 70% kill rate */ +static int zopt_verbose = 0; +static int zopt_init = 1; +static char *zopt_dir = "/tmp"; +static uint64_t zopt_time = 300; /* 5 minutes */ +static int zopt_maxfaults; + +typedef struct ztest_args { + char *za_pool; + objset_t *za_os; + zilog_t *za_zilog; + thread_t za_thread; + uint64_t za_instance; + uint64_t za_random; + uint64_t za_diroff; + uint64_t za_diroff_shared; + hrtime_t za_start; + hrtime_t za_stop; + hrtime_t za_kill; + traverse_handle_t *za_th; +} ztest_args_t; + +typedef void ztest_func_t(ztest_args_t *); + +/* + * Note: these aren't static because we want dladdr() to work. 
+ */ +ztest_func_t ztest_dmu_read_write; +ztest_func_t ztest_dmu_write_parallel; +ztest_func_t ztest_dmu_object_alloc_free; +ztest_func_t ztest_zap; +ztest_func_t ztest_zap_parallel; +ztest_func_t ztest_traverse; +ztest_func_t ztest_dsl_prop_get_set; +ztest_func_t ztest_dmu_objset_create_destroy; +ztest_func_t ztest_dmu_snapshot_create_destroy; +ztest_func_t ztest_spa_create_destroy; +ztest_func_t ztest_fault_inject; +ztest_func_t ztest_vdev_attach_detach; +ztest_func_t ztest_vdev_LUN_growth; +ztest_func_t ztest_vdev_add_remove; +ztest_func_t ztest_scrub; +ztest_func_t ztest_spa_rename; + +typedef struct ztest_info { + ztest_func_t *zi_func; /* test function */ + uint64_t *zi_interval; /* execute every <interval> seconds */ + uint64_t zi_calls; /* per-pass count */ + uint64_t zi_call_time; /* per-pass time */ + uint64_t zi_call_total; /* cumulative total */ + uint64_t zi_call_target; /* target cumulative total */ +} ztest_info_t; + +uint64_t zopt_always = 0; /* all the time */ +uint64_t zopt_often = 1; /* every second */ +uint64_t zopt_sometimes = 10; /* every 10 seconds */ +uint64_t zopt_rarely = 60; /* every 60 seconds */ + +ztest_info_t ztest_info[] = { + { ztest_dmu_read_write, &zopt_always }, + { ztest_dmu_write_parallel, &zopt_always }, + { ztest_dmu_object_alloc_free, &zopt_always }, + { ztest_zap, &zopt_always }, + { ztest_zap_parallel, &zopt_always }, + { ztest_traverse, &zopt_often }, + { ztest_dsl_prop_get_set, &zopt_sometimes }, + { ztest_dmu_objset_create_destroy, &zopt_sometimes }, + { ztest_dmu_snapshot_create_destroy, &zopt_sometimes }, + { ztest_spa_create_destroy, &zopt_sometimes }, + { ztest_fault_inject, &zopt_sometimes }, + { ztest_spa_rename, &zopt_rarely }, + { ztest_vdev_attach_detach, &zopt_rarely }, + { ztest_vdev_LUN_growth, &zopt_rarely }, + { ztest_vdev_add_remove, &zopt_vdevtime }, + { ztest_scrub, &zopt_vdevtime }, +}; + +#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) + +#define ZTEST_SYNC_LOCKS 16 + +/* + * Stuff 
we need to share writably between parent and child. + */ +typedef struct ztest_shared { + mutex_t zs_vdev_lock; + rwlock_t zs_name_lock; + uint64_t zs_vdev_primaries; + uint64_t zs_enospc_count; + hrtime_t zs_start_time; + hrtime_t zs_stop_time; + uint64_t zs_alloc; + uint64_t zs_space; + ztest_info_t zs_info[ZTEST_FUNCS]; + mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS]; + uint64_t zs_seq[ZTEST_SYNC_LOCKS]; +} ztest_shared_t; + +typedef struct ztest_block_tag { + uint64_t bt_objset; + uint64_t bt_object; + uint64_t bt_offset; + uint64_t bt_txg; + uint64_t bt_thread; + uint64_t bt_seq; +} ztest_block_tag_t; + +static char ztest_dev_template[] = "%s/%s.%llua"; +static ztest_shared_t *ztest_shared; + +static int ztest_random_fd; +static int ztest_dump_core = 1; + +extern uint64_t zio_gang_bang; + +#define ZTEST_DIROBJ 1 +#define ZTEST_MICROZAP_OBJ 2 +#define ZTEST_FATZAP_OBJ 3 + +#define ZTEST_DIROBJ_BLOCKSIZE (1 << 10) +#define ZTEST_DIRSIZE 256 + +/* + * These libumem hooks provide a reasonable set of defaults for the allocator's + * debugging facilities. + */ +const char * +_umem_debug_init() +{ + return ("default,verbose"); /* $UMEM_DEBUG setting */ +} + +const char * +_umem_logging_init(void) +{ + return ("fail,contents"); /* $UMEM_LOGGING setting */ +} + +#define FATAL_MSG_SZ 1024 + +char *fatal_msg; + +static void +fatal(int do_perror, char *message, ...) 
+{ + va_list args; + int save_errno = errno; + char buf[FATAL_MSG_SZ]; + + (void) fflush(stdout); + + va_start(args, message); + (void) sprintf(buf, "ztest: "); + /* LINTED */ + (void) vsprintf(buf + strlen(buf), message, args); + va_end(args); + if (do_perror) { + (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), + ": %s", strerror(save_errno)); + } + (void) fprintf(stderr, "%s\n", buf); + fatal_msg = buf; /* to ease debugging */ + if (ztest_dump_core) + abort(); + exit(3); +} + +static int +str2shift(const char *buf) +{ + const char *ends = "BKMGTPEZ"; + int i; + + if (buf[0] == '\0') + return (0); + for (i = 0; i < strlen(ends); i++) { + if (toupper(buf[0]) == ends[i]) + break; + } + if (i == strlen(ends)) + fatal(0, "invalid bytes suffix: %s", buf); + if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { + return (10*i); + } + fatal(0, "invalid bytes suffix: %s", buf); + return (-1); +} + +static uint64_t +nicenumtoull(const char *buf) +{ + char *end; + uint64_t val; + + val = strtoull(buf, &end, 0); + if (end == buf) { + fatal(0, "bad numeric value: %s", buf); + } else if (end[0] == '.') { + double fval = strtod(buf, &end); + fval *= pow(2, str2shift(end)); + if (fval > UINT64_MAX) + fatal(0, "value too large: %s", buf); + val = (uint64_t)fval; + } else { + int shift = str2shift(end); + if (shift >= 64 || (val << shift) >> shift != val) + fatal(0, "value too large: %s", buf); + val <<= shift; + } + return (val); +} + +static void +usage(void) +{ + char nice_vdev_size[10]; + char nice_gang_bang[10]; + + nicenum(zopt_vdev_size, nice_vdev_size); + nicenum(zio_gang_bang, nice_gang_bang); + + (void) printf("Usage: %s\n" + "\t[-v vdevs (default: %llu)]\n" + "\t[-s size_of_each_vdev (default: %s)]\n" + "\t[-m mirror_copies (default: %d)]\n" + "\t[-r raidz_disks (default: %d)]\n" + "\t[-d datasets (default: %d)]\n" + "\t[-t threads (default: %d)]\n" + "\t[-g gang_block_threshold (default: %s)]\n" + "\t[-i initialize pool i times (default: 
%d)]\n" + "\t[-k kill percentage (default: %llu%%)]\n" + "\t[-p pool_name (default: %s)]\n" + "\t[-f file directory for vdev files (default: %s)]\n" + "\t[-V(erbose)] (use multiple times for ever more blather)\n" + "\t[-E(xisting)] (use existing pool instead of creating new one\n" + "\t[-I(mport)] (discover and import existing pools)\n" + "\t[-T time] total run time (default: %llu sec)\n" + "\t[-P passtime] time per pass (default: %llu sec)\n" + "", + cmdname, + (u_longlong_t)zopt_vdevs, /* -v */ + nice_vdev_size, /* -s */ + zopt_mirrors, /* -m */ + zopt_raidz, /* -r */ + zopt_dirs, /* -d */ + zopt_threads, /* -t */ + nice_gang_bang, /* -g */ + zopt_init, /* -i */ + (u_longlong_t)zopt_killrate, /* -k */ + zopt_pool, /* -p */ + zopt_dir, /* -f */ + (u_longlong_t)zopt_time, /* -T */ + (u_longlong_t)zopt_passtime); /* -P */ + exit(1); +} + +static uint64_t +ztest_random(uint64_t range) +{ + uint64_t r; + + if (range == 0) + return (0); + + if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r)) + fatal(1, "short read from /dev/urandom"); + + return (r % range); +} + +static void +ztest_record_enospc(char *s) +{ + dprintf("ENOSPC doing: %s\n", s ? 
s : "<unknown>"); + ztest_shared->zs_enospc_count++; +} + +static void +process_options(int argc, char **argv) +{ + int opt; + uint64_t value; + + /* By default, test gang blocks for blocks 32K and greater */ + zio_gang_bang = 32 << 10; + + while ((opt = getopt(argc, argv, + "v:s:m:r:c:d:t:g:i:k:p:f:VEIT:P:S")) != EOF) { + value = 0; + switch (opt) { + case 'v': + case 's': + case 'm': + case 'r': + case 'c': + case 'd': + case 't': + case 'g': + case 'i': + case 'k': + case 'T': + case 'P': + value = nicenumtoull(optarg); + } + switch (opt) { + case 'v': + zopt_vdevs = value; + break; + case 's': + zopt_vdev_size = MAX(SPA_MINDEVSIZE, value); + break; + case 'm': + zopt_mirrors = value; + break; + case 'r': + zopt_raidz = MAX(1, value); + break; + case 'd': + zopt_dirs = MAX(1, value); + break; + case 't': + zopt_threads = MAX(1, value); + break; + case 'g': + zio_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value); + break; + case 'i': + zopt_init = value; + break; + case 'k': + zopt_killrate = value; + break; + case 'p': + zopt_pool = strdup(optarg); + break; + case 'f': + zopt_dir = strdup(optarg); + break; + case 'V': + zopt_verbose++; + break; + case 'E': + zopt_init = 0; + break; + case 'T': + zopt_time = value; + break; + case 'P': + zopt_passtime = MAX(1, value); + break; + case '?': + default: + usage(); + break; + } + } + + zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX); + zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz >= 2 ? 
2 : 1) - 1; +} + +static nvlist_t * +make_vdev_file(size_t size) +{ + char dev_name[MAXPATHLEN]; + uint64_t vdev; + int fd; + nvlist_t *file; + + if (size == 0) { + (void) snprintf(dev_name, sizeof (dev_name), "%s", + "/dev/bogus"); + } else { + vdev = ztest_shared->zs_vdev_primaries++; + (void) sprintf(dev_name, ztest_dev_template, + zopt_dir, zopt_pool, vdev); + + fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd == -1) + fatal(1, "can't open %s", dev_name); + if (ftruncate(fd, size) != 0) + fatal(1, "can't ftruncate %s", dev_name); + (void) close(fd); + } + + VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); + VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0); + + return (file); +} + +static nvlist_t * +make_vdev_raidz(size_t size, int r) +{ + nvlist_t *raidz, **child; + int c; + + if (r < 2) + return (make_vdev_file(size)); + + child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); + + for (c = 0; c < r; c++) + child[c] = make_vdev_file(size); + + VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_RAIDZ) == 0); + VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, + child, r) == 0); + + for (c = 0; c < r; c++) + nvlist_free(child[c]); + + umem_free(child, r * sizeof (nvlist_t *)); + + return (raidz); +} + +static nvlist_t * +make_vdev_mirror(size_t size, int r, int m) +{ + nvlist_t *mirror, **child; + int c; + + if (m < 1) + return (make_vdev_raidz(size, r)); + + child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); + + for (c = 0; c < m; c++) + child[c] = make_vdev_raidz(size, r); + + VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_MIRROR) == 0); + VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, + child, m) == 0); + + for (c = 0; c < m; c++) + nvlist_free(child[c]); + + umem_free(child, 
m * sizeof (nvlist_t *)); + + return (mirror); +} + +static nvlist_t * +make_vdev_root(size_t size, int r, int m, int t) +{ + nvlist_t *root, **child; + int c; + + ASSERT(t > 0); + + child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); + + for (c = 0; c < t; c++) + child[c] = make_vdev_mirror(size, r, m); + + VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); + VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN, + child, t) == 0); + + for (c = 0; c < t; c++) + nvlist_free(child[c]); + + umem_free(child, t * sizeof (nvlist_t *)); + + return (root); +} + +static void +ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + int bs = SPA_MINBLOCKSHIFT + + ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1); + int ibs = DN_MIN_INDBLKSHIFT + + ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1); + int error; + + error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx); + if (error) { + char osname[300]; + dmu_objset_name(os, osname); + fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d", + osname, object, 1 << bs, ibs, error); + } +} + +static uint8_t +ztest_random_checksum(void) +{ + uint8_t checksum; + + do { + checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS); + } while (zio_checksum_table[checksum].ci_zbt); + + if (checksum == ZIO_CHECKSUM_OFF) + checksum = ZIO_CHECKSUM_ON; + + return (checksum); +} + +static uint8_t +ztest_random_compress(void) +{ + return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS)); +} + +typedef struct ztest_replay { + objset_t *zr_os; + uint64_t zr_assign; +} ztest_replay_t; + +static int +ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap) +{ + objset_t *os = zr->zr_os; + dmu_tx_t *tx; + int error; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + error = dmu_tx_assign(tx, zr->zr_assign); + if (error) 
{ + dmu_tx_abort(tx); + return (error); + } + + error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + dmu_tx_commit(tx); + + if (zopt_verbose >= 5) { + char osname[MAXNAMELEN]; + dmu_objset_name(os, osname); + (void) printf("replay create of %s object %llu" + " in txg %llu = %d\n", + osname, (u_longlong_t)lr->lr_doid, + (u_longlong_t)zr->zr_assign, error); + } + + return (error); +} + +static int +ztest_replay_remove(ztest_replay_t *zr, lr_remove_t *lr, boolean_t byteswap) +{ + objset_t *os = zr->zr_os; + dmu_tx_t *tx; + int error; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END); + error = dmu_tx_assign(tx, zr->zr_assign); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + error = dmu_object_free(os, lr->lr_doid, tx); + dmu_tx_commit(tx); + + return (error); +} + +zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { + NULL, /* 0 no such transaction type */ + ztest_replay_create, /* TX_CREATE */ + NULL, /* TX_MKDIR */ + NULL, /* TX_MKXATTR */ + NULL, /* TX_SYMLINK */ + ztest_replay_remove, /* TX_REMOVE */ + NULL, /* TX_RMDIR */ + NULL, /* TX_LINK */ + NULL, /* TX_RENAME */ + NULL, /* TX_WRITE */ + NULL, /* TX_TRUNCATE */ + NULL, /* TX_SETATTR */ + NULL, /* TX_ACL */ +}; + +/* + * Verify that we can't destroy an active pool, create an existing pool, + * or create a pool with a bad vdev spec. + */ +void +ztest_spa_create_destroy(ztest_args_t *za) +{ + int error; + spa_t *spa; + nvlist_t *nvroot; + + /* + * Attempt to create using a bad file. + */ + nvroot = make_vdev_root(0, 0, 0, 1); + error = spa_create("ztest_bad_file", nvroot, NULL); + nvlist_free(nvroot); + if (error != ENOENT) + fatal(0, "spa_create(bad_file) = %d", error); + + /* + * Attempt to create using a bad mirror. 
+ */ + nvroot = make_vdev_root(0, 0, 2, 1); + error = spa_create("ztest_bad_mirror", nvroot, NULL); + nvlist_free(nvroot); + if (error != ENOENT) + fatal(0, "spa_create(bad_mirror) = %d", error); + + /* + * Attempt to create an existing pool. It shouldn't matter + * what's in the nvroot; we should fail with EEXIST. + */ + (void) rw_rdlock(&ztest_shared->zs_name_lock); + nvroot = make_vdev_root(0, 0, 0, 1); + error = spa_create(za->za_pool, nvroot, NULL); + nvlist_free(nvroot); + if (error != EEXIST) + fatal(0, "spa_create(whatever) = %d", error); + + error = spa_open(za->za_pool, &spa, FTAG); + if (error) + fatal(0, "spa_open() = %d", error); + + error = spa_destroy(za->za_pool); + if (error != EBUSY) + fatal(0, "spa_destroy() = %d", error); + + spa_close(spa, FTAG); + (void) rw_unlock(&ztest_shared->zs_name_lock); +} + +/* + * Verify that vdev_add() works as expected. + */ +void +ztest_vdev_add_remove(ztest_args_t *za) +{ + spa_t *spa = dmu_objset_spa(za->za_os); + uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; + nvlist_t *nvroot; + int error; + + if (zopt_verbose >= 6) + (void) printf("adding vdev\n"); + + (void) mutex_lock(&ztest_shared->zs_vdev_lock); + + spa_config_enter(spa, RW_READER); + + ztest_shared->zs_vdev_primaries = + spa->spa_root_vdev->vdev_children * leaves; + + spa_config_exit(spa); + + nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1); + error = spa_vdev_add(spa, nvroot); + nvlist_free(nvroot); + + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); + + if (error == ENOSPC) + ztest_record_enospc("spa_vdev_add"); + else if (error != 0) + fatal(0, "spa_vdev_add() = %d", error); + + if (zopt_verbose >= 6) + (void) printf("spa_vdev_add = %d, as expected\n", error); +} + +/* + * Verify that we can attach and detach devices. 
+ */ +void +ztest_vdev_attach_detach(ztest_args_t *za) +{ + spa_t *spa = dmu_objset_spa(za->za_os); + vdev_t *rvd = spa->spa_root_vdev; + vdev_t *vd0, *vd1, *pvd; + nvlist_t *root, *file; + uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; + uint64_t leaf, top; + size_t size0, size1; + char path0[MAXPATHLEN], path1[MAXPATHLEN]; + int replacing; + int error, expected_error; + int fd; + + (void) mutex_lock(&ztest_shared->zs_vdev_lock); + + spa_config_enter(spa, RW_READER); + + /* + * Decide whether to do an attach or a replace. + */ + replacing = ztest_random(2); + + /* + * Pick a random top-level vdev. + */ + top = ztest_random(rvd->vdev_children); + + /* + * Pick a random leaf within it. + */ + leaf = ztest_random(leaves); + + /* + * Generate the path to this leaf. The filename will end with 'a'. + * We'll alternate replacements with a filename that ends with 'b'. + */ + (void) snprintf(path0, sizeof (path0), + ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf); + + bcopy(path0, path1, MAXPATHLEN); + + /* + * If the 'a' file isn't part of the pool, the 'b' file must be. + */ + if (vdev_lookup_by_path(rvd, path0) == NULL) + path0[strlen(path0) - 1] = 'b'; + else + path1[strlen(path1) - 1] = 'b'; + + /* + * Now path0 represents something that's already in the pool, + * and path1 is the thing we'll try to attach. + */ + vd0 = vdev_lookup_by_path(rvd, path0); + vd1 = vdev_lookup_by_path(rvd, path1); + ASSERT(vd0 != NULL); + pvd = vd0->vdev_parent; + + + /* + * Make size1 a little bigger or smaller than size0. + * If it's smaller, the attach should fail. + * If it's larger, and we're doing a replace, + * we should get dynamic LUN growth when we're done. + */ + size0 = vd0->vdev_psize; + size1 = 10 * size0 / (9 + ztest_random(3)); + + /* + * If pvd is not a mirror or root, the attach should fail with ENOTSUP, + * unless it's a replace; in that case any non-replacing parent is OK. + * + * If vd1 is already part of the pool, it should fail with EBUSY. 
+ * + * If vd1 is too small, it should fail with EOVERFLOW. + */ + if (pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_root_ops && + (!replacing || pvd->vdev_ops == &vdev_replacing_ops)) + expected_error = ENOTSUP; + else if (vd1 != NULL) + expected_error = EBUSY; + else if (size1 < size0) + expected_error = EOVERFLOW; + else + expected_error = 0; + + /* + * If vd1 isn't already part of the pool, create it. + */ + if (vd1 == NULL) { + fd = open(path1, O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd == -1) + fatal(1, "can't open %s", path1); + if (ftruncate(fd, size1) != 0) + fatal(1, "can't ftruncate %s", path1); + (void) close(fd); + } + + spa_config_exit(spa); + + /* + * Build the nvlist describing path1. + */ + VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); + VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path1) == 0); + + VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); + VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN, + &file, 1) == 0); + + error = spa_vdev_attach(spa, path0, root, replacing); + + nvlist_free(file); + nvlist_free(root); + + /* + * If our parent was the replacing vdev, but the replace completed, + * then instead of failing with ENOTSUP we may either succeed, + * fail with ENODEV, or fail with EOVERFLOW. + */ + if (expected_error == ENOTSUP && + (error == 0 || error == ENODEV || error == EOVERFLOW)) + expected_error = error; + + if (error != expected_error) { + fatal(0, "attach (%s, %s, %d) returned %d, expected %d", + path0, path1, replacing, error, expected_error); + } + + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); +} + +/* + * Verify that dynamic LUN growth works as expected. 
+ */ +/* ARGSUSED */ +void +ztest_vdev_LUN_growth(ztest_args_t *za) +{ + spa_t *spa = dmu_objset_spa(za->za_os); + char dev_name[MAXPATHLEN]; + uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; + uint64_t vdev; + size_t fsize; + int fd; + + (void) mutex_lock(&ztest_shared->zs_vdev_lock); + + /* + * Pick a random leaf vdev. + */ + spa_config_enter(spa, RW_READER); + vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves); + spa_config_exit(spa); + + (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); + + if ((fd = open(dev_name, O_RDWR)) != -1) { + /* + * Determine the size. + */ + fsize = lseek(fd, 0, SEEK_END); + + /* + * If it's less than 2x the original size, grow by around 3%. + */ + if (fsize < 2 * zopt_vdev_size) { + size_t newsize = fsize + ztest_random(fsize / 32); + (void) ftruncate(fd, newsize); + if (zopt_verbose >= 6) { + (void) printf("%s grew from %lu to %lu bytes\n", + dev_name, (ulong_t)fsize, (ulong_t)newsize); + } + } + (void) close(fd); + } + + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); +} + +/* ARGSUSED */ +static void +ztest_create_cb(objset_t *os, void *arg, dmu_tx_t *tx) +{ + /* + * Create the directory object. + */ + VERIFY(dmu_object_claim(os, ZTEST_DIROBJ, + DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE, + DMU_OT_UINT64_OTHER, sizeof (ztest_block_tag_t), tx) == 0); + + VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ, + DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); + + VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ, + DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); +} + +/* ARGSUSED */ +static void +ztest_destroy_cb(char *name, void *arg) +{ + objset_t *os; + dmu_object_info_t doi; + int error; + + /* + * Verify that the dataset contains a directory object. 
+ */ + error = dmu_objset_open(name, DMU_OST_OTHER, + DS_MODE_STANDARD | DS_MODE_READONLY, &os); + ASSERT3U(error, ==, 0); + error = dmu_object_info(os, ZTEST_DIROBJ, &doi); + ASSERT3U(error, ==, 0); + ASSERT3U(doi.doi_type, ==, DMU_OT_UINT64_OTHER); + ASSERT3S(doi.doi_physical_blks, >=, 0); + dmu_objset_close(os); + + /* + * Destroy the dataset. + */ + error = dmu_objset_destroy(name); + ASSERT3U(error, ==, 0); +} + +/* + * Verify that dmu_objset_{create,destroy,open,close} work as expected. + */ +static uint64_t +ztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode) +{ + itx_t *itx; + lr_create_t *lr; + size_t namesize; + char name[24]; + + (void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object); + namesize = strlen(name) + 1; + + itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize + + ztest_random(ZIL_MAX_BLKSZ)); + lr = (lr_create_t *)&itx->itx_lr; + bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr)); + lr->lr_doid = object; + lr->lr_foid = 0; + lr->lr_mode = mode; + lr->lr_uid = 0; + lr->lr_gid = 0; + lr->lr_gen = dmu_tx_get_txg(tx); + lr->lr_crtime[0] = time(NULL); + lr->lr_crtime[1] = 0; + lr->lr_rdev = 0; + bcopy(name, (char *)(lr + 1), namesize); + + return (zil_itx_assign(zilog, itx, tx)); +} + +#ifndef lint +static uint64_t +ztest_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t object) +{ + itx_t *itx; + lr_remove_t *lr; + size_t namesize; + char name[24]; + + (void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object); + namesize = strlen(name) + 1; + + itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize + + ztest_random(8000)); + lr = (lr_remove_t *)&itx->itx_lr; + lr->lr_doid = object; + bcopy(name, (char *)(lr + 1), namesize); + + return (zil_itx_assign(zilog, itx, tx)); +} +#endif /* lint */ + +void +ztest_dmu_objset_create_destroy(ztest_args_t *za) +{ + int error; + objset_t *os; + char name[100]; + int mode, basemode, expected_error; + zilog_t *zilog; + uint64_t seq; + uint64_t objects; + ztest_replay_t zr; + + (void) 
rw_rdlock(&ztest_shared->zs_name_lock); + (void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool, + (u_longlong_t)za->za_instance); + + basemode = DS_MODE_LEVEL(za->za_instance); + if (basemode == DS_MODE_NONE) + basemode++; + + /* + * If this dataset exists from a previous run, process its replay log + * half of the time. If we don't replay it, then dmu_objset_destroy() + * (invoked from ztest_destroy_cb() below) should just throw it away. + */ + if (ztest_random(2) == 0 && + dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_PRIMARY, &os) == 0) { + zr.zr_os = os; + zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL); + dmu_objset_close(os); + } + + /* + * There may be an old instance of the dataset we're about to + * create lying around from a previous run. If so, destroy it + * and all of its snapshots. + */ + dmu_objset_find(name, ztest_destroy_cb, NULL, DS_FIND_SNAPSHOTS); + + /* + * Verify that the destroyed dataset is no longer in the namespace. + */ + error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os); + if (error != ENOENT) + fatal(1, "dmu_objset_open(%s) found destroyed dataset %p", + name, os); + + /* + * Verify that we can create a new dataset. + */ + error = dmu_objset_create(name, DMU_OST_OTHER, NULL, ztest_create_cb, + NULL); + if (error) { + if (error == ENOSPC) { + ztest_record_enospc("dmu_objset_create"); + (void) rw_unlock(&ztest_shared->zs_name_lock); + return; + } + fatal(0, "dmu_objset_create(%s) = %d", name, error); + } + + error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os); + if (error) { + fatal(0, "dmu_objset_open(%s) = %d", name, error); + } + + /* + * Open the intent log for it. + */ + zilog = zil_open(os, NULL); + + /* + * Put a random number of objects in there. 
+ */ + objects = ztest_random(50); + seq = 0; + while (objects-- != 0) { + uint64_t object; + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name)); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + ztest_set_random_blocksize(os, object, tx); + seq = ztest_log_create(zilog, tx, object, + DMU_OT_UINT64_OTHER); + dmu_write(os, object, 0, sizeof (name), name, tx); + dmu_tx_commit(tx); + } + if (ztest_random(5) == 0) { + zil_commit(zilog, seq, FSYNC); + } + if (ztest_random(5) == 0) { + error = zil_suspend(zilog); + if (error == 0) { + zil_resume(zilog); + } + } + } + + /* + * Verify that we cannot create an existing dataset. + */ + error = dmu_objset_create(name, DMU_OST_OTHER, NULL, NULL, NULL); + if (error != EEXIST) + fatal(0, "created existing dataset, error = %d", error); + + /* + * Verify that multiple dataset opens are allowed, but only when + * the new access mode is compatible with the base mode. + * We use a mixture of typed and typeless opens, and when the + * open succeeds, verify that the discovered type is correct. + */ + for (mode = DS_MODE_STANDARD; mode < DS_MODE_LEVELS; mode++) { + objset_t *os2; + error = dmu_objset_open(name, DMU_OST_OTHER, mode, &os2); + expected_error = (basemode + mode < DS_MODE_LEVELS) ? 0 : EBUSY; + if (error != expected_error) + fatal(0, "dmu_objset_open('%s') = %d, expected %d", + name, error, expected_error); + if (error == 0) + dmu_objset_close(os2); + } + + zil_close(zilog); + dmu_objset_close(os); + + error = dmu_objset_destroy(name); + if (error) + fatal(0, "dmu_objset_destroy(%s) = %d", name, error); + + (void) rw_unlock(&ztest_shared->zs_name_lock); +} + +/* + * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 
+ */ +void +ztest_dmu_snapshot_create_destroy(ztest_args_t *za) +{ + int error; + objset_t *os = za->za_os; + char snapname[100]; + char osname[MAXNAMELEN]; + + (void) rw_rdlock(&ztest_shared->zs_name_lock); + dmu_objset_name(os, osname); + (void) snprintf(snapname, 100, "%s@%llu", osname, + (u_longlong_t)za->za_instance); + + error = dmu_objset_destroy(snapname); + if (error != 0 && error != ENOENT) + fatal(0, "dmu_objset_destroy() = %d", error); + error = dmu_objset_create(snapname, DMU_OST_OTHER, NULL, NULL, NULL); + if (error == ENOSPC) + ztest_record_enospc("dmu_take_snapshot"); + else if (error != 0 && error != EEXIST) + fatal(0, "dmu_take_snapshot() = %d", error); + (void) rw_unlock(&ztest_shared->zs_name_lock); +} + +#define ZTEST_TRAVERSE_BLOCKS 1000 + +static int +ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) +{ + ztest_args_t *za = arg; + zbookmark_t *zb = &bc->bc_bookmark; + blkptr_t *bp = &bc->bc_blkptr; + dnode_phys_t *dnp = bc->bc_dnode; + traverse_handle_t *th = za->za_th; + uint64_t size = BP_GET_LSIZE(bp); + + ASSERT(dnp != NULL); + + if (bc->bc_errno) + return (ERESTART); + + /* + * Once in a while, abort the traverse. We only do this to odd + * instance numbers to ensure that even ones can run to completion. + */ + if ((za->za_instance & 1) && ztest_random(10000) == 0) + return (EINTR); + + if (bp->blk_birth == 0) { + ASSERT(th->th_advance & ADVANCE_HOLES); + return (0); + } + + if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) && + bc == &th->th_cache[ZB_DN_CACHE][0]) { + ASSERT(bc->bc_data == NULL); + return (0); + } + + ASSERT(bc->bc_data != NULL); + + /* + * This is an expensive question, so don't ask it too often. 
+ */ + if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) { + void *xbuf = umem_alloc(size, UMEM_NOFAIL); + if (arc_tryread(spa, bp, xbuf) == 0) { + ASSERT(bcmp(bc->bc_data, xbuf, size) == 0); + } + umem_free(xbuf, size); + } + + if (zb->zb_level > 0) { + ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift); + return (0); + } + + if (zb->zb_level == -1) { + ASSERT3U(size, ==, sizeof (objset_phys_t)); + return (0); + } + + ASSERT(zb->zb_level == 0); + ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT); + + return (0); +} + +/* + * Verify that live pool traversal works. + */ +void +ztest_traverse(ztest_args_t *za) +{ + spa_t *spa = dmu_objset_spa(za->za_os); + traverse_handle_t *th = za->za_th; + int rc, advance; + uint64_t cbstart, cblimit; + + if (th == NULL) { + advance = 0; + + if (ztest_random(2) == 0) + advance |= ADVANCE_PRE; + + if (ztest_random(2) == 0) + advance |= ADVANCE_PRUNE; + + if (ztest_random(2) == 0) + advance |= ADVANCE_DATA; + + if (ztest_random(2) == 0) + advance |= ADVANCE_HOLES; + + th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance, + ZIO_FLAG_CANFAIL); + + traverse_add_pool(th, 0, -1ULL); + } + + advance = th->th_advance; + cbstart = th->th_callbacks; + cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000); + + while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit) + continue; + + if (zopt_verbose >= 5) + (void) printf("traverse %s%s%s%s %llu blocks to " + "<%llu, %llu, %d, %llx>%s\n", + (advance & ADVANCE_PRE) ? "pre" : "post", + (advance & ADVANCE_PRUNE) ? "|prune" : "", + (advance & ADVANCE_DATA) ? "|data" : "", + (advance & ADVANCE_HOLES) ? "|holes" : "", + (u_longlong_t)(th->th_callbacks - cbstart), + (u_longlong_t)th->th_lastcb.zb_objset, + (u_longlong_t)th->th_lastcb.zb_object, + th->th_lastcb.zb_level, + (u_longlong_t)th->th_lastcb.zb_blkid, + rc == 0 ? " [done]" : + rc == EINTR ? " [aborted]" : + rc == EAGAIN ? 
"" : + strerror(rc)); + + if (rc != EAGAIN) { + if (rc != 0 && rc != EINTR) + fatal(0, "traverse_more(%p) = %d", th, rc); + traverse_fini(th); + za->za_th = NULL; + } +} + +/* + * Verify that dmu_object_{alloc,free} work as expected. + */ +void +ztest_dmu_object_alloc_free(ztest_args_t *za) +{ + objset_t *os = za->za_os; + dmu_buf_t *db; + dmu_tx_t *tx; + uint64_t batchobj, object, batchsize, endoff, temp; + int b, c, error, bonuslen; + dmu_object_info_t doi; + char osname[MAXNAMELEN]; + + dmu_objset_name(os, osname); + + endoff = -8ULL; + batchsize = 2; + + /* + * Create a batch object if necessary, and record it in the directory. + */ + dmu_read(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t), &batchobj); + if (batchobj == 0) { + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, + sizeof (uint64_t)); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("create a batch object"); + dmu_tx_abort(tx); + return; + } + batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + ztest_set_random_blocksize(os, batchobj, tx); + dmu_write(os, ZTEST_DIROBJ, za->za_diroff, + sizeof (uint64_t), &batchobj, tx); + dmu_tx_commit(tx); + } + + /* + * Destroy the previous batch of objects. + */ + for (b = 0; b < batchsize; b++) { + dmu_read(os, batchobj, b * sizeof (uint64_t), + sizeof (uint64_t), &object); + if (object == 0) + continue; + /* + * Read and validate contents. + * We expect the nth byte of the bonus buffer to be n. 
+ */ + db = dmu_bonus_hold(os, object); + + dmu_object_info_from_db(db, &doi); + ASSERT(doi.doi_type == DMU_OT_UINT64_OTHER); + ASSERT(doi.doi_bonus_type == DMU_OT_PLAIN_OTHER); + ASSERT3S(doi.doi_physical_blks, >=, 0); + + dmu_buf_read(db); + + bonuslen = db->db_size; + + for (c = 0; c < bonuslen; c++) { + if (((uint8_t *)db->db_data)[c] != + (uint8_t)(c + bonuslen)) { + fatal(0, + "bad bonus: %s, obj %llu, off %d: %u != %u", + osname, object, c, + ((uint8_t *)db->db_data)[c], + (uint8_t)(c + bonuslen)); + } + } + + dmu_buf_rele(db); + + /* + * We expect the word at endoff to be our object number. + */ + dmu_read(os, object, endoff, sizeof (uint64_t), &temp); + + if (temp != object) { + fatal(0, "bad data in %s, got %llu, expected %llu", + osname, temp, object); + } + + /* + * Destroy old object and clear batch entry. + */ + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, batchobj, + b * sizeof (uint64_t), sizeof (uint64_t)); + dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("free object"); + dmu_tx_abort(tx); + return; + } + error = dmu_object_free(os, object, tx); + if (error) { + fatal(0, "dmu_object_free('%s', %llu) = %d", + osname, object, error); + } + object = 0; + + dmu_object_set_checksum(os, batchobj, + ztest_random_checksum(), tx); + dmu_object_set_compress(os, batchobj, + ztest_random_compress(), tx); + + dmu_write(os, batchobj, b * sizeof (uint64_t), + sizeof (uint64_t), &object, tx); + + dmu_tx_commit(tx); + } + + /* + * Before creating the new batch of objects, generate a bunch of churn. 
+ */ + for (b = ztest_random(100); b > 0; b--) { + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("churn objects"); + dmu_tx_abort(tx); + return; + } + object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + ztest_set_random_blocksize(os, object, tx); + error = dmu_object_free(os, object, tx); + if (error) { + fatal(0, "dmu_object_free('%s', %llu) = %d", + osname, object, error); + } + dmu_tx_commit(tx); + } + + /* + * Create a new batch of objects with randomly chosen + * blocksizes and record them in the batch directory. + */ + for (b = 0; b < batchsize; b++) { + uint32_t va_blksize; + u_longlong_t va_nblocks; + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t), + sizeof (uint64_t)); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff, + sizeof (uint64_t)); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("create batchobj"); + dmu_tx_abort(tx); + return; + } + bonuslen = (int)ztest_random(dmu_bonus_max()) + 1; + + object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_PLAIN_OTHER, bonuslen, tx); + + ztest_set_random_blocksize(os, object, tx); + + dmu_object_set_checksum(os, object, + ztest_random_checksum(), tx); + dmu_object_set_compress(os, object, + ztest_random_compress(), tx); + + dmu_write(os, batchobj, b * sizeof (uint64_t), + sizeof (uint64_t), &object, tx); + + /* + * Write to both the bonus buffer and the regular data. + */ + db = dmu_bonus_hold(os, object); + ASSERT3U(bonuslen, ==, db->db_size); + + dmu_object_size_from_db(db, &va_blksize, &va_nblocks); + ASSERT3S(va_nblocks, >=, 0); + + dmu_buf_will_dirty(db, tx); + + /* + * See comments above regarding the contents of + * the bonus buffer and the word at endoff. 
+ */ + for (c = 0; c < db->db_size; c++) + ((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen); + + dmu_buf_rele(db); + + /* + * Write to a large offset to increase indirection. + */ + dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx); + + dmu_tx_commit(tx); + } +} + +/* + * Verify that dmu_{read,write} work as expected. + */ +typedef struct bufwad { + uint64_t bw_index; + uint64_t bw_txg; + uint64_t bw_data; +} bufwad_t; + +typedef struct dmu_read_write_dir { + uint64_t dd_packobj; + uint64_t dd_bigobj; + uint64_t dd_chunk; +} dmu_read_write_dir_t; + +void +ztest_dmu_read_write(ztest_args_t *za) +{ + objset_t *os = za->za_os; + dmu_read_write_dir_t dd; + dmu_tx_t *tx; + int i, freeit, error; + uint64_t n, s, txg; + bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; + uint64_t packoff, packsize, bigoff, bigsize; + uint64_t regions = 997; + uint64_t stride = 123456789ULL; + uint64_t width = 40; + int free_percent = 5; + + /* + * This test uses two objects, packobj and bigobj, that are always + * updated together (i.e. in the same tx) so that their contents are + * in sync and can be compared. Their contents relate to each other + * in a simple way: packobj is a dense array of 'bufwad' structures, + * while bigobj is a sparse array of the same bufwads. Specifically, + * for any index n, there are three bufwads that should be identical: + * + * packobj, at offset n * sizeof (bufwad_t) + * bigobj, at the head of the nth chunk + * bigobj, at the tail of the nth chunk + * + * The chunk size is arbitrary. It doesn't have to be a power of two, + * and it doesn't have any relation to the object blocksize. + * The only requirement is that it can hold at least two bufwads. + * + * Normally, we write the bufwad to each of these locations. + * However, free_percent of the time we instead write zeroes to + * packobj and perform a dmu_free_range() on bigobj. 
By comparing + * bigobj to packobj, we can verify that the DMU is correctly + * tracking which parts of an object are allocated and free, + * and that the contents of the allocated blocks are correct. + */ + + /* + * Read the directory info. If it's the first time, set things up. + */ + dmu_read(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd); + if (dd.dd_chunk == 0) { + ASSERT(dd.dd_packobj == 0); + ASSERT(dd.dd_bigobj == 0); + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd)); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("create r/w directory"); + dmu_tx_abort(tx); + return; + } + + dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t); + + ztest_set_random_blocksize(os, dd.dd_packobj, tx); + ztest_set_random_blocksize(os, dd.dd_bigobj, tx); + + dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd, + tx); + dmu_tx_commit(tx); + } + + /* + * Prefetch a random chunk of the big object. + * Our aim here is to get some async reads in flight + * for blocks that we may free below; the DMU should + * handle this race correctly. + */ + n = ztest_random(regions) * stride + ztest_random(width); + s = 1 + ztest_random(2 * width - 1); + dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk); + + /* + * Pick a random index and compute the offsets into packobj and bigobj. 
+ */ + n = ztest_random(regions) * stride + ztest_random(width); + s = 1 + ztest_random(width - 1); + + packoff = n * sizeof (bufwad_t); + packsize = s * sizeof (bufwad_t); + + bigoff = n * dd.dd_chunk; + bigsize = s * dd.dd_chunk; + + packbuf = umem_alloc(packsize, UMEM_NOFAIL); + bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); + + /* + * free_percent of the time, free a range of bigobj rather than + * overwriting it. + */ + freeit = (ztest_random(100) < free_percent); + + /* + * Read the current contents of our objects. + */ + dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf); + dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf); + + /* + * Get a tx for the mods to both packobj and bigobj. + */ + tx = dmu_tx_create(os); + + dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize); + + if (freeit) + dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize); + else + dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize); + + error = dmu_tx_assign(tx, TXG_WAIT); + + if (error) { + ztest_record_enospc("dmu r/w range"); + dmu_tx_abort(tx); + umem_free(packbuf, packsize); + umem_free(bigbuf, bigsize); + return; + } + + txg = dmu_tx_get_txg(tx); + + /* + * For each index from n to n + s, verify that the existing bufwad + * in packobj matches the bufwads at the head and tail of the + * corresponding chunk in bigobj. Then update all three bufwads + * with the new values we want to write out. 
+ */ + for (i = 0; i < s; i++) { + /* LINTED */ + pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); + /* LINTED */ + bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk); + /* LINTED */ + bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1; + + ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); + ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); + + if (pack->bw_txg > txg) + fatal(0, "future leak: got %llx, open txg is %llx", + pack->bw_txg, txg); + + if (pack->bw_data != 0 && pack->bw_index != n + i) + fatal(0, "wrong index: got %llx, wanted %llx+%llx", + pack->bw_index, n, i); + + if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) + fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); + + if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) + fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); + + if (freeit) { + bzero(pack, sizeof (bufwad_t)); + } else { + pack->bw_index = n + i; + pack->bw_txg = txg; + pack->bw_data = 1 + ztest_random(-2ULL); + } + *bigH = *pack; + *bigT = *pack; + } + + /* + * We've verified all the old bufwads, and made new ones. + * Now write them out. + */ + dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx); + + if (freeit) { + if (zopt_verbose >= 6) { + (void) printf("freeing offset %llx size %llx" + " txg %llx\n", + (u_longlong_t)bigoff, + (u_longlong_t)bigsize, + (u_longlong_t)txg); + } + dmu_free_range(os, dd.dd_bigobj, bigoff, bigsize, tx); + } else { + if (zopt_verbose >= 6) { + (void) printf("writing offset %llx size %llx" + " txg %llx\n", + (u_longlong_t)bigoff, + (u_longlong_t)bigsize, + (u_longlong_t)txg); + } + dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx); + } + + dmu_tx_commit(tx); + + /* + * Sanity check the stuff we just wrote. 
+ */ + { + void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); + void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); + + dmu_read(os, dd.dd_packobj, packoff, packsize, packcheck); + dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigcheck); + + ASSERT(bcmp(packbuf, packcheck, packsize) == 0); + ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); + + umem_free(packcheck, packsize); + umem_free(bigcheck, bigsize); + } + + umem_free(packbuf, packsize); + umem_free(bigbuf, bigsize); +} + +void +ztest_dmu_write_parallel(ztest_args_t *za) +{ + objset_t *os = za->za_os; + dmu_tx_t *tx; + dmu_buf_t *db; + int i, b, error, do_free, bs; + uint64_t off, txg_how, txg; + mutex_t *lp; + char osname[MAXNAMELEN]; + char iobuf[SPA_MAXBLOCKSIZE]; + ztest_block_tag_t rbt, wbt; + + dmu_objset_name(os, osname); + bs = ZTEST_DIROBJ_BLOCKSIZE; + + /* + * Have multiple threads write to large offsets in ZTEST_DIROBJ + * to verify that having multiple threads writing to the same object + * in parallel doesn't cause any trouble. + * Also do parallel writes to the bonus buffer on occasion. + */ + for (i = 0; i < 50; i++) { + b = ztest_random(ZTEST_SYNC_LOCKS); + lp = &ztest_shared->zs_sync_lock[b]; + + do_free = (ztest_random(4) == 0); + + off = za->za_diroff_shared + ((uint64_t)b << SPA_MAXBLOCKSHIFT); + + if (ztest_random(4) == 0) { + /* + * Do the bonus buffer instead of a regular block. + */ + do_free = 0; + off = -1ULL; + } + + tx = dmu_tx_create(os); + + if (off == -1ULL) + dmu_tx_hold_bonus(tx, ZTEST_DIROBJ); + else if (do_free) + dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs); + else + dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs); + + txg_how = ztest_random(2) == 0 ? 
TXG_WAIT : TXG_NOWAIT; + error = dmu_tx_assign(tx, txg_how); + if (error) { + dmu_tx_abort(tx); + if (error == ERESTART) { + ASSERT(txg_how == TXG_NOWAIT); + txg_wait_open(dmu_objset_pool(os), 0); + continue; + } + ztest_record_enospc("dmu write parallel"); + return; + } + txg = dmu_tx_get_txg(tx); + + if (do_free) { + (void) mutex_lock(lp); + dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx); + (void) mutex_unlock(lp); + dmu_tx_commit(tx); + continue; + } + + wbt.bt_objset = dmu_objset_id(os); + wbt.bt_object = ZTEST_DIROBJ; + wbt.bt_offset = off; + wbt.bt_txg = txg; + wbt.bt_thread = za->za_instance; + + if (off == -1ULL) { + wbt.bt_seq = 0; + db = dmu_bonus_hold(os, ZTEST_DIROBJ); + ASSERT3U(db->db_size, ==, sizeof (wbt)); + dmu_buf_read(db); + bcopy(db->db_data, &rbt, db->db_size); + if (rbt.bt_objset != 0) { + ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset); + ASSERT3U(rbt.bt_object, ==, wbt.bt_object); + ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset); + ASSERT3U(rbt.bt_txg, <=, wbt.bt_txg); + } + dmu_buf_will_dirty(db, tx); + bcopy(&wbt, db->db_data, db->db_size); + dmu_buf_rele(db); + dmu_tx_commit(tx); + continue; + } + + (void) mutex_lock(lp); + + wbt.bt_seq = ztest_shared->zs_seq[b]++; + + dmu_write(os, ZTEST_DIROBJ, off, sizeof (wbt), &wbt, tx); + + (void) mutex_unlock(lp); + + if (ztest_random(100) == 0) + (void) poll(NULL, 0, 1); /* open dn_notxholds window */ + + dmu_tx_commit(tx); + + if (ztest_random(1000) == 0) + txg_wait_synced(dmu_objset_pool(os), txg); + + if (ztest_random(2) == 0) { + blkptr_t blk = { 0 }; + uint64_t blkoff; + + txg_suspend(dmu_objset_pool(os)); + (void) mutex_lock(lp); + error = dmu_sync(os, ZTEST_DIROBJ, off, &blkoff, &blk, + txg); + (void) mutex_unlock(lp); + if (error) { + txg_resume(dmu_objset_pool(os)); + dprintf("dmu_sync(%s, %d, %llx) = %d\n", + osname, ZTEST_DIROBJ, off, error); + continue; + } + + if (blk.blk_birth == 0) { /* concurrent free */ + txg_resume(dmu_objset_pool(os)); + continue; + } + + ASSERT(blk.blk_fill == 1); + 
ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER); + ASSERT3U(BP_GET_LEVEL(&blk), ==, 0); + ASSERT3U(BP_GET_LSIZE(&blk), ==, bs); + + /* + * Read the block that dmu_sync() returned to + * make sure its contents match what we wrote. + * We do this while still txg_suspend()ed to ensure + * that the block can't be reused before we read it. + */ + error = zio_wait(zio_read(NULL, dmu_objset_spa(os), + &blk, iobuf, bs, NULL, NULL, + ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED)); + ASSERT(error == 0); + + txg_resume(dmu_objset_pool(os)); + + bcopy(&iobuf[blkoff], &rbt, sizeof (rbt)); + + ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset); + ASSERT3U(rbt.bt_object, ==, wbt.bt_object); + ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset); + + /* + * The semantic of dmu_sync() is that we always + * push the most recent version of the data, + * so in the face of concurrent updates we may + * see a newer version of the block. That's OK. + */ + ASSERT3U(rbt.bt_txg, >=, wbt.bt_txg); + if (rbt.bt_thread == wbt.bt_thread) + ASSERT3U(rbt.bt_seq, ==, wbt.bt_seq); + else + ASSERT3U(rbt.bt_seq, >, wbt.bt_seq); + } + } +} + +/* + * Verify that zap_{create,destroy,add,remove,update} work as expected. + */ +#define ZTEST_ZAP_MIN_INTS 1 +#define ZTEST_ZAP_MAX_INTS 4 +#define ZTEST_ZAP_MAX_PROPS 1000 + +void +ztest_zap(ztest_args_t *za) +{ + objset_t *os = za->za_os; + uint64_t object; + uint64_t txg, last_txg; + uint64_t value[ZTEST_ZAP_MAX_INTS]; + uint64_t zl_ints, zl_intsize, prop; + int i, ints; + int iters = 100; + dmu_tx_t *tx; + char propname[100], txgname[100]; + int error; + char osname[MAXNAMELEN]; + char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; + + dmu_objset_name(os, osname); + + /* + * Create a new object if necessary, and record it in the directory. 
+ */ + dmu_read(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t), &object); + + if (object == 0) { + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, + sizeof (uint64_t)); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 2); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("create zap test obj"); + dmu_tx_abort(tx); + return; + } + object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx); + if (error) { + fatal(0, "zap_create('%s', %llu) = %d", + osname, object, error); + } + ASSERT(object != 0); + dmu_write(os, ZTEST_DIROBJ, za->za_diroff, + sizeof (uint64_t), &object, tx); + /* + * Generate a known hash collision, and verify that + * we can lookup and remove both entries. + */ + for (i = 0; i < 2; i++) { + value[i] = i; + error = zap_add(os, object, hc[i], sizeof (uint64_t), + 1, &value[i], tx); + ASSERT3U(error, ==, 0); + } + for (i = 0; i < 2; i++) { + error = zap_add(os, object, hc[i], sizeof (uint64_t), + 1, &value[i], tx); + ASSERT3U(error, ==, EEXIST); + error = zap_length(os, object, hc[i], + &zl_intsize, &zl_ints); + ASSERT3U(error, ==, 0); + ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); + ASSERT3U(zl_ints, ==, 1); + } + for (i = 0; i < 2; i++) { + error = zap_remove(os, object, hc[i], tx); + ASSERT3U(error, ==, 0); + } + + dmu_tx_commit(tx); + } + + ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); + + while (--iters >= 0) { + prop = ztest_random(ZTEST_ZAP_MAX_PROPS); + (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); + (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); + bzero(value, sizeof (value)); + last_txg = 0; + + /* + * If these zap entries already exist, validate their contents. 
+ */ + error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); + if (error == 0) { + ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); + ASSERT3U(zl_ints, ==, 1); + + error = zap_lookup(os, object, txgname, zl_intsize, + zl_ints, &last_txg); + + ASSERT3U(error, ==, 0); + + error = zap_length(os, object, propname, &zl_intsize, + &zl_ints); + + ASSERT3U(error, ==, 0); + ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); + ASSERT3U(zl_ints, ==, ints); + + error = zap_lookup(os, object, propname, zl_intsize, + zl_ints, value); + + ASSERT3U(error, ==, 0); + + for (i = 0; i < ints; i++) { + ASSERT3U(value[i], ==, last_txg + object + i); + } + } else { + ASSERT3U(error, ==, ENOENT); + } + + /* + * Atomically update two entries in our zap object. + * The first is named txg_%llu, and contains the txg + * in which the property was last updated. The second + * is named prop_%llu, and the nth element of its value + * should be txg + object + n. + */ + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, object, 2); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("create zap entry"); + dmu_tx_abort(tx); + return; + } + txg = dmu_tx_get_txg(tx); + + if (last_txg > txg) + fatal(0, "zap future leak: old %llu new %llu", + last_txg, txg); + + for (i = 0; i < ints; i++) + value[i] = txg + object + i; + + error = zap_update(os, object, txgname, sizeof (uint64_t), + 1, &txg, tx); + if (error) + fatal(0, "zap_update('%s', %llu, '%s') = %d", + osname, object, txgname, error); + + error = zap_update(os, object, propname, sizeof (uint64_t), + ints, value, tx); + if (error) + fatal(0, "zap_update('%s', %llu, '%s') = %d", + osname, object, propname, error); + + dmu_tx_commit(tx); + + /* + * Remove a random pair of entries. 
+ */ + prop = ztest_random(ZTEST_ZAP_MAX_PROPS); + (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); + (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); + + error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); + + if (error == ENOENT) + continue; + + ASSERT3U(error, ==, 0); + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, object, 2); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("remove zap entry"); + dmu_tx_abort(tx); + return; + } + error = zap_remove(os, object, txgname, tx); + if (error) + fatal(0, "zap_remove('%s', %llu, '%s') = %d", + osname, object, txgname, error); + + error = zap_remove(os, object, propname, tx); + if (error) + fatal(0, "zap_remove('%s', %llu, '%s') = %d", + osname, object, propname, error); + + dmu_tx_commit(tx); + } + + /* + * Once in a while, destroy the object. + */ + if (ztest_random(100) != 0) + return; + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t)); + dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("destroy zap object"); + dmu_tx_abort(tx); + return; + } + error = zap_destroy(os, object, tx); + if (error) + fatal(0, "zap_destroy('%s', %llu) = %d", + osname, object, error); + object = 0; + dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t), + &object, tx); + dmu_tx_commit(tx); +} + +void +ztest_zap_parallel(ztest_args_t *za) +{ + objset_t *os = za->za_os; + uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; + int iters = 100; + dmu_tx_t *tx; + int i, namelen, error; + char name[20], string_value[20]; + void *data; + + while (--iters >= 0) { + /* + * Generate a random name of the form 'xxx.....' where each + * x is a random printable character and the dots are dots. + * There are 94 such characters, and the name length goes from + * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 
+ */ + namelen = ztest_random(sizeof (name) - 5) + 5 + 1; + + for (i = 0; i < 3; i++) + name[i] = '!' + ztest_random('~' - '!' + 1); + for (; i < namelen - 1; i++) + name[i] = '.'; + name[i] = '\0'; + + if (ztest_random(2) == 0) + object = ZTEST_MICROZAP_OBJ; + else + object = ZTEST_FATZAP_OBJ; + + if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) { + wsize = sizeof (txg); + wc = 1; + data = &txg; + } else { + wsize = 1; + wc = namelen; + data = string_value; + } + + count = -1ULL; + VERIFY(zap_count(os, object, &count) == 0); + ASSERT(count != -1ULL); + + /* + * Select an operation: length, lookup, add, update, remove. + */ + i = ztest_random(5); + + if (i >= 2) { + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, object, 1); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("zap parallel"); + dmu_tx_abort(tx); + return; + } + txg = dmu_tx_get_txg(tx); + bcopy(name, string_value, namelen); + } else { + tx = NULL; + txg = 0; + bzero(string_value, namelen); + } + + switch (i) { + + case 0: + error = zap_length(os, object, name, &zl_wsize, &zl_wc); + if (error == 0) { + ASSERT3U(wsize, ==, zl_wsize); + ASSERT3U(wc, ==, zl_wc); + } else { + ASSERT3U(error, ==, ENOENT); + } + break; + + case 1: + error = zap_lookup(os, object, name, wsize, wc, data); + if (error == 0) { + if (data == string_value && + bcmp(name, data, namelen) != 0) + fatal(0, "name '%s' != val '%s' len %d", + name, data, namelen); + } else { + ASSERT3U(error, ==, ENOENT); + } + break; + + case 2: + error = zap_add(os, object, name, wsize, wc, data, tx); + ASSERT(error == 0 || error == EEXIST); + break; + + case 3: + VERIFY(zap_update(os, object, name, wsize, wc, + data, tx) == 0); + break; + + case 4: + error = zap_remove(os, object, name, tx); + ASSERT(error == 0 || error == ENOENT); + break; + } + + if (tx != NULL) + dmu_tx_commit(tx); + } +} + +void +ztest_dsl_prop_get_set(ztest_args_t *za) +{ + objset_t *os = za->za_os; + int i, inherit; + uint64_t value; + const char 
*prop, *valname; + char setpoint[MAXPATHLEN]; + char osname[MAXNAMELEN]; + + (void) rw_rdlock(&ztest_shared->zs_name_lock); + + dmu_objset_name(os, osname); + + for (i = 0; i < 2; i++) { + if (i == 0) { + prop = "checksum"; + value = ztest_random_checksum(); + inherit = (value == ZIO_CHECKSUM_INHERIT); + } else { + prop = "compression"; + value = ztest_random_compress(); + inherit = (value == ZIO_COMPRESS_INHERIT); + } + + VERIFY3U(dsl_prop_set(osname, prop, sizeof (value), + !inherit, &value), ==, 0); + + VERIFY3U(dsl_prop_get(osname, prop, sizeof (value), + 1, &value, setpoint), ==, 0); + + if (i == 0) + valname = zio_checksum_table[value].ci_name; + else + valname = zio_compress_table[value].ci_name; + + if (zopt_verbose >= 6) { + (void) printf("%s %s = %s for '%s'\n", + osname, prop, valname, setpoint); + } + } + + (void) rw_unlock(&ztest_shared->zs_name_lock); +} + +/* + * Inject random faults into the on-disk data. + */ +void +ztest_fault_inject(ztest_args_t *za) +{ + int fd; + uint64_t offset; + uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; + uint64_t bad = 0x1990c0ffeedecade; + uint64_t top, leaf; + char path0[MAXPATHLEN]; + char path1[MAXPATHLEN]; + char pathrand[MAXPATHLEN]; + size_t fsize; + spa_t *spa = dmu_objset_spa(za->za_os); + int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ + int iters = 1000; + int ftype; + + /* + * Pick a random top-level vdev. + */ + spa_config_enter(spa, RW_READER); + top = ztest_random(spa->spa_root_vdev->vdev_children); + spa_config_exit(spa); + + /* + * Pick a random leaf. + */ + leaf = ztest_random(leaves); + + /* + * Generate paths to the first to leaves in this top-level vdev, + * and to the random leaf we selected. We'll induce transient + * faults on leaves 0 and 1, we'll online/offline leaf 1, + * and we'll write random garbage to the randomly chosen leaf. 
+ */ + (void) snprintf(path0, sizeof (path0), + ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 0); + (void) snprintf(path1, sizeof (path1), + ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 1); + (void) snprintf(pathrand, sizeof (pathrand), + ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf); + + if (leaves < 2) /* there is no second leaf */ + path1[0] = '\0'; + + dprintf("damaging %s, %s, and %s\n", path0, path1, pathrand); + + /* + * If we have exactly one-fault tolerance, just randomly offline + * and online one device. + */ + if (zopt_maxfaults == 1 && path1[0] != '\0') { + if (ztest_random(10) < 6) + (void) vdev_offline(spa, path1); + else + (void) vdev_online(spa, path1); + return; + } + + /* + * Always inject a little random device failure, regardless of + * the replication level. The I/Os should be retried successfully. + * If we only have single-fault tolerance, don't inject write + * faults, because then we'll be doing partial writes and won't + * be able to recover when we inject data corruption. + */ + if (zopt_maxfaults <= 1) + ftype = (1U << ZIO_TYPE_READ); + else + ftype = (1U << ZIO_TYPE_READ) | (1U << ZIO_TYPE_WRITE); + + (void) vdev_error_setup(spa, path0, VDEV_FAULT_COUNT, ftype, 10); + + /* + * If we can tolerate three or more faults, make one of the + * devices fail quite a lot. + */ + if (zopt_maxfaults >= 3 && path1[0] != '\0') + (void) vdev_error_setup(spa, path1, VDEV_FAULT_COUNT, + ftype, 100); + + /* + * If we can tolerate four or more faults, offline one of the devices. + */ + if (zopt_maxfaults >= 4 && path1[0] != '\0') { + if (ztest_random(10) < 6) + (void) vdev_offline(spa, path1); + else + (void) vdev_online(spa, path1); + } + + /* + * If we have at least single-fault tolerance, inject data corruption. 
+ */ + if (zopt_maxfaults < 1) + return; + + fd = open(pathrand, O_RDWR); + + if (fd == -1) /* we hit a gap in the device namespace */ + return; + + fsize = lseek(fd, 0, SEEK_END); + + while (--iters != 0) { + offset = ztest_random(fsize / (leaves << bshift)) * + (leaves << bshift) + (leaf << bshift) + + (ztest_random(1ULL << (bshift - 1)) & -8ULL); + + if (offset >= fsize) + continue; + + if (zopt_verbose >= 6) + (void) printf("injecting bad word into %s," + " offset 0x%llx\n", pathrand, (u_longlong_t)offset); + + if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) + fatal(1, "can't inject bad word at 0x%llx in %s", + offset, pathrand); + } + + (void) close(fd); +} + +static void +ztest_error_setup(vdev_t *vd, int mode, int mask, uint64_t arg) +{ + int c; + + for (c = 0; c < vd->vdev_children; c++) + ztest_error_setup(vd->vdev_child[c], mode, mask, arg); + + if (vd->vdev_path != NULL) + (void) vdev_error_setup(vd->vdev_spa, vd->vdev_path, + mode, mask, arg); +} + +/* + * Scrub the pool. + */ +void +ztest_scrub(ztest_args_t *za) +{ + spa_t *spa = dmu_objset_spa(za->za_os); + + (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_FALSE); + (void) poll(NULL, 0, 1000); /* wait a second, then force a restart */ + (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_FALSE); +} + +/* + * Rename the pool to a different name and then rename it back. 
+ */ +void +ztest_spa_rename(ztest_args_t *za) +{ + char *oldname, *newname; + int error; + spa_t *spa; + + (void) rw_wrlock(&ztest_shared->zs_name_lock); + + oldname = za->za_pool; + newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); + (void) strcpy(newname, oldname); + (void) strcat(newname, "_tmp"); + + /* + * Do the rename + */ + error = spa_rename(oldname, newname); + if (error) + fatal(0, "spa_rename('%s', '%s') = %d", oldname, + newname, error); + + /* + * Try to open it under the old name, which shouldn't exist + */ + error = spa_open(oldname, &spa, FTAG); + if (error != ENOENT) + fatal(0, "spa_open('%s') = %d", oldname, error); + + /* + * Open it under the new name and make sure it's still the same spa_t. + */ + error = spa_open(newname, &spa, FTAG); + if (error != 0) + fatal(0, "spa_open('%s') = %d", newname, error); + + ASSERT(spa == dmu_objset_spa(za->za_os)); + spa_close(spa, FTAG); + + /* + * Rename it back to the original + */ + error = spa_rename(newname, oldname); + if (error) + fatal(0, "spa_rename('%s', '%s') = %d", newname, + oldname, error); + + /* + * Make sure it can still be opened + */ + error = spa_open(oldname, &spa, FTAG); + if (error != 0) + fatal(0, "spa_open('%s') = %d", oldname, error); + + ASSERT(spa == dmu_objset_spa(za->za_os)); + spa_close(spa, FTAG); + + umem_free(newname, strlen(newname) + 1); + + (void) rw_unlock(&ztest_shared->zs_name_lock); +} + + +/* + * Completely obliterate one disk. + */ +static void +ztest_obliterate_one_disk(uint64_t vdev) +{ + int fd; + char dev_name[MAXPATHLEN]; + size_t fsize; + + if (zopt_maxfaults < 2) + return; + + (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); + + fd = open(dev_name, O_RDWR); + + if (fd == -1) + fatal(1, "can't open %s", dev_name); + + /* + * Determine the size. + */ + fsize = lseek(fd, 0, SEEK_END); + (void) close(fd); + + /* + * Remove it. + */ + VERIFY(remove(dev_name) == 0); + + /* + * Create a new one. 
+ */ + VERIFY((fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666)) >= 0); + VERIFY(ftruncate(fd, fsize) == 0); + (void) close(fd); +} + +static void +ztest_replace_one_disk(spa_t *spa, uint64_t vdev) +{ + char dev_name[MAXPATHLEN]; + nvlist_t *file, *root; + int error; + + (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); + + /* + * Build the nvlist describing dev_name. + */ + VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); + VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0); + + VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); + VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN, + &file, 1) == 0); + + error = spa_vdev_attach(spa, dev_name, root, B_TRUE); + if (error != 0 && error != EBUSY && error != ENOTSUP && error != ENODEV) + fatal(0, "spa_vdev_attach(in-place) = %d", error); + + nvlist_free(file); + nvlist_free(root); +} + +static void +ztest_verify_blocks(char *pool) +{ + int status; + char zdb[MAXPATHLEN + MAXNAMELEN + 20]; + char zbuf[1024]; + char *bin; + FILE *fp; + + (void) realpath(getexecname(), zdb); + + /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ + bin = strstr(zdb, "/usr/bin/"); + /* LINTED */ + (void) sprintf(bin, "/usr/sbin/zdb -bc%s%s -U -O %s %s", + zopt_verbose >= 3 ? "s" : "", + zopt_verbose >= 4 ? "v" : "", + ztest_random(2) == 0 ? 
"pre" : "post", pool); + + if (zopt_verbose >= 5) + (void) printf("Executing %s\n", strstr(zdb, "zdb ")); + + fp = popen(zdb, "r"); + + while (fgets(zbuf, sizeof (zbuf), fp) != NULL) + if (zopt_verbose >= 3) + (void) printf("%s", zbuf); + + status = pclose(fp); + + if (status == 0) + return; + + ztest_dump_core = 0; + if (WIFEXITED(status)) + fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); + else + fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); +} + +static void +ztest_walk_pool_directory(char *header) +{ + spa_t *spa = NULL; + + if (zopt_verbose >= 6) + (void) printf("%s\n", header); + + mutex_enter(&spa_namespace_lock); + while ((spa = spa_next(spa)) != NULL) + if (zopt_verbose >= 6) + (void) printf("\t%s\n", spa_name(spa)); + mutex_exit(&spa_namespace_lock); +} + +static void +ztest_spa_import_export(char *oldname, char *newname) +{ + nvlist_t *config; + uint64_t pool_guid; + spa_t *spa; + int error; + + if (zopt_verbose >= 4) { + (void) printf("import/export: old = %s, new = %s\n", + oldname, newname); + } + + /* + * Clean up from previous runs. + */ + (void) spa_destroy(newname); + + /* + * Get the pool's configuration and guid. + */ + error = spa_open(oldname, &spa, FTAG); + if (error) + fatal(0, "spa_open('%s') = %d", oldname, error); + + ASSERT(spa->spa_config != NULL); + + VERIFY(nvlist_dup(spa->spa_config, &config, 0) == 0); + pool_guid = spa_guid(spa); + spa_close(spa, FTAG); + + ztest_walk_pool_directory("pools before export"); + + /* + * Export it. + */ + error = spa_export(oldname); + if (error) + fatal(0, "spa_export('%s') = %d", oldname, error); + + ztest_walk_pool_directory("pools after export"); + + /* + * Import it under the new name. + */ + error = spa_import(newname, config, NULL); + if (error) + fatal(0, "spa_import('%s') = %d", newname, error); + + ztest_walk_pool_directory("pools after import"); + + /* + * Try to import it again -- should fail with EEXIST. 
+ */ + error = spa_import(newname, config, NULL); + if (error != EEXIST) + fatal(0, "spa_import('%s') twice", newname); + + /* + * Try to import it under a different name -- should fail with EEXIST. + */ + error = spa_import(oldname, config, NULL); + if (error != EEXIST) + fatal(0, "spa_import('%s') under multiple names", newname); + + /* + * Verify that the pool is no longer visible under the old name. + */ + error = spa_open(oldname, &spa, FTAG); + if (error != ENOENT) + fatal(0, "spa_open('%s') = %d", newname, error); + + /* + * Verify that we can open and close the pool using the new name. + */ + error = spa_open(newname, &spa, FTAG); + if (error) + fatal(0, "spa_open('%s') = %d", newname, error); + ASSERT(pool_guid == spa_guid(spa)); + spa_close(spa, FTAG); + + nvlist_free(config); +} + +static void * +ztest_thread(void *arg) +{ + ztest_args_t *za = arg; + ztest_shared_t *zs = ztest_shared; + hrtime_t now, functime; + ztest_info_t *zi; + int f; + + while ((now = gethrtime()) < za->za_stop) { + /* + * See if it's time to force a crash. + */ + if (now > za->za_kill) { + zs->zs_alloc = spa_get_alloc(dmu_objset_spa(za->za_os)); + zs->zs_space = spa_get_space(dmu_objset_spa(za->za_os)); + (void) kill(getpid(), SIGKILL); + } + + /* + * Pick a random function. + */ + f = ztest_random(ZTEST_FUNCS); + zi = &zs->zs_info[f]; + + /* + * Decide whether to call it, based on the requested frequency. 
+ */ + if (zi->zi_call_target == 0 || + (double)zi->zi_call_total / zi->zi_call_target > + (double)(now - zs->zs_start_time) / (zopt_time * NANOSEC)) + continue; + + atomic_add_64(&zi->zi_calls, 1); + atomic_add_64(&zi->zi_call_total, 1); + + za->za_diroff = (za->za_instance * ZTEST_FUNCS + f) * + ZTEST_DIRSIZE; + za->za_diroff_shared = (1ULL << 63); + + ztest_dmu_write_parallel(za); + + zi->zi_func(za); + + functime = gethrtime() - now; + + atomic_add_64(&zi->zi_call_time, functime); + + if (zopt_verbose >= 4) { + Dl_info dli; + (void) dladdr((void *)zi->zi_func, &dli); + (void) printf("%6.2f sec in %s\n", + (double)functime / NANOSEC, dli.dli_sname); + } + + /* + * If we're getting ENOSPC with some regularity, stop. + */ + if (zs->zs_enospc_count > 10) + break; + } + + return (NULL); +} + +/* + * Kick off threads to run tests on all datasets in parallel. + */ +static void +ztest_run(char *pool) +{ + int t, d, error; + ztest_shared_t *zs = ztest_shared; + ztest_args_t *za; + spa_t *spa; + char name[100]; + + (void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL); + (void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL); + + for (t = 0; t < ZTEST_SYNC_LOCKS; t++) + (void) _mutex_init(&zs->zs_sync_lock[t], USYNC_THREAD, NULL); + + /* + * Destroy one disk before we even start. + * It's mirrored, so everything should work just fine. + * This makes us exercise fault handling very early in spa_load(). + */ + ztest_obliterate_one_disk(0); + + /* + * Verify that the sum of the sizes of all blocks in the pool + * equals the SPA's allocated space total. + */ + ztest_verify_blocks(pool); + + /* + * Kick off a replacement of the disk we just obliterated. 
+ */ + kernel_init(FREAD | FWRITE); + error = spa_open(pool, &spa, FTAG); + if (error) + fatal(0, "spa_open(%s) = %d", pool, error); + ztest_replace_one_disk(spa, 0); + if (zopt_verbose >= 5) + show_pool_stats(spa); + spa_close(spa, FTAG); + kernel_fini(); + + kernel_init(FREAD | FWRITE); + + /* + * Verify that we can export the pool and reimport it under a + * different name. + */ + (void) snprintf(name, 100, "%s_import", pool); + ztest_spa_import_export(pool, name); + ztest_spa_import_export(name, pool); + + /* + * Verify that we can loop over all pools. + */ + mutex_enter(&spa_namespace_lock); + for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) { + if (zopt_verbose > 3) { + (void) printf("spa_next: found %s\n", spa_name(spa)); + } + } + mutex_exit(&spa_namespace_lock); + + /* + * Open our pool. + */ + error = spa_open(pool, &spa, FTAG); + if (error) + fatal(0, "spa_open() = %d", error); + + /* + * Verify that we can safely inquire about about any object, + * whether it's allocated or not. To make it interesting, + * we probe a 5-wide window around each power of two. + * This hits all edge cases, including zero and the max. + */ + for (t = 0; t < 64; t++) { + for (d = -5; d <= 5; d++) { + error = dmu_object_info(spa->spa_meta_objset, + (1ULL << t) + d, NULL); + ASSERT(error == 0 || error == ENOENT); + } + } + + /* + * Now kick off all the tests that run in parallel. 
+ */ + zs->zs_enospc_count = 0; + + za = umem_zalloc(zopt_threads * sizeof (ztest_args_t), UMEM_NOFAIL); + + if (zopt_verbose >= 4) + (void) printf("starting main threads...\n"); + + za[0].za_start = gethrtime(); + za[0].za_stop = za[0].za_start + zopt_passtime * NANOSEC; + za[0].za_stop = MIN(za[0].za_stop, zs->zs_stop_time); + za[0].za_kill = za[0].za_stop; + if (ztest_random(100) < zopt_killrate) + za[0].za_kill -= ztest_random(zopt_passtime * NANOSEC); + + for (t = 0; t < zopt_threads; t++) { + d = t % zopt_dirs; + if (t < zopt_dirs) { + ztest_replay_t zr; + (void) rw_rdlock(&ztest_shared->zs_name_lock); + (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d); + error = dmu_objset_create(name, DMU_OST_OTHER, NULL, + ztest_create_cb, NULL); + if (error != 0 && error != EEXIST) { + if (error == ENOSPC) { + zs->zs_enospc_count++; + (void) rw_unlock( + &ztest_shared->zs_name_lock); + break; + } + fatal(0, "dmu_objset_create(%s) = %d", + name, error); + } + error = dmu_objset_open(name, DMU_OST_OTHER, + DS_MODE_STANDARD, &za[d].za_os); + if (error) + fatal(0, "dmu_objset_open('%s') = %d", + name, error); + (void) rw_unlock(&ztest_shared->zs_name_lock); + zr.zr_os = za[d].za_os; + zil_replay(zr.zr_os, &zr, &zr.zr_assign, + ztest_replay_vector, NULL); + za[d].za_zilog = zil_open(za[d].za_os, NULL); + } + za[t].za_pool = spa_strdup(pool); + za[t].za_os = za[d].za_os; + za[t].za_zilog = za[d].za_zilog; + za[t].za_instance = t; + za[t].za_random = ztest_random(-1ULL); + za[t].za_start = za[0].za_start; + za[t].za_stop = za[0].za_stop; + za[t].za_kill = za[0].za_kill; + + error = thr_create(0, 0, ztest_thread, &za[t], THR_BOUND, + &za[t].za_thread); + if (error) + fatal(0, "can't create thread %d: error %d", + t, error); + } + + while (--t >= 0) { + error = thr_join(za[t].za_thread, NULL, NULL); + if (error) + fatal(0, "thr_join(%d) = %d", t, error); + if (za[t].za_th) + traverse_fini(za[t].za_th); + if (t < zopt_dirs) { + zil_close(za[t].za_zilog); + 
dmu_objset_close(za[t].za_os); + } + spa_strfree(za[t].za_pool); + } + + umem_free(za, zopt_threads * sizeof (ztest_args_t)); + + if (zopt_verbose >= 3) + show_pool_stats(spa); + + txg_wait_synced(spa_get_dsl(spa), 0); + + zs->zs_alloc = spa_get_alloc(spa); + zs->zs_space = spa_get_space(spa); + + /* + * Did we have out-of-space errors? If so, destroy a random objset. + */ + if (zs->zs_enospc_count != 0) { + (void) rw_rdlock(&ztest_shared->zs_name_lock); + (void) snprintf(name, 100, "%s/%s_%d", pool, pool, + (int)ztest_random(zopt_dirs)); + if (zopt_verbose >= 3) + (void) printf("Destroying %s to free up space\n", name); + dmu_objset_find(name, ztest_destroy_cb, NULL, + DS_FIND_SNAPSHOTS); + (void) rw_unlock(&ztest_shared->zs_name_lock); + } + + /* + * Prepare every leaf device to inject a few random read faults. + */ + ztest_error_setup(spa->spa_root_vdev, VDEV_FAULT_COUNT, + (1U << ZIO_TYPE_READ), 10); + + /* + * Right before closing the pool, kick off a bunch of async I/O; + * spa_close() should wait for it to complete. + */ + for (t = 1; t < 50; t++) + dmu_prefetch(spa->spa_meta_objset, t, 0, 1 << 15); + + spa_close(spa, FTAG); + + kernel_fini(); +} + +void +print_time(hrtime_t t, char *timebuf) +{ + hrtime_t s = t / NANOSEC; + hrtime_t m = s / 60; + hrtime_t h = m / 60; + hrtime_t d = h / 24; + + s -= m * 60; + m -= h * 60; + h -= d * 24; + + timebuf[0] = '\0'; + + if (d) + (void) sprintf(timebuf, + "%llud%02lluh%02llum%02llus", d, h, m, s); + else if (h) + (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); + else if (m) + (void) sprintf(timebuf, "%llum%02llus", m, s); + else + (void) sprintf(timebuf, "%llus", s); +} + +/* + * Create a storage pool with the given name and initial vdev size. + * Then create the specified number of datasets in the pool. + */ +static void +ztest_init(char *pool) +{ + spa_t *spa; + int error; + nvlist_t *nvroot; + + kernel_init(FREAD | FWRITE); + + /* + * Create the storage pool. 
+ */ + (void) spa_destroy(pool); + ztest_shared->zs_vdev_primaries = 0; + nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1); + error = spa_create(pool, nvroot, NULL); + nvlist_free(nvroot); + + if (error) + fatal(0, "spa_create() = %d", error); + error = spa_open(pool, &spa, FTAG); + if (error) + fatal(0, "spa_open() = %d", error); + + if (zopt_verbose >= 3) + show_pool_stats(spa); + + spa_close(spa, FTAG); + + kernel_fini(); +} + +int +main(int argc, char **argv) +{ + int kills = 0; + int iters = 0; + int i, f; + ztest_shared_t *zs; + ztest_info_t *zi; + char timebuf[100]; + char numbuf[6]; + + (void) setvbuf(stdout, NULL, _IOLBF, 0); + + /* Override location of zpool.cache */ + spa_config_dir = "/tmp"; + + /* + * Blow away any existing copy of zpool.cache + */ + (void) remove("/tmp/zpool.cache"); + + ztest_random_fd = open("/dev/urandom", O_RDONLY); + + process_options(argc, argv); + + argc -= optind; + argv += optind; + + dprintf_setup(&argc, argv); + + zs = ztest_shared = (void *)mmap(0, + P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + + if (zopt_verbose >= 1) { + (void) printf("%llu vdevs, %d datasets, %d threads," + " %llu seconds...\n", + (u_longlong_t)zopt_vdevs, zopt_dirs, zopt_threads, + (u_longlong_t)zopt_time); + } + + /* + * Create and initialize our storage pool. + */ + for (i = 1; i <= zopt_init; i++) { + bzero(zs, sizeof (ztest_shared_t)); + if (zopt_verbose >= 3 && zopt_init != 1) + (void) printf("ztest_init(), pass %d\n", i); + ztest_init(zopt_pool); + } + + /* + * Initialize the call targets for each function. + */ + for (f = 0; f < ZTEST_FUNCS; f++) { + zi = &zs->zs_info[f]; + + *zi = ztest_info[f]; + + if (*zi->zi_interval == 0) + zi->zi_call_target = UINT64_MAX; + else + zi->zi_call_target = zopt_time / *zi->zi_interval; + } + + zs->zs_start_time = gethrtime(); + zs->zs_stop_time = zs->zs_start_time + zopt_time * NANOSEC; + + /* + * Run the tests in a loop. 
These tests include fault injection + * to verify that self-healing data works, and forced crashes + * to verify that we never lose on-disk consistency. + */ + while (gethrtime() < zs->zs_stop_time) { + int status; + pid_t pid; + char *tmp; + + /* + * Initialize the workload counters for each function. + */ + for (f = 0; f < ZTEST_FUNCS; f++) { + zi = &zs->zs_info[f]; + zi->zi_calls = 0; + zi->zi_call_time = 0; + } + + pid = fork(); + + if (pid == -1) + fatal(1, "fork failed"); + + if (pid == 0) { /* child */ + struct rlimit rl = { 1024, 1024 }; + (void) setrlimit(RLIMIT_NOFILE, &rl); + ztest_run(zopt_pool); + exit(0); + } + + while (waitpid(pid, &status, WEXITED) != pid) + continue; + + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + (void) fprintf(stderr, + "child exited with code %d\n", + WEXITSTATUS(status)); + exit(2); + } + } else { + if (WTERMSIG(status) != SIGKILL) { + (void) fprintf(stderr, + "child died with signal %d\n", + WTERMSIG(status)); + exit(3); + } + kills++; + } + + iters++; + + if (zopt_verbose >= 1) { + hrtime_t now = gethrtime(); + + now = MIN(now, zs->zs_stop_time); + print_time(zs->zs_stop_time - now, timebuf); + nicenum(zs->zs_space, numbuf); + + (void) printf("Pass %3d, %8s, %3llu ENOSPC, " + "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", + iters, + WIFEXITED(status) ? 
"Complete" : "SIGKILL", + (u_longlong_t)zs->zs_enospc_count, + 100.0 * zs->zs_alloc / zs->zs_space, + numbuf, + 100.0 * (now - zs->zs_start_time) / + (zopt_time * NANOSEC), timebuf); + } + + if (zopt_verbose >= 2) { + (void) printf("\nWorkload summary:\n\n"); + (void) printf("%7s %9s %s\n", + "Calls", "Time", "Function"); + (void) printf("%7s %9s %s\n", + "-----", "----", "--------"); + for (f = 0; f < ZTEST_FUNCS; f++) { + Dl_info dli; + + zi = &zs->zs_info[f]; + print_time(zi->zi_call_time, timebuf); + (void) dladdr((void *)zi->zi_func, &dli); + (void) printf("%7llu %9s %s\n", + (u_longlong_t)zi->zi_calls, timebuf, + dli.dli_sname); + } + (void) printf("\n"); + } + + /* + * It's possible that we killed a child during a rename test, in + * which case we'll have a 'ztest_tmp' pool lying around instead + * of 'ztest'. Do a blind rename in case this happened. + */ + tmp = umem_alloc(strlen(zopt_pool) + 5, UMEM_NOFAIL); + (void) strcpy(tmp, zopt_pool); + (void) strcat(tmp, "_tmp"); + kernel_init(FREAD | FWRITE); + (void) spa_rename(tmp, zopt_pool); + kernel_fini(); + umem_free(tmp, strlen(tmp) + 1); + } + + ztest_verify_blocks(zopt_pool); + + if (zopt_verbose >= 1) { + (void) printf("%d killed, %d completed, %.0f%% kill rate\n", + kills, iters - kills, (100.0 * kills) / MAX(1, iters)); + } + + return (0); +} |
