diff options
author | Keith M Wesolowski <wesolows@foobazco.org> | 2014-06-09 16:28:05 +0000 |
---|---|---|
committer | Keith M Wesolowski <wesolows@foobazco.org> | 2014-06-09 16:28:05 +0000 |
commit | 63da7fe1962f09fdb0ec7ce4bd5d017bb452d092 (patch) | |
tree | 477f812dd092d3c1c644c19188412687484ec132 | |
parent | 63162cf70473b17134d09687effc3da3d4f75588 (diff) | |
parent | 3038a2b421b40dc5ac11cd88423696618584f85a (diff) | |
download | illumos-joyent-63da7fe1962f09fdb0ec7ce4bd5d017bb452d092.tar.gz |
[illumos-gate merge]
commit 3038a2b421b40dc5ac11cd88423696618584f85a
4897 Space accounting mismatch in L2ARC/zpool
commit df15e419cb7359ba56ddddab9045e438d89e7cbc
4891 want zdb option to dump all metadata
commit 05d57413471eaaa425913a06edc2ab33ad9b05bc
1667 pkcs11 may deadlock when multi-threaded consumers fork
-rw-r--r-- | usr/src/cmd/zdb/zdb.c | 15 | ||||
-rw-r--r-- | usr/src/lib/libcryptoutil/common/cryptoutil.h | 4 | ||||
-rw-r--r-- | usr/src/lib/libcryptoutil/common/mapfile-vers | 4 | ||||
-rw-r--r-- | usr/src/lib/libcryptoutil/common/random.c | 50 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/kernel.c | 24 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/sys/zfs_context.h | 2 | ||||
-rw-r--r-- | usr/src/lib/pkcs11/libpkcs11/common/metaGeneral.c | 11 | ||||
-rw-r--r-- | usr/src/lib/pkcs11/pkcs11_softtoken/common/softGeneral.c | 11 | ||||
-rw-r--r-- | usr/src/man/man1m/zdb.1m | 20 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/arc.c | 18 |
10 files changed, 132 insertions, 27 deletions
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 281426bb5d..5827623747 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -112,7 +112,7 @@ usage(void) { (void) fprintf(stderr, "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "[-U config] [-M inflight I/Os] poolname [object...]\n" + "[-U config] [-M inflight I/Os] [-x dumpdir] poolname [object...]\n" " %s [-divPA] [-e -p path...] [-U config] dataset " "[object...]\n" " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " @@ -150,7 +150,7 @@ usage(void) (void) fprintf(stderr, " -R read and display block from a " "device\n\n"); (void) fprintf(stderr, " Below options are intended for use " - "with other options (except -l):\n"); + "with other options:\n"); (void) fprintf(stderr, " -A ignore assertions (-A), enable " "panic recovery (-AA) or both (-AAA)\n"); (void) fprintf(stderr, " -F attempt automatic rewind within " @@ -163,11 +163,14 @@ usage(void) "has altroot/not in a cachefile\n"); (void) fprintf(stderr, " -p <path> -- use one or more with " "-e to specify path to vdev dir\n"); + (void) fprintf(stderr, " -x <dumpdir> -- " + "dump all read blocks into specified directory\n"); (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t <txg> -- highest txg to use when " "searching for uberblocks\n"); (void) fprintf(stderr, " -M <number of inflight I/Os> -- " - "specify the maximum number of checksumming I/Os [default is 200]"); + "specify the maximum number of " + "checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); @@ -3325,7 +3328,8 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) { + while ((c = getopt(argc, argv, + "bcdhilmM:suCDRSAFLXx:evp:t:U:P")) != -1) { switch (c) { case 'b': case 'c': @@ -3378,6 +3382,9 @@ main(int argc, char **argv) } searchdirs[nsearch++] = optarg; break; + case 'x': + vn_dumpdir = optarg; + break; case 't': max_txg = strtoull(optarg, NULL, 0); if (max_txg < TXG_INITIAL) { diff --git a/usr/src/lib/libcryptoutil/common/cryptoutil.h b/usr/src/lib/libcryptoutil/common/cryptoutil.h index b5aad833f7..63a3df665f 100644 --- a/usr/src/lib/libcryptoutil/common/cryptoutil.h +++ b/usr/src/lib/libcryptoutil/common/cryptoutil.h @@ -22,6 +22,7 @@ */ /* * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. */ #ifndef _CRYPTOUTIL_H @@ -216,9 +217,6 @@ extern int pkcs11_seed_urandom(void *sbuf, size_t slen); extern int pkcs11_get_random(void *dbuf, size_t dlen); extern int pkcs11_get_urandom(void *dbuf, size_t dlen); extern int pkcs11_get_nzero_urandom(void *dbuf, size_t dlen); -extern void pkcs11_close_random(void); -extern void pkcs11_close_urandom(void); -extern void pkcs11_close_urandom_seed(void); extern int pkcs11_read_data(char *filename, void **dbuf, size_t *dlen); extern int open_nointr(const char *path, int oflag, ...); diff --git a/usr/src/lib/libcryptoutil/common/mapfile-vers b/usr/src/lib/libcryptoutil/common/mapfile-vers index 5d3c214b55..c7f2576f37 100644 --- a/usr/src/lib/libcryptoutil/common/mapfile-vers +++ b/usr/src/lib/libcryptoutil/common/mapfile-vers @@ -19,6 +19,7 @@ # CDDL HEADER END # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2014, OmniTI Computer Consulting Inc. All rights reserved. # # @@ -54,9 +55,6 @@ SYMBOL_VERSION SUNWprivate { get_pkcs11conf_info; hexstr_to_bytes; open_nointr; - pkcs11_close_random; - pkcs11_close_urandom; - pkcs11_close_urandom_seed; pkcs11_default_token; pkcs11_free_uri; pkcs11_get_nzero_urandom; diff --git a/usr/src/lib/libcryptoutil/common/random.c b/usr/src/lib/libcryptoutil/common/random.c index 771112850a..ab07168409 100644 --- a/usr/src/lib/libcryptoutil/common/random.c +++ b/usr/src/lib/libcryptoutil/common/random.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. */ #include <stdio.h> @@ -33,6 +34,7 @@ #include <cryptoutil.h> #include <pthread.h> +#pragma init(pkcs11_random_init) static pthread_mutex_t random_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t urandom_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -181,13 +183,13 @@ pkcs11_close_common(int *fd, pthread_mutex_t *mtx) (void) pthread_mutex_unlock(mtx); } -void +static void pkcs11_close_random(void) { pkcs11_close_common(&random_fd, &random_mutex); } -void +static void pkcs11_close_urandom(void) { pkcs11_close_common(&urandom_fd, &urandom_mutex); @@ -199,7 +201,7 @@ pkcs11_close_random_seed(void) pkcs11_close_common(&random_seed_fd, &random_seed_mutex); } -void +static void pkcs11_close_urandom_seed(void) { pkcs11_close_common(&urandom_seed_fd, &urandom_seed_mutex); @@ -377,3 +379,45 @@ pkcs11_get_nzero_urandom(void *dbuf, size_t dlen) } return (0); } + +static void +pkcs11_random_prepare(void) +{ + /* + * NOTE - None of these are acquired more than one at a time. + * I can therefore acquire all four without fear of deadlock. + */ + (void) pthread_mutex_lock(&random_mutex); + (void) pthread_mutex_lock(&urandom_mutex); + (void) pthread_mutex_lock(&random_seed_mutex); + (void) pthread_mutex_lock(&urandom_seed_mutex); +} + +static void +pkcs11_random_parent_post(void) +{ + /* Drop the mutexes and get back to work! */ + (void) pthread_mutex_unlock(&urandom_seed_mutex); + (void) pthread_mutex_unlock(&random_seed_mutex); + (void) pthread_mutex_unlock(&urandom_mutex); + (void) pthread_mutex_unlock(&random_mutex); +} + +static void +pkcs11_random_child_post(void) +{ + pkcs11_random_parent_post(); + + /* Also, close the FDs, just in case. */ + pkcs11_close_random(); + pkcs11_close_urandom(); + pkcs11_close_random_seed(); + pkcs11_close_urandom_seed(); +} + +static void +pkcs11_random_init(void) +{ + (void) pthread_atfork(pkcs11_random_prepare, pkcs11_random_parent_post, + pkcs11_random_child_post); +} diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index d030700415..418ef435b0 100644 --- a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -31,6 +31,7 @@ #include <stdlib.h> #include <string.h> #include <zlib.h> +#include <libgen.h> #include <sys/spa.h> #include <sys/stat.h> #include <sys/processor.h> @@ -51,6 +52,9 @@ char hw_serial[HW_HOSTID_LEN]; kmutex_t cpu_lock; vmem_t *zio_arena = NULL; +/* If set, all blocks read will be copied to the specified directory. */ +char *vn_dumpdir = NULL; + struct utsname utsname = { "userland", "libzpool", "1", "1", "na" }; @@ -394,6 +398,7 @@ int vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) { int fd; + int dump_fd; vnode_t *vp; int old_umask; char realpath[MAXPATHLEN]; @@ -442,6 +447,17 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) if (flags & FCREAT) (void) umask(old_umask); + if (vn_dumpdir != NULL) { + char dumppath[MAXPATHLEN]; + (void) snprintf(dumppath, sizeof (dumppath), + "%s/%s", vn_dumpdir, basename(realpath)); + dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); + if (dump_fd == -1) + return (errno); + } else { + dump_fd = -1; + } + if (fd == -1) return (errno); @@ -457,6 +473,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) vp->v_fd = fd; vp->v_size = st.st_size; vp->v_path = spa_strdup(path); + vp->v_dump_fd = dump_fd; return (0); } @@ -489,6 +506,11 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, if (uio == UIO_READ) { iolen = pread64(vp->v_fd, addr, len, offset); + if (vp->v_dump_fd != -1) { + int status = + pwrite64(vp->v_dump_fd, addr, iolen, offset); + ASSERT(status != -1); + } } else { /* * To simulate partial disk writes, we split writes into two @@ -515,6 +537,8 @@ void vn_close(vnode_t *vp) { close(vp->v_fd); + if (vp->v_dump_fd != -1) + close(vp->v_dump_fd); spa_strfree(vp->v_path); umem_free(vp, sizeof (vnode_t)); } diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h index 7ba48fce69..8ee04e7009 100644 --- a/usr/src/lib/libzpool/common/sys/zfs_context.h +++ b/usr/src/lib/libzpool/common/sys/zfs_context.h @@ -390,8 +390,10 @@ typedef struct vnode { uint64_t v_size; int v_fd; char *v_path; + int v_dump_fd; } vnode_t; +extern char *vn_dumpdir; #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ typedef struct xoptattr { diff --git a/usr/src/lib/pkcs11/libpkcs11/common/metaGeneral.c b/usr/src/lib/pkcs11/libpkcs11/common/metaGeneral.c index 5e5c339b03..32b00216da 100644 --- a/usr/src/lib/pkcs11/libpkcs11/common/metaGeneral.c +++ b/usr/src/lib/pkcs11/libpkcs11/common/metaGeneral.c @@ -21,8 +21,9 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. */ - /* * General-Purpose Functions * (as defined in PKCS#11 spec section 11.4) @@ -196,8 +197,12 @@ meta_Finalize(CK_VOID_PTR pReserved) (void) pthread_mutex_lock(&initmutex); - pkcs11_close_urandom(); - pkcs11_close_urandom_seed(); + /* + * There used to be calls to cleanup libcryptoutil here. Given that + * libcryptoutil can be linked and invoked independently of PKCS#11, + * cleaning up libcryptoutil here makes no sense. Decoupling these + * two also prevent deadlocks and other artificial dependencies. + */ meta_objectManager_finalize(); diff --git a/usr/src/lib/pkcs11/pkcs11_softtoken/common/softGeneral.c b/usr/src/lib/pkcs11/pkcs11_softtoken/common/softGeneral.c index 396a3c5bf4..c44cbcb2a2 100644 --- a/usr/src/lib/pkcs11/pkcs11_softtoken/common/softGeneral.c +++ b/usr/src/lib/pkcs11/pkcs11_softtoken/common/softGeneral.c @@ -21,6 +21,8 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. */ #include <strings.h> @@ -354,9 +356,12 @@ finalize_common(boolean_t force, CK_VOID_PTR pReserved) { softtoken_initialized = B_FALSE; softtoken_pid = 0; - pkcs11_close_urandom(); - pkcs11_close_urandom_seed(); - pkcs11_close_random(); + /* + * There used to be calls to cleanup libcryptoutil here. Given that + * libcryptoutil can be linked and invoked independently of PKCS#11, + * cleaning up libcryptoutil here makes no sense. Decoupling these + * two also prevent deadlocks and other artificial dependencies. + */ /* Destroy the session list lock here */ (void) pthread_mutex_destroy(&soft_sessionlist_mutex); diff --git a/usr/src/man/man1m/zdb.1m b/usr/src/man/man1m/zdb.1m index b4796e19c3..c7d7f38f5b 100644 --- a/usr/src/man/man1m/zdb.1m +++ b/usr/src/man/man1m/zdb.1m @@ -11,7 +11,7 @@ .\" .\" .\" Copyright 2012, Richard Lowe. -.\" Copyright (c) 2012 by Delphix. All rights reserved. +.\" Copyright (c) 2012, 2014 by Delphix. All rights reserved. .\" .TH "ZDB" "1M" "March 6, 2014" "" "" @@ -20,8 +20,8 @@ .SH "SYNOPSIS" \fBzdb\fR [-CumdibcsDvhLXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR] - [-U \fIcache\fR] [-M \fIinflight I/Os\fR] [\fIpoolname\fR - [\fIobject\fR ...]] + [-U \fIcache\fR] [-M \fIinflight I/Os\fR] [-x \fIdumpdir\fR] + [\fIpoolname\fR [\fIobject\fR ...]] .P \fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] @@ -355,6 +355,20 @@ Operate on an exported pool, not present in \fB/etc/zfs/zpool.cache\fR. The .sp .ne 2 .na +\fB-x\fR \fIdumpdir\fR +.ad +.sp .6 +.RS 4n +All blocks accessed will be copied to files in the specified directory. +The blocks will be placed in sparse files whose name is the same as +that of the file or device read. zdb can be then run on the generated files. +Note that the \fB-bbc\fR flags are sufficient to access (and thus copy) +all metadata on the pool. +.RE + +.sp +.ne 2 +.na \fB-F\fR .ad .sp .6 diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 73bc0780ce..e6a0480402 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -23,7 +23,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ /* @@ -1595,6 +1595,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) list_remove(l2hdr->b_dev->l2ad_buflist, hdr); ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize); + vdev_space_update(l2hdr->b_dev->l2ad_vdev, + -l2hdr->b_asize, 0, 0); kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); if (hdr->b_state == arc_l2c_only) l2arc_hdr_stat_remove(); @@ -3468,6 +3470,8 @@ arc_release(arc_buf_t *buf, void *tag) if (l2hdr) { ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize); + vdev_space_update(l2hdr->b_dev->l2ad_vdev, + -l2hdr->b_asize, 0, 0); kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); ARCSTAT_INCR(arcstat_l2_size, -buf_size); mutex_exit(&l2arc_buflist_mtx); @@ -4261,6 +4265,7 @@ l2arc_write_done(zio_t *zio) arc_buf_hdr_t *head, *ab, *ab_prev; l2arc_buf_hdr_t *abl2; kmutex_t *hash_lock; + int64_t bytes_dropped = 0; cb = zio->io_private; ASSERT(cb != NULL); @@ -4308,6 +4313,7 @@ l2arc_write_done(zio_t *zio) */ list_remove(buflist, ab); ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize); + bytes_dropped += abl2->b_asize; ab->b_l2hdr = NULL; kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); @@ -4326,6 +4332,8 @@ l2arc_write_done(zio_t *zio) kmem_cache_free(hdr_cache, head); mutex_exit(&l2arc_buflist_mtx); + vdev_space_update(dev->l2ad_vdev, -bytes_dropped, 0, 0); + l2arc_do_free_on_write(); kmem_free(cb, sizeof (l2arc_write_callback_t)); @@ -4464,6 +4472,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) arc_buf_hdr_t *ab, *ab_prev; kmutex_t *hash_lock; uint64_t taddr; + int64_t bytes_evicted = 0; buflist = dev->l2ad_buflist; @@ -4562,6 +4571,7 @@ top: if (ab->b_l2hdr != NULL) { abl2 = ab->b_l2hdr; ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize); + bytes_evicted += abl2->b_asize; ab->b_l2hdr = NULL; kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); @@ -4578,7 +4588,7 @@ top: } mutex_exit(&l2arc_buflist_mtx); - vdev_space_update(dev->l2ad_vdev, -(taddr - dev->l2ad_evict), 0, 0); + vdev_space_update(dev->l2ad_vdev, -bytes_evicted, 0, 0); dev->l2ad_evict = taddr; } @@ -4821,15 +4831,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize); ARCSTAT_INCR(arcstat_l2_size, write_sz); ARCSTAT_INCR(arcstat_l2_asize, write_asize); - vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0); + vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0); /* * Bump device hand to the device start if it is approaching the end. * l2arc_evict() will already have evicted ahead for this case. */ if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { - vdev_space_update(dev->l2ad_vdev, - dev->l2ad_end - dev->l2ad_hand, 0, 0); dev->l2ad_hand = dev->l2ad_start; dev->l2ad_evict = dev->l2ad_start; dev->l2ad_first = B_FALSE; |