diff options
Diffstat (limited to 'usr/src/lib/libzpool/common')
-rw-r--r-- | usr/src/lib/libzpool/common/kernel.c | 675 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/llib-lzpool | 51 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/sys/zfs_context.h | 411 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/taskq.c | 250 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/util.c | 135 |
5 files changed, 1522 insertions, 0 deletions
diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c new file mode 100644 index 0000000000..83155b480f --- /dev/null +++ b/usr/src/lib/libzpool/common/kernel.c @@ -0,0 +1,675 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <sys/zfs_context.h> +#include <poll.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/spa.h> +#include <sys/processor.h> + +/* + * Emulation of kernel services in userland. + */ + +uint64_t physmem; +vnode_t *rootdir = (vnode_t *)0xabcd1234; + +/* + * ========================================================================= + * threads + * ========================================================================= + */ +/*ARGSUSED*/ +kthread_t * +zk_thread_create(void (*func)(), void *arg) +{ + thread_t tid; + + VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, + &tid) == 0); + + return ((void *)(uintptr_t)tid); +} + +/* + * ========================================================================= + * mutexes + * ========================================================================= + */ +void +zmutex_init(kmutex_t *mp) +{ + mp->m_owner = NULL; + (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); +} + +void +zmutex_destroy(kmutex_t *mp) +{ + ASSERT(mp->m_owner == NULL); + (void) _mutex_destroy(&(mp)->m_lock); + mp->m_owner = (void *)-1UL; +} + +void +mutex_enter(kmutex_t *mp) +{ + ASSERT(mp->m_owner != (void *)-1UL); + ASSERT(mp->m_owner != curthread); + (void) mutex_lock(&mp->m_lock); + ASSERT(mp->m_owner == NULL); + mp->m_owner = curthread; +} + +int +mutex_tryenter(kmutex_t *mp) +{ + ASSERT(mp->m_owner != (void *)-1UL); + if (0 == mutex_trylock(&mp->m_lock)) { + ASSERT(mp->m_owner == NULL); + mp->m_owner = curthread; + return (1); + } else { + return (0); + } +} + +void +mutex_exit(kmutex_t *mp) +{ + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + (void) mutex_unlock(&mp->m_lock); +} + +void * +mutex_owner(kmutex_t *mp) +{ + return (mp->m_owner); +} + +/* + * ========================================================================= + * rwlocks + * ========================================================================= + */ +/*ARGSUSED*/ +void +rw_init(krwlock_t *rwlp, char *name, int type, void *arg) +{ + rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); + rwlp->rw_owner = NULL; +} + +void +rw_destroy(krwlock_t *rwlp) +{ + rwlock_destroy(&rwlp->rw_lock); + rwlp->rw_owner = (void *)-1UL; +} + +void +rw_enter(krwlock_t *rwlp, krw_t rw) +{ + ASSERT(!RW_LOCK_HELD(rwlp)); + ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT(rwlp->rw_owner != curthread); + + if (rw == RW_READER) + (void) rw_rdlock(&rwlp->rw_lock); + else + (void) rw_wrlock(&rwlp->rw_lock); + + rwlp->rw_owner = curthread; +} + +void +rw_exit(krwlock_t *rwlp) +{ + ASSERT(rwlp->rw_owner != (void *)-1UL); + + rwlp->rw_owner = NULL; + (void) rw_unlock(&rwlp->rw_lock); +} + +int +rw_tryenter(krwlock_t *rwlp, krw_t rw) +{ + int rv; + + ASSERT(rwlp->rw_owner != (void *)-1UL); + + if (rw == RW_READER) + rv = rw_tryrdlock(&rwlp->rw_lock); + else + rv = rw_trywrlock(&rwlp->rw_lock); + + if (rv == 0) { + rwlp->rw_owner = curthread; + return (1); + } + + return (0); +} + +/*ARGSUSED*/ +int +rw_tryupgrade(krwlock_t *rwlp) +{ + ASSERT(rwlp->rw_owner != (void *)-1UL); + + return (0); +} + +/* + * ========================================================================= + * condition variables + * ========================================================================= + */ +/*ARGSUSED*/ +void +cv_init(kcondvar_t *cv, char *name, int type, void *arg) +{ + (void) cond_init(cv, type, NULL); +} + +void +cv_destroy(kcondvar_t *cv) +{ + (void) cond_destroy(cv); +} + +void +cv_wait(kcondvar_t *cv, kmutex_t *mp) +{ + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + (void) cond_wait(cv, &mp->m_lock); + mp->m_owner = curthread; +} + +clock_t +cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) +{ + int error; + timestruc_t ts; + clock_t delta; + +top: + delta = abstime - lbolt; + if (delta <= 0) + return (-1); + + ts.tv_sec = delta / hz; + ts.tv_nsec = (delta % hz) * (NANOSEC / hz); + + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + error = cond_reltimedwait(cv, &mp->m_lock, &ts); + mp->m_owner = curthread; + + if (error == ETIME) + return (-1); + + if (error == EINTR) + goto top; + + ASSERT(error == 0); + + return (1); +} + +void +cv_signal(kcondvar_t *cv) +{ + (void) cond_signal(cv); +} + +void +cv_broadcast(kcondvar_t *cv) +{ + (void) cond_broadcast(cv); +} + +/* + * ========================================================================= + * vnode operations + * ========================================================================= + */ +/* + * Note: for the xxxat() versions of these functions, we assume that the + * starting vp is always rootdir (which is true for spa_directory.c, the only + * ZFS consumer of these interfaces). We assert this is true, and then emulate + * them by adding '/' in front of the path. + */ + +/*ARGSUSED*/ +int +vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) +{ + int fd; + vnode_t *vp; + int old_umask; + char realpath[MAXPATHLEN]; + struct stat64 st; + + /* + * If we're accessing a real disk from userland, we need to use + * the character interface to avoid caching. This is particularly + * important if we're trying to look at a real in-kernel storage + * pool from userland, e.g. via zdb, because otherwise we won't + * see the changes occurring under the segmap cache. + * On the other hand, the stupid character device returns zero + * for its size. So -- gag -- we open the block device to get + * its size, and remember it for subsequent VOP_GETATTR(). + */ + if (strncmp(path, "/dev/", 5) == 0) { + char *dsk; + fd = open64(path, O_RDONLY); + if (fd == -1) + return (errno); + if (fstat64(fd, &st) == -1) { + close(fd); + return (errno); + } + close(fd); + (void) sprintf(realpath, "%s", path); + dsk = strstr(path, "/dsk/"); + if (dsk != NULL) + (void) sprintf(realpath + (dsk - path) + 1, "r%s", + dsk + 1); + } else { + (void) sprintf(realpath, "%s", path); + if (!(flags & FCREAT) && stat64(realpath, &st) == -1) + return (errno); + } + + if (flags & FCREAT) + old_umask = umask(0); + + /* + * The construct 'flags - FREAD' conveniently maps combinations of + * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. + */ + fd = open64(realpath, flags - FREAD, mode); + + if (flags & FCREAT) + (void) umask(old_umask); + + if (fd == -1) + return (errno); + + if (fstat64(fd, &st) == -1) { + close(fd); + return (errno); + } + + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); + + *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); + + vp->v_fd = fd; + vp->v_size = st.st_size; + vp->v_path = spa_strdup(path); + + return (0); +} + +int +vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, + int x3, vnode_t *startvp) +{ + char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); + int ret; + + ASSERT(startvp == rootdir); + (void) sprintf(realpath, "/%s", path); + + ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); + + umem_free(realpath, strlen(path) + 2); + + return (ret); +} + +/*ARGSUSED*/ +int +vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, + int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) +{ + ssize_t iolen, split; + + if (uio == UIO_READ) { + iolen = pread64(vp->v_fd, addr, len, offset); + } else { + /* + * To simulate partial disk writes, we split writes into two + * system calls so that the process can be killed in between. + */ + split = (len > 0 ? rand() % len : 0); + iolen = pwrite64(vp->v_fd, addr, split, offset); + iolen += pwrite64(vp->v_fd, (char *)addr + split, + len - split, offset + split); + } + + if (iolen == -1) + return (errno); + if (residp) + *residp = len - iolen; + else if (iolen != len) + return (EIO); + return (0); +} + +void +vn_close(vnode_t *vp) +{ + close(vp->v_fd); + spa_strfree(vp->v_path); + umem_free(vp, sizeof (vnode_t)); +} + +#ifdef ZFS_DEBUG + +/* + * ========================================================================= + * Figure out which debugging statements to print + * ========================================================================= + */ + +static char *dprintf_string; +static int dprintf_print_all; + +int +dprintf_find_string(const char *string) +{ + char *tmp_str = dprintf_string; + int len = strlen(string); + + /* + * Find out if this is a string we want to print. + * String format: file1.c,function_name1,file2.c,file3.c + */ + + while (tmp_str != NULL) { + if (strncmp(tmp_str, string, len) == 0 && + (tmp_str[len] == ',' || tmp_str[len] == '\0')) + return (1); + tmp_str = strchr(tmp_str, ','); + if (tmp_str != NULL) + tmp_str++; /* Get rid of , */ + } + return (0); +} + +void +dprintf_setup(int *argc, char **argv) +{ + int i, j; + + /* + * Debugging can be specified two ways: by setting the + * environment variable ZFS_DEBUG, or by including a + * "debug=..." argument on the command line. The command + * line setting overrides the environment variable. + */ + + for (i = 1; i < *argc; i++) { + int len = strlen("debug="); + /* First look for a command line argument */ + if (strncmp("debug=", argv[i], len) == 0) { + dprintf_string = argv[i] + len; + /* Remove from args */ + for (j = i; j < *argc; j++) + argv[j] = argv[j+1]; + argv[j] = NULL; + (*argc)--; + } + } + + if (dprintf_string == NULL) { + /* Look for ZFS_DEBUG environment variable */ + dprintf_string = getenv("ZFS_DEBUG"); + } + + /* + * Are we just turning on all debugging? + */ + if (dprintf_find_string("on")) + dprintf_print_all = 1; +} + +/* + * ========================================================================= + * debug printfs + * ========================================================================= + */ +void +__dprintf(const char *file, const char *func, int line, const char *fmt, ...) +{ + const char *newfile; + va_list adx; + + /* + * Get rid of annoying "../common/" prefix to filename. + */ + newfile = strrchr(file, '/'); + if (newfile != NULL) { + newfile = newfile + 1; /* Get rid of leading / */ + } else { + newfile = file; + } + + if (dprintf_print_all || + dprintf_find_string(newfile) || + dprintf_find_string(func)) { + /* Print out just the function name if requested */ + flockfile(stdout); + if (dprintf_find_string("pid")) + (void) printf("%d ", getpid()); + if (dprintf_find_string("tid")) + (void) printf("%u ", thr_self()); + if (dprintf_find_string("cpu")) + (void) printf("%u ", getcpuid()); + if (dprintf_find_string("time")) + (void) printf("%llu ", gethrtime()); + if (dprintf_find_string("long")) + (void) printf("%s, line %d: ", newfile, line); + (void) printf("%s: ", func); + va_start(adx, fmt); + (void) vprintf(fmt, adx); + va_end(adx); + funlockfile(stdout); + } +} + +#endif /* ZFS_DEBUG */ + +/* + * ========================================================================= + * cmn_err() and panic() + * ========================================================================= + */ +static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; +static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; + +void +vpanic(const char *fmt, va_list adx) +{ + (void) fprintf(stderr, "error: "); + (void) vfprintf(stderr, fmt, adx); + (void) fprintf(stderr, "\n"); + + abort(); /* think of it as a "user-level crash dump" */ +} + +void +panic(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vpanic(fmt, adx); + va_end(adx); +} + +/*PRINTFLIKE2*/ +void +cmn_err(int ce, const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + if (ce == CE_PANIC) + vpanic(fmt, adx); + if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ + (void) fprintf(stderr, "%s", ce_prefix[ce]); + (void) vfprintf(stderr, fmt, adx); + (void) fprintf(stderr, "%s", ce_suffix[ce]); + } + va_end(adx); +} + +/* + * ========================================================================= + * misc routines + * ========================================================================= + */ + +void +delay(clock_t ticks) +{ + poll(0, 0, ticks * (1000 / hz)); +} + +/* + * Find highest one bit set. + * Returns bit number + 1 of highest bit that is set, otherwise returns 0. + * High order bit is 31 (or 63 in _LP64 kernel). + */ +int +highbit(ulong_t i) +{ + register int h = 1; + + if (i == 0) + return (0); +#ifdef _LP64 + if (i & 0xffffffff00000000ul) { + h += 32; i >>= 32; + } +#endif + if (i & 0xffff0000) { + h += 16; i >>= 16; + } + if (i & 0xff00) { + h += 8; i >>= 8; + } + if (i & 0xf0) { + h += 4; i >>= 4; + } + if (i & 0xc) { + h += 2; i >>= 2; + } + if (i & 0x2) { + h += 1; + } + return (h); +} + +static int +random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) +{ + int fd = open(devname, O_RDONLY); + size_t resid = len; + ssize_t bytes; + + ASSERT(fd != -1); + + while (resid != 0) { + bytes = read(fd, ptr, resid); + ASSERT(bytes >= 0); + ptr += bytes; + resid -= bytes; + } + + close(fd); + + return (0); +} + +int +random_get_bytes(uint8_t *ptr, size_t len) +{ + return (random_get_bytes_common(ptr, len, "/dev/random")); +} + +int +random_get_pseudo_bytes(uint8_t *ptr, size_t len) +{ + return (random_get_bytes_common(ptr, len, "/dev/urandom")); +} + +/* + * ========================================================================= + * kernel emulation setup & teardown + * ========================================================================= + */ +static int +umem_out_of_memory(void) +{ + char errmsg[] = "out of memory -- generating core dump\n"; + + write(fileno(stderr), errmsg, sizeof (errmsg)); + abort(); + return (0); +} + +void +kernel_init(int mode) +{ + umem_nofail_callback(umem_out_of_memory); + + physmem = sysconf(_SC_PHYS_PAGES); + + dprintf("physmem = %llu pages (%.2f GB)\n", physmem, + (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); + + spa_init(mode); +} + +void +kernel_fini(void) +{ + spa_fini(); +} diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool new file mode 100644 index 0000000000..90c2d6c4fe --- /dev/null +++ b/usr/src/lib/libzpool/common/llib-lzpool @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <sys/zfs_context.h> +#include <sys/list.h> +#include <sys/list_impl.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/dmu_traverse.h> +#include <sys/dnode.h> +#include <sys/dsl_prop.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/space_map.h> +#include <sys/vdev.h> +#include <sys/vdev_impl.h> +#include <sys/zap.h> +#include <sys/zio.h> +#include <sys/zio_compress.h> +#include <sys/zil.h> +#include <sys/bplist.h> + +extern uint64_t zio_gang_bang; diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h new file mode 100644 index 0000000000..243258be98 --- /dev/null +++ b/usr/src/lib/libzpool/common/sys/zfs_context.h @@ -0,0 +1,411 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZFS_CONTEXT_H +#define _SYS_ZFS_CONTEXT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define _SYS_MUTEX_H +#define _SYS_RWLOCK_H +#define _SYS_CONDVAR_H +#define _SYS_SYSTM_H +#define _SYS_DEBUG_H +#define _SYS_T_LOCK_H +#define _SYS_VNODE_H +#define _SYS_VFS_H +#define _SYS_SUNDDI_H +#define _SYS_CALLB_H + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdarg.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include <synch.h> +#include <thread.h> +#include <assert.h> +#include <alloca.h> +#include <umem.h> +#include <limits.h> +#include <atomic.h> +#include <dirent.h> +#include <time.h> +#include <sys/note.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/bitmap.h> +#include <sys/resource.h> +#include <sys/byteorder.h> +#include <sys/list.h> +#include <sys/uio.h> +#include <sys/zfs_debug.h> +#include <sys/sdt.h> + +/* + * Debugging + */ + +/* + * Note that we are not using the debugging levels. + */ + +#define CE_CONT 0 /* continuation */ +#define CE_NOTE 1 /* notice */ +#define CE_WARN 2 /* warning */ +#define CE_PANIC 3 /* panic */ +#define CE_IGNORE 4 /* print nothing */ + +/* + * ZFS debugging + */ + +#ifdef ZFS_DEBUG +extern void dprintf_setup(int *argc, char **argv); +#endif /* ZFS_DEBUG */ + +extern void cmn_err(int, const char *, ...); +extern void panic(const char *, ...); +extern void vpanic(const char *, __va_list); + +/* This definition is copied from assert.h. */ +#if defined(__STDC__) +#if __STDC_VERSION__ - 0 >= 199901L +#define verify(EX) (void)((EX) || \ + (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0)) +#else +#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0)) +#endif /* __STDC_VERSION__ - 0 >= 199901L */ +#else +#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0)) +#endif /* __STDC__ */ + + +#define VERIFY verify +#define ASSERT assert + +extern void __assert(const char *, const char *, int); + +#ifdef lint +#define VERIFY3_IMPL(x, y, z, t) if (x == z) ((void)0) +#else +/* BEGIN CSTYLED */ +#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \ + const TYPE __left = (TYPE)(LEFT); \ + const TYPE __right = (TYPE)(RIGHT); \ + if (!(__left OP __right)) { \ + char *__buf = alloca(256); \ + (void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \ + #LEFT, #OP, #RIGHT, \ + (u_longlong_t)__left, #OP, (u_longlong_t)__right); \ + __assert(__buf, __FILE__, __LINE__); \ + } \ +_NOTE(CONSTCOND) } while (0) +/* END CSTYLED */ +#endif /* lint */ + +#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t) +#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t) +#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t) + +#ifdef NDEBUG +#define ASSERT3S(x, y, z) ((void)0) +#define ASSERT3U(x, y, z) ((void)0) +#define ASSERT3P(x, y, z) ((void)0) +#else +#define ASSERT3S(x, y, z) VERIFY3S(x, y, z) +#define ASSERT3U(x, y, z) VERIFY3U(x, y, z) +#define ASSERT3P(x, y, z) VERIFY3P(x, y, z) +#endif + +/* + * Dtrace SDT probes have different signatures in userland than they do in + * kernel. If they're being used in kernel code, re-define them out of + * existence for their counterparts in libzpool. + */ + +#ifdef DTRACE_PROBE1 +#undef DTRACE_PROBE1 +#define DTRACE_PROBE1(a, b, c) ((void)0) +#endif /* DTRACE_PROBE1 */ + +#ifdef DTRACE_PROBE2 +#undef DTRACE_PROBE2 +#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) +#endif /* DTRACE_PROBE2 */ + +/* + * Threads + */ +#define curthread ((void *)(uintptr_t)thr_self()) + +typedef struct kthread kthread_t; + +#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ + zk_thread_create(func, arg) +#define thread_exit() thr_exit(0) + +extern kthread_t *zk_thread_create(void (*func)(), void *arg); + +#define issig(why) (FALSE) +#define ISSIG(thr, why) (FALSE) + +/* + * Mutexes + */ +typedef struct kmutex { + void *m_owner; + mutex_t m_lock; +} kmutex_t; + +#define MUTEX_DEFAULT USYNC_THREAD +#undef MUTEX_HELD +#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock) + +/* + * Argh -- we have to get cheesy here because the kernel and userland + * have different signatures for the same routine. + */ +extern int _mutex_init(mutex_t *mp, int type, void *arg); +extern int _mutex_destroy(mutex_t *mp); + +#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp)) +#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp)) + +extern void zmutex_init(kmutex_t *mp); +extern void zmutex_destroy(kmutex_t *mp); +extern void mutex_enter(kmutex_t *mp); +extern void mutex_exit(kmutex_t *mp); +extern int mutex_tryenter(kmutex_t *mp); +extern void *mutex_owner(kmutex_t *mp); + +/* + * RW locks + */ +typedef struct krwlock { + void *rw_owner; + rwlock_t rw_lock; +} krwlock_t; + +typedef int krw_t; + +#define RW_READER 0 +#define RW_WRITER 1 +#define RW_DEFAULT USYNC_THREAD + +#undef RW_READ_HELD +#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock) + +#undef RW_WRITE_HELD +#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock) + +extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); +extern void rw_destroy(krwlock_t *rwlp); +extern void rw_enter(krwlock_t *rwlp, krw_t rw); +extern int rw_tryenter(krwlock_t *rwlp, krw_t rw); +extern int rw_tryupgrade(krwlock_t *rwlp); +extern void rw_exit(krwlock_t *rwlp); +#define rw_downgrade(rwlp) do { } while (0) + +/* + * Condition variables + */ +typedef cond_t kcondvar_t; + +#define CV_DEFAULT USYNC_THREAD + +extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); +extern void cv_destroy(kcondvar_t *cv); +extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); +extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); +extern void cv_signal(kcondvar_t *cv); +extern void cv_broadcast(kcondvar_t *cv); + +/* + * Kernel memory + */ +#define KM_SLEEP UMEM_NOFAIL +#define KM_NOSLEEP UMEM_DEFAULT +#define kmem_alloc(_s, _f) umem_alloc(_s, _f) +#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) +#define kmem_free(_b, _s) umem_free(_b, _s) +#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ + umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) +#define kmem_cache_destroy(_c) umem_cache_destroy(_c) +#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) +#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) +#define kmem_debugging() 0 +#define kmem_cache_reap_now(c) + +typedef umem_cache_t kmem_cache_t; + +/* + * Task queues + */ +typedef struct taskq taskq_t; +typedef uintptr_t taskqid_t; +typedef void (task_func_t)(void *); + +#define TASKQ_PREPOPULATE 0x0001 +#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ +#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ + +#define TQ_SLEEP KM_SLEEP /* Can block for memory */ +#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ +#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ + +extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); +extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); +extern void taskq_destroy(taskq_t *); +extern void taskq_wait(taskq_t *); +extern int taskq_member(taskq_t *, void *); + +/* + * vnodes + */ +typedef struct vnode { + uint64_t v_size; + int v_fd; + char *v_path; +} vnode_t; + +typedef struct vattr { + uint_t va_mask; /* bit-mask of attributes */ + u_offset_t va_size; /* file size in bytes */ +} vattr_t; + +#define AT_TYPE 0x0001 +#define AT_MODE 0x0002 +#define AT_UID 0x0004 +#define AT_GID 0x0008 +#define AT_FSID 0x0010 +#define AT_NODEID 0x0020 +#define AT_NLINK 0x0040 +#define AT_SIZE 0x0080 +#define AT_ATIME 0x0100 +#define AT_MTIME 0x0200 +#define AT_CTIME 0x0400 +#define AT_RDEV 0x0800 +#define AT_BLKSIZE 0x1000 +#define AT_NBLOCKS 0x2000 +#define AT_SEQ 0x8000 + +#define CRCREAT 0 + +#define VOP_CLOSE(vp, f, c, o, cr) 0 +#define VOP_PUTPAGE(vp, of, sz, fl, cr) 0 +#define VOP_GETATTR(vp, vap, fl, cr) ((vap)->va_size = (vp)->v_size, 0) + +#define VOP_FSYNC(vp, f, cr) fsync((vp)->v_fd) + +#define VN_RELE(vp) vn_close(vp) + +extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, + int x2, int x3); +extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, + int x2, int x3, vnode_t *vp); +extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, + offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); +extern void vn_close(vnode_t *vp); + +#define vn_remove(path, x1, x2) remove(path) +#define vn_rename(from, to, seg) rename((from), (to)) + +extern vnode_t *rootdir; + +#include <sys/file.h> /* for FREAD, FWRITE, etc */ + +/* + * Random stuff + */ +#define lbolt (gethrtime() >> 23) +#define lbolt64 (gethrtime() >> 23) +#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */ + +extern void delay(clock_t ticks); + +#define gethrestime_sec() time(NULL) + +#define max_ncpus 64 + +#define minclsyspri 60 +#define maxclsyspri 99 + +#define CPU_SEQID (thr_self() & (max_ncpus - 1)) + +#define kcred NULL +#define CRED() NULL + +extern uint64_t physmem; + +extern int highbit(ulong_t i); +extern int random_get_bytes(uint8_t *ptr, size_t len); +extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); + +extern void kernel_init(int); +extern void kernel_fini(void); + +struct spa; +extern void nicenum(uint64_t num, char *buf); +extern void show_pool_stats(struct spa *); + +typedef struct callb_cpr { + kmutex_t *cc_lockp; +} callb_cpr_t; + +#define CALLB_CPR_INIT(cp, lockp, func, name) { \ + (cp)->cc_lockp = lockp; \ +} + +#define CALLB_CPR_SAFE_BEGIN(cp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ +} + +#define CALLB_CPR_SAFE_END(cp, lockp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ +} + +#define CALLB_CPR_EXIT(cp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ + mutex_exit((cp)->cc_lockp); \ +} + +#define zone_dataset_visible(x, y) (1) +#define INGLOBALZONE(z) (1) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_CONTEXT_H */ diff --git a/usr/src/lib/libzpool/common/taskq.c b/usr/src/lib/libzpool/common/taskq.c new file mode 100644 index 0000000000..f7b65718c3 --- /dev/null +++ b/usr/src/lib/libzpool/common/taskq.c @@ -0,0 +1,250 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zfs_context.h> + +int taskq_now; + +typedef struct task { + struct task *task_next; + struct task *task_prev; + task_func_t *task_func; + void *task_arg; +} task_t; + +#define TASKQ_ACTIVE 0x00010000 + +struct taskq { + kmutex_t tq_lock; + krwlock_t tq_threadlock; + kcondvar_t tq_dispatch_cv; + kcondvar_t tq_wait_cv; + thread_t *tq_threadlist; + int tq_flags; + int tq_active; + int tq_nthreads; + int tq_nalloc; + int tq_minalloc; + int tq_maxalloc; + task_t *tq_freelist; + task_t tq_task; +}; + +static task_t * +task_alloc(taskq_t *tq, int tqflags) +{ + task_t *t; + + if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { + tq->tq_freelist = t->task_next; + } else { + mutex_exit(&tq->tq_lock); + if (tq->tq_nalloc >= tq->tq_maxalloc) { + if (!(tqflags & KM_SLEEP)) { + mutex_enter(&tq->tq_lock); + return (NULL); + } + /* + * We don't want to exceed tq_maxalloc, but we can't + * wait for other tasks to complete (and thus free up + * task structures) without risking deadlock with + * the caller. So, we just delay for one second + * to throttle the allocation rate. + */ + delay(hz); + } + t = kmem_alloc(sizeof (task_t), tqflags); + mutex_enter(&tq->tq_lock); + if (t != NULL) + tq->tq_nalloc++; + } + return (t); +} + +static void +task_free(taskq_t *tq, task_t *t) +{ + if (tq->tq_nalloc <= tq->tq_minalloc) { + t->task_next = tq->tq_freelist; + tq->tq_freelist = t; + } else { + tq->tq_nalloc--; + mutex_exit(&tq->tq_lock); + kmem_free(t, sizeof (task_t)); + mutex_enter(&tq->tq_lock); + } +} + +taskqid_t +taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) +{ + task_t *t; + + if (taskq_now) { + func(arg); + return (1); + } + + mutex_enter(&tq->tq_lock); + ASSERT(tq->tq_flags & TASKQ_ACTIVE); + if ((t = task_alloc(tq, tqflags)) == NULL) { + mutex_exit(&tq->tq_lock); + return (0); + } + t->task_next = &tq->tq_task; + t->task_prev = tq->tq_task.task_prev; + t->task_next->task_prev = t; + t->task_prev->task_next = t; + t->task_func = func; + t->task_arg = arg; + cv_signal(&tq->tq_dispatch_cv); + mutex_exit(&tq->tq_lock); + return (1); +} + +void +taskq_wait(taskq_t *tq) +{ + mutex_enter(&tq->tq_lock); + while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + mutex_exit(&tq->tq_lock); +} + +static void * +taskq_thread(void *arg) +{ + taskq_t *tq = arg; + task_t *t; + + mutex_enter(&tq->tq_lock); + while (tq->tq_flags & TASKQ_ACTIVE) { + if ((t = tq->tq_task.task_next) == &tq->tq_task) { + if (--tq->tq_active == 0) + cv_broadcast(&tq->tq_wait_cv); + cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock); + tq->tq_active++; + continue; + } + t->task_prev->task_next = t->task_next; + t->task_next->task_prev = t->task_prev; + mutex_exit(&tq->tq_lock); + + rw_enter(&tq->tq_threadlock, RW_READER); + t->task_func(t->task_arg); + rw_exit(&tq->tq_threadlock); + + mutex_enter(&tq->tq_lock); + task_free(tq, t); + } + tq->tq_nthreads--; + cv_broadcast(&tq->tq_wait_cv); + mutex_exit(&tq->tq_lock); + return (NULL); +} + +/*ARGSUSED*/ +taskq_t * +taskq_create(const char *name, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); + int t; + + rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); + tq->tq_flags = flags | TASKQ_ACTIVE; + tq->tq_active = nthreads; + tq->tq_nthreads = nthreads; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_task.task_next = &tq->tq_task; + tq->tq_task.task_prev = &tq->tq_task; + tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); + + if (flags & TASKQ_PREPOPULATE) { + mutex_enter(&tq->tq_lock); + while (minalloc-- > 0) + task_free(tq, task_alloc(tq, KM_SLEEP)); + mutex_exit(&tq->tq_lock); + } + + for (t = 0; t < nthreads; t++) + (void) thr_create(0, 0, taskq_thread, + tq, THR_BOUND, &tq->tq_threadlist[t]); + + return (tq); +} + +void +taskq_destroy(taskq_t *tq) +{ + int t; + int nthreads = tq->tq_nthreads; + + taskq_wait(tq); + + mutex_enter(&tq->tq_lock); + + tq->tq_flags &= ~TASKQ_ACTIVE; + cv_broadcast(&tq->tq_dispatch_cv); + + while (tq->tq_nthreads != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + + tq->tq_minalloc = 0; + while (tq->tq_nalloc != 0) { + ASSERT(tq->tq_freelist != NULL); + task_free(tq, task_alloc(tq, KM_SLEEP)); + } + + mutex_exit(&tq->tq_lock); + + for (t = 0; t < nthreads; t++) + (void) thr_join(tq->tq_threadlist[t], NULL, NULL); + + kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); + + rw_destroy(&tq->tq_threadlock); + + kmem_free(tq, sizeof (taskq_t)); +} + +int +taskq_member(taskq_t *tq, void *t) +{ + int i; + + if (taskq_now) + return (1); + + for (i = 0; i < tq->tq_nthreads; i++) + if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) + return (1); + + return (0); +} diff --git a/usr/src/lib/libzpool/common/util.c b/usr/src/lib/libzpool/common/util.c new file mode 100644 index 0000000000..28a6704702 --- /dev/null +++ b/usr/src/lib/libzpool/common/util.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <sys/zfs_context.h> +#include <sys/avl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/spa.h> +#include <sys/fs/zfs.h> + +/* + * Routines needed by more than one client of libzpool. + */ + +void +nicenum(uint64_t num, char *buf) +{ + uint64_t n = num; + int index = 0; + char u; + + while (n >= 1024) { + n = (n + (1024 / 2)) / 1024; /* Round up or down */ + index++; + } + + u = " KMGTPE"[index]; + + if (index == 0) { + (void) sprintf(buf, "%llu", (u_longlong_t)n); + } else if (n < 10 && (num & (num - 1)) != 0) { + (void) sprintf(buf, "%.2f%c", + (double)num / (1ULL << 10 * index), u); + } else if (n < 100 && (num & (num - 1)) != 0) { + (void) sprintf(buf, "%.1f%c", + (double)num / (1ULL << 10 * index), u); + } else { + (void) sprintf(buf, "%llu%c", (u_longlong_t)n, u); + } +} + +static void +show_vdev_stats(const char *desc, nvlist_t *nv, int indent) +{ + nvlist_t **child; + uint_t c, children; + vdev_stat_t *vs; + uint64_t sec; + char used[6], avail[6]; + char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6]; + + if (indent == 0) { + (void) printf(" " + " capacity operations bandwidth ---- errors ----\n"); + (void) printf("description " + "used avail read write read write read write cksum\n"); + } + + VERIFY(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + (uint64_t **)&vs, &c) == 0); + + sec = MAX(1, vs->vs_timestamp / NANOSEC); + + nicenum(vs->vs_alloc, used); + nicenum(vs->vs_space - vs->vs_alloc, avail); + nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops); + nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops); + nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes); + nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes); + nicenum(vs->vs_read_errors, rerr); + nicenum(vs->vs_write_errors, werr); + nicenum(vs->vs_checksum_errors, cerr); + + (void) printf("%*s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n", + indent, "", + indent - 19 - (vs->vs_space ? 0 : 12), desc, + vs->vs_space ? 6 : 0, vs->vs_space ? used : "", + vs->vs_space ? 6 : 0, vs->vs_space ? avail : "", + rops, wops, rbytes, wbytes, rerr, werr, cerr); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + nvlist_t *cnv = child[c]; + char *cname; + if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && + nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) + cname = "<unknown>"; + show_vdev_stats(cname, cnv, indent + 2); + } +} + +void +show_pool_stats(spa_t *spa) +{ + nvlist_t *config = NULL; + nvlist_t *nvroot = NULL; + + spa_config_enter(spa, RW_READER); + VERIFY(spa_get_stats(spa_name(spa), &config) == 0); + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + show_vdev_stats(spa_name(spa), nvroot, 0); + spa_config_exit(spa); +} |