diff options
| field | value | date |
|---|---|---|
| author | ahrens <none@none> | 2005-10-31 11:33:35 -0800 |
| committer | ahrens <none@none> | 2005-10-31 11:33:35 -0800 |
| commit | fa9e4066f08beec538e775443c5be79dd423fcab (patch) | |
| tree | 576d99665e57bb7cb70584431adb08c14d47e3ce /usr/src/uts/common/os | |
| parent | f1b64740276f67fc6914c1d855f2af601efe99ac (diff) | |
| download | illumos-joyent-fa9e4066f08beec538e775443c5be79dd423fcab.tar.gz | |
PSARC 2002/240 ZFS
6338653 Integrate ZFS
PSARC 2004/652 - DKIOCFLUSH
5096886 Write caching disks need mechanism to flush cache to physical media
Diffstat (limited to 'usr/src/uts/common/os')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/os/autoconf.c | 20 |
| -rw-r--r-- | usr/src/uts/common/os/devcfg.c | 14 |
| -rw-r--r-- | usr/src/uts/common/os/kmem.c | 24 |
| -rw-r--r-- | usr/src/uts/common/os/list.c | 15 |
| -rw-r--r-- | usr/src/uts/common/os/policy.c | 12 |
| -rw-r--r-- | usr/src/uts/common/os/printf.c | 19 |
| -rw-r--r-- | usr/src/uts/common/os/sunddi.c | 4 |
| -rw-r--r-- | usr/src/uts/common/os/sunndi.c | 2 |
| -rw-r--r-- | usr/src/uts/common/os/swapgeneric.c | 4 |
| -rw-r--r-- | usr/src/uts/common/os/zone.c | 141 |
10 files changed, 228 insertions, 27 deletions
diff --git a/usr/src/uts/common/os/autoconf.c b/usr/src/uts/common/os/autoconf.c index 9be275c03b..6127073b28 100644 --- a/usr/src/uts/common/os/autoconf.c +++ b/usr/src/uts/common/os/autoconf.c @@ -208,11 +208,11 @@ getlongprop_buf(int id, char *name, char *buf, int maxlen) { int size; - size = prom_getproplen((dnode_t)id, name); + size = prom_getproplen((pnode_t)id, name); if (size <= 0 || (size > maxlen - 1)) return (-1); - if (-1 == prom_getprop((dnode_t)id, name, buf)) + if (-1 == prom_getprop((pnode_t)id, name, buf)) return (-1); /* @@ -246,14 +246,14 @@ get_neighbors(dev_info_t *di, int flag) snid = cnid = 0; switch (flag) { case DDI_WALK_PRUNESIB: - cnid = (int)prom_childnode((dnode_t)nid); + cnid = (int)prom_childnode((pnode_t)nid); break; case DDI_WALK_PRUNECHILD: - snid = (int)prom_nextnode((dnode_t)nid); + snid = (int)prom_nextnode((pnode_t)nid); break; case 0: - snid = (int)prom_nextnode((dnode_t)nid); - cnid = (int)prom_childnode((dnode_t)nid); + snid = (int)prom_nextnode((pnode_t)nid); + cnid = (int)prom_childnode((pnode_t)nid); break; default: return (DDI_WALK_TERMINATE); @@ -265,7 +265,7 @@ get_neighbors(dev_info_t *di, int flag) * add the first sibling that passes check_status() */ for (; snid && (snid != -1); - snid = (int)prom_nextnode((dnode_t)snid)) { + snid = (int)prom_nextnode((pnode_t)snid)) { if (getlongprop_buf(snid, OBP_NAME, buf, sizeof (buf)) > 0) { if (check_status(snid, buf, parent) == @@ -286,9 +286,9 @@ get_neighbors(dev_info_t *di, int flag) if (check_status(cnid, buf, di) == DDI_SUCCESS) { (void) ddi_add_child(di, buf, cnid, -1); } else { - for (cnid = (int)prom_nextnode((dnode_t)cnid); + for (cnid = (int)prom_nextnode((pnode_t)cnid); cnid && (cnid != -1); - cnid = (int)prom_nextnode((dnode_t)cnid)) { + cnid = (int)prom_nextnode((pnode_t)cnid)) { if (getlongprop_buf(cnid, OBP_NAME, buf, sizeof (buf)) > 0) { if (check_status(cnid, buf, di) @@ -352,7 +352,7 @@ static void create_devinfo_tree(void) { major_t major; - dnode_t nodeid; 
+ pnode_t nodeid; i_ddi_node_cache_init(); #if defined(__sparc) diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c index c11cbcdf86..deabd79f37 100644 --- a/usr/src/uts/common/os/devcfg.c +++ b/usr/src/uts/common/os/devcfg.c @@ -80,7 +80,7 @@ struct mt_config_handle { }; struct devi_nodeid { - dnode_t nodeid; + pnode_t nodeid; dev_info_t *dip; struct devi_nodeid *next; }; @@ -196,7 +196,7 @@ i_ddi_node_cache_init() * The allocated node has a reference count of 0. */ dev_info_t * -i_ddi_alloc_node(dev_info_t *pdip, char *node_name, dnode_t nodeid, +i_ddi_alloc_node(dev_info_t *pdip, char *node_name, pnode_t nodeid, int instance, ddi_prop_t *sys_prop, int flag) { struct dev_info *devi; @@ -1666,7 +1666,7 @@ ndi_devi_tryenter(dev_info_t *dip, int *circular) * not allowed to sleep. */ int -ndi_devi_alloc(dev_info_t *parent, char *node_name, dnode_t nodeid, +ndi_devi_alloc(dev_info_t *parent, char *node_name, pnode_t nodeid, dev_info_t **ret_dip) { ASSERT(node_name != NULL); @@ -1686,7 +1686,7 @@ ndi_devi_alloc(dev_info_t *parent, char *node_name, dnode_t nodeid, * This routine may sleep and should not be called at interrupt time */ void -ndi_devi_alloc_sleep(dev_info_t *parent, char *node_name, dnode_t nodeid, +ndi_devi_alloc_sleep(dev_info_t *parent, char *node_name, pnode_t nodeid, dev_info_t **ret_dip) { ASSERT(node_name != NULL); @@ -3652,7 +3652,7 @@ init_spec_child(dev_info_t *pdip, struct hwc_spec *specp, uint_t flags) return; } - dip = i_ddi_alloc_node(pdip, node_name, (dnode_t)DEVI_PSEUDO_NODEID, + dip = i_ddi_alloc_node(pdip, node_name, (pnode_t)DEVI_PSEUDO_NODEID, -1, specp->hwc_devi_sys_prop_ptr, KM_SLEEP); if (dip == NULL) @@ -5367,7 +5367,7 @@ path_to_major(char *path) { dev_info_t *dip; char *p, *q; - dnode_t nodeid; + pnode_t nodeid; major_t maj; /* @@ -6390,7 +6390,7 @@ mt_config_driver(struct mt_config_handle *hdl) * NOTE: This function will return NULL for .conf nodeids. 
*/ dev_info_t * -e_ddi_nodeid_to_dip(dnode_t nodeid) +e_ddi_nodeid_to_dip(pnode_t nodeid) { dev_info_t *dip = NULL; struct devi_nodeid *prev, *elem; diff --git a/usr/src/uts/common/os/kmem.c b/usr/src/uts/common/os/kmem.c index 7ae9d5f16b..be7ba8b0cc 100644 --- a/usr/src/uts/common/os/kmem.c +++ b/usr/src/uts/common/os/kmem.c @@ -1682,6 +1682,21 @@ kmem_cache_magazine_enable(kmem_cache_t *cp) } /* + * Reap (almost) everything right now. See kmem_cache_magazine_purge() + * for explanation of the back-to-back kmem_depot_ws_update() calls. + */ +void +kmem_cache_reap_now(kmem_cache_t *cp) +{ + kmem_depot_ws_update(cp); + kmem_depot_ws_update(cp); + + (void) taskq_dispatch(kmem_taskq, + (task_func_t *)kmem_depot_ws_reap, cp, TQ_SLEEP); + taskq_wait(kmem_taskq); +} + +/* * Recompute a cache's magazine size. The trade-off is that larger magazines * provide a higher transfer rate with the depot, while smaller magazines * reduce memory consumption. Magazine resizing is an expensive operation; @@ -1978,6 +1993,15 @@ kmem_maxavail(void) return ((size_t)ptob(MAX(MIN(pmem, vmem), 0))); } +/* + * Indicate whether memory-intensive kmem debugging is enabled. + */ +int +kmem_debugging(void) +{ + return (kmem_flags & (KMF_AUDIT | KMF_REDZONE)); +} + kmem_cache_t * kmem_cache_create( char *name, /* descriptive name for this cache */ diff --git a/usr/src/uts/common/os/list.c b/usr/src/uts/common/os/list.c index b511d60bdc..8194b79147 100644 --- a/usr/src/uts/common/os/list.c +++ b/usr/src/uts/common/os/list.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -180,3 +180,16 @@ list_move_tail(list_t *dst, list_t *src) /* empty src list */ srcnode->list_next = srcnode->list_prev = srcnode; } + +int +list_link_active(list_node_t *link) +{ + ASSERT((link->list_next == NULL) == (link->list_prev == NULL)); + return (link->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return (list_empty(list)); +} diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c index 8c0b61a6c5..a5b011a3f8 100644 --- a/usr/src/uts/common/os/policy.c +++ b/usr/src/uts/common/os/policy.c @@ -1761,3 +1761,15 @@ secpolicy_gart_map(const cred_t *cr) } return (0); } + +/* + * secpolicy_zfs + * + * Determine if the user has permission to manipulate ZFS datasets (not pools). + * Equivalent to the SYS_MOUNT privilege. + */ +int +secpolicy_zfs(const cred_t *cr) +{ + return (PRIV_POLICY(cr, PRIV_SYS_MOUNT, B_FALSE, EPERM, NULL)); +} diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c index d21e8cb029..603da31b62 100644 --- a/usr/src/uts/common/os/printf.c +++ b/usr/src/uts/common/os/printf.c @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -298,6 +298,23 @@ assfail(const char *a, const char *f, int l) return (0); } +void +assfail3(const char *a, uintmax_t lv, const char *op, uintmax_t rv, + const char *f, int l) +{ + if (aask) { + printf("ASSERTION CAUGHT: %s (0x%llx %s 0x%llx), file: %s, " + "line: %d", a, (u_longlong_t)lv, op, (u_longlong_t)rv, + f, l); + debug_enter(NULL); + } + + if (!aok && !panicstr) + panic("assertion failed: %s (0x%llx %s 0x%llx), file: %s, " + "line: %d", a, (u_longlong_t)lv, op, (u_longlong_t)rv, + f, l); +} + int strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...) 
{ diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c index 92f331c157..081e9d9005 100644 --- a/usr/src/uts/common/os/sunddi.c +++ b/usr/src/uts/common/os/sunddi.c @@ -4500,7 +4500,7 @@ impl_ddi_bus_prop_op(dev_t dev, dev_info_t *dip, dev_info_t *ch_dip, if (((dev == DDI_DEV_T_NONE) || (dev == DDI_DEV_T_ANY)) && ndi_dev_is_prom_node(ch_dip) && ((mod_flags & DDI_PROP_NOTPROM) == 0)) { - len = prom_getproplen((dnode_t)DEVI(ch_dip)->devi_nodeid, name); + len = prom_getproplen((pnode_t)DEVI(ch_dip)->devi_nodeid, name); if (len == -1) { return (DDI_PROP_NOT_FOUND); } @@ -4555,7 +4555,7 @@ impl_ddi_bus_prop_op(dev_t dev, dev_info_t *dip, dev_info_t *ch_dip, /* * Call the PROM function to do the copy. */ - (void) prom_getprop((dnode_t)DEVI(ch_dip)->devi_nodeid, + (void) prom_getprop((pnode_t)DEVI(ch_dip)->devi_nodeid, name, buffer); *lengthp = len; /* return the actual length to the caller */ diff --git a/usr/src/uts/common/os/sunndi.c b/usr/src/uts/common/os/sunndi.c index 4758668769..4def654094 100644 --- a/usr/src/uts/common/os/sunndi.c +++ b/usr/src/uts/common/os/sunndi.c @@ -957,7 +957,7 @@ i_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, /* * construct a new dev_info node with a user-provided nodename */ - ndi_devi_alloc_sleep(pdip, cname, (dnode_t)DEVI_SID_NODEID, &cdip); + ndi_devi_alloc_sleep(pdip, cname, (pnode_t)DEVI_SID_NODEID, &cdip); /* * create hardware properties for each member in the property diff --git a/usr/src/uts/common/os/swapgeneric.c b/usr/src/uts/common/os/swapgeneric.c index a180ec293c..9da38ab18b 100644 --- a/usr/src/uts/common/os/swapgeneric.c +++ b/usr/src/uts/common/os/swapgeneric.c @@ -877,7 +877,7 @@ load_boot_platform_modules(char *drv) * to locate a given nodeid in the device tree. 
*/ struct i_path_findnode { - dnode_t nodeid; + pnode_t nodeid; dev_info_t *dip; }; @@ -952,7 +952,7 @@ netboot_over_ib(char *bootpath) char *temp; boolean_t ret = B_FALSE; - dnode_t node = prom_finddevice(bootpath); + pnode_t node = prom_finddevice(bootpath); int len; char devicetype[OBP_MAXDRVNAME]; diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index b4abecaadf..71fe7305a2 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -173,7 +173,7 @@ * the following system calls (all subcodes of the primary "zone" * system call): * - zone_create: creates a zone with selected attributes (name, - * root path, privileges, resource controls) + * root path, privileges, resource controls, ZFS datasets) * - zone_enter: allows the current process to enter a zone * - zone_getattr: reports attributes of a zone * - zone_list: lists all zones active in the system @@ -770,6 +770,23 @@ zone_free_zsd(zone_t *zone) } /* + * Frees memory associated with the zone dataset list. + */ +static void +zone_free_datasets(zone_t *zone) +{ + zone_dataset_t *t, *next; + + for (t = list_head(&zone->zone_datasets); t != NULL; t = next) { + next = list_next(&zone->zone_datasets, t); + list_remove(&zone->zone_datasets, t); + kmem_free(t->zd_dataset, strlen(t->zd_dataset) + 1); + kmem_free(t, sizeof (*t)); + } + list_destroy(&zone->zone_datasets); +} + +/* * zone.cpu-shares resource control support. */ /*ARGSUSED*/ @@ -1055,6 +1072,7 @@ zone_free(zone_t *zone) } zone_free_zsd(zone); + zone_free_datasets(zone); if (zone->zone_rootvp != NULL) VN_RELE(zone->zone_rootvp); @@ -2500,6 +2518,55 @@ zone_create_error(int er_error, int er_ext, int *er_out) { } /* + * Parses a comma-separated list of ZFS datasets into a per-zone dictionary. 
+ */ +static int +parse_zfs(zone_t *zone, caddr_t ubuf, size_t buflen) +{ + char *kbuf; + char *dataset, *next; + zone_dataset_t *zd; + size_t len; + + if (ubuf == NULL || buflen == 0) + return (0); + + if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL) + return (ENOMEM); + + if (copyin(ubuf, kbuf, buflen) != 0) { + kmem_free(kbuf, buflen); + return (EFAULT); + } + + dataset = next = kbuf; + for (;;) { + zd = kmem_alloc(sizeof (zone_dataset_t), KM_SLEEP); + + next = strchr(dataset, ','); + + if (next == NULL) + len = strlen(dataset); + else + len = next - dataset; + + zd->zd_dataset = kmem_alloc(len + 1, KM_SLEEP); + bcopy(dataset, zd->zd_dataset, len); + zd->zd_dataset[len] = '\0'; + + list_insert_head(&zone->zone_datasets, zd); + + if (next == NULL) + break; + + dataset = next + 1; + } + + kmem_free(kbuf, buflen); + return (0); +} + +/* * System call to create/initialize a new zone named 'zone_name', rooted * at 'zone_root', with a zone-wide privilege limit set of 'zone_privs', * and initialized with the zone-wide rctls described in 'rctlbuf'. 
@@ -2510,7 +2577,7 @@ zone_create_error(int er_error, int er_ext, int *er_out) { static zoneid_t zone_create(const char *zone_name, const char *zone_root, const priv_set_t *zone_privs, caddr_t rctlbuf, size_t rctlbufsz, - int *extended_error) + caddr_t zfsbuf, size_t zfsbufsz, int *extended_error) { struct zsched_arg zarg; nvlist_t *rctls = NULL; @@ -2543,6 +2610,8 @@ zone_create(const char *zone_name, const char *zone_root, cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL); list_create(&zone->zone_zsd, sizeof (struct zsd_entry), offsetof(struct zsd_entry, zsd_linkage)); + list_create(&zone->zone_datasets, sizeof (zone_dataset_t), + offsetof(zone_dataset_t, zd_linkage)); if ((error = zone_set_name(zone, zone_name)) != 0) { zone_free(zone); @@ -2578,6 +2647,11 @@ zone_create(const char *zone_name, const char *zone_root, return (zone_create_error(error, 0, extended_error)); } + if ((error = parse_zfs(zone, zfsbuf, zfsbufsz)) != 0) { + zone_free(zone); + return (set_errno(error)); + } + /* * Stop all lwps since that's what normally happens as part of fork(). 
* This needs to happen before we grab any locks to avoid deadlock @@ -3722,7 +3796,7 @@ zone_lookup(const char *zone_name) /* ARGSUSED */ long -zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5) +zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) { zone_def zs; @@ -3748,6 +3822,8 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5) (unsigned long)zs32.zone_privs; zs.rctlbuf = (caddr_t)(unsigned long)zs32.rctlbuf; zs.rctlbufsz = zs32.rctlbufsz; + zs.zfsbuf = (caddr_t)(unsigned long)zs32.zfsbuf; + zs.zfsbufsz = zs32.zfsbufsz; zs.extended_error = (int *)(unsigned long)zs32.extended_error; #else @@ -3757,6 +3833,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5) return (zone_create(zs.zone_name, zs.zone_root, zs.zone_privs, (caddr_t)zs.rctlbuf, zs.rctlbufsz, + (caddr_t)zs.zfsbuf, zs.zfsbufsz, zs.extended_error)); case ZONE_BOOT: return (zone_boot((zoneid_t)(uintptr_t)arg1, @@ -4037,3 +4114,61 @@ zone_shutdown_global(void) zone_status_set(global_zone, ZONE_IS_SHUTTING_DOWN); mutex_exit(&zone_status_lock); } + +/* + * Returns true if the named dataset is visible in the current zone. + * The 'write' parameter is set to 1 if the dataset is also writable. + */ +int +zone_dataset_visible(const char *dataset, int *write) +{ + zone_dataset_t *zd; + size_t len; + zone_t *zone = curproc->p_zone; + + if (dataset[0] == '\0') + return (0); + + /* + * Walk the list once, looking for datasets which match exactly, or + * specify a dataset underneath an exported dataset. If found, return + * true and note that it is writable. 
+ */ + for (zd = list_head(&zone->zone_datasets); zd != NULL; + zd = list_next(&zone->zone_datasets, zd)) { + + len = strlen(zd->zd_dataset); + if (strlen(dataset) >= len && + bcmp(dataset, zd->zd_dataset, len) == 0 && + (zd->zd_dataset[len-1] == '/' || + dataset[len] == '\0' || dataset[len] == '/')) { + if (write) + *write = 1; + return (1); + } + } + + /* + * Walk the list a second time, searching for datasets which are parents + * of exported datasets. These should be visible, but read-only. + * + * Note that we also have to support forms such as 'pool/dataset/', with + * a trailing slash. + */ + for (zd = list_head(&zone->zone_datasets); zd != NULL; + zd = list_next(&zone->zone_datasets, zd)) { + + len = strlen(dataset); + if (dataset[len - 1] == '/') + len--; /* Ignore trailing slash */ + if (len < strlen(zd->zd_dataset) && + bcmp(dataset, zd->zd_dataset, len) == 0 && + zd->zd_dataset[len] == '/') { + if (write) + *write = 0; + return (1); + } + } + + return (0); +} |
