summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/os
diff options
context:
space:
mode:
authorahrens <none@none>2005-10-31 11:33:35 -0800
committerahrens <none@none>2005-10-31 11:33:35 -0800
commitfa9e4066f08beec538e775443c5be79dd423fcab (patch)
tree576d99665e57bb7cb70584431adb08c14d47e3ce /usr/src/uts/common/os
parentf1b64740276f67fc6914c1d855f2af601efe99ac (diff)
downloadillumos-joyent-fa9e4066f08beec538e775443c5be79dd423fcab.tar.gz
PSARC 2002/240 ZFS
6338653 Integrate ZFS
PSARC 2004/652 - DKIOCFLUSH
5096886 Write caching disks need mechanism to flush cache to physical media
Diffstat (limited to 'usr/src/uts/common/os')
-rw-r--r-- usr/src/uts/common/os/autoconf.c 20
-rw-r--r-- usr/src/uts/common/os/devcfg.c 14
-rw-r--r-- usr/src/uts/common/os/kmem.c 24
-rw-r--r-- usr/src/uts/common/os/list.c 15
-rw-r--r-- usr/src/uts/common/os/policy.c 12
-rw-r--r-- usr/src/uts/common/os/printf.c 19
-rw-r--r-- usr/src/uts/common/os/sunddi.c 4
-rw-r--r-- usr/src/uts/common/os/sunndi.c 2
-rw-r--r-- usr/src/uts/common/os/swapgeneric.c 4
-rw-r--r-- usr/src/uts/common/os/zone.c 141
10 files changed, 228 insertions, 27 deletions
diff --git a/usr/src/uts/common/os/autoconf.c b/usr/src/uts/common/os/autoconf.c
index 9be275c03b..6127073b28 100644
--- a/usr/src/uts/common/os/autoconf.c
+++ b/usr/src/uts/common/os/autoconf.c
@@ -208,11 +208,11 @@ getlongprop_buf(int id, char *name, char *buf, int maxlen)
{
int size;
- size = prom_getproplen((dnode_t)id, name);
+ size = prom_getproplen((pnode_t)id, name);
if (size <= 0 || (size > maxlen - 1))
return (-1);
- if (-1 == prom_getprop((dnode_t)id, name, buf))
+ if (-1 == prom_getprop((pnode_t)id, name, buf))
return (-1);
/*
@@ -246,14 +246,14 @@ get_neighbors(dev_info_t *di, int flag)
snid = cnid = 0;
switch (flag) {
case DDI_WALK_PRUNESIB:
- cnid = (int)prom_childnode((dnode_t)nid);
+ cnid = (int)prom_childnode((pnode_t)nid);
break;
case DDI_WALK_PRUNECHILD:
- snid = (int)prom_nextnode((dnode_t)nid);
+ snid = (int)prom_nextnode((pnode_t)nid);
break;
case 0:
- snid = (int)prom_nextnode((dnode_t)nid);
- cnid = (int)prom_childnode((dnode_t)nid);
+ snid = (int)prom_nextnode((pnode_t)nid);
+ cnid = (int)prom_childnode((pnode_t)nid);
break;
default:
return (DDI_WALK_TERMINATE);
@@ -265,7 +265,7 @@ get_neighbors(dev_info_t *di, int flag)
* add the first sibling that passes check_status()
*/
for (; snid && (snid != -1);
- snid = (int)prom_nextnode((dnode_t)snid)) {
+ snid = (int)prom_nextnode((pnode_t)snid)) {
if (getlongprop_buf(snid, OBP_NAME, buf,
sizeof (buf)) > 0) {
if (check_status(snid, buf, parent) ==
@@ -286,9 +286,9 @@ get_neighbors(dev_info_t *di, int flag)
if (check_status(cnid, buf, di) == DDI_SUCCESS) {
(void) ddi_add_child(di, buf, cnid, -1);
} else {
- for (cnid = (int)prom_nextnode((dnode_t)cnid);
+ for (cnid = (int)prom_nextnode((pnode_t)cnid);
cnid && (cnid != -1);
- cnid = (int)prom_nextnode((dnode_t)cnid)) {
+ cnid = (int)prom_nextnode((pnode_t)cnid)) {
if (getlongprop_buf(cnid, OBP_NAME,
buf, sizeof (buf)) > 0) {
if (check_status(cnid, buf, di)
@@ -352,7 +352,7 @@ static void
create_devinfo_tree(void)
{
major_t major;
- dnode_t nodeid;
+ pnode_t nodeid;
i_ddi_node_cache_init();
#if defined(__sparc)
diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c
index c11cbcdf86..deabd79f37 100644
--- a/usr/src/uts/common/os/devcfg.c
+++ b/usr/src/uts/common/os/devcfg.c
@@ -80,7 +80,7 @@ struct mt_config_handle {
};
struct devi_nodeid {
- dnode_t nodeid;
+ pnode_t nodeid;
dev_info_t *dip;
struct devi_nodeid *next;
};
@@ -196,7 +196,7 @@ i_ddi_node_cache_init()
* The allocated node has a reference count of 0.
*/
dev_info_t *
-i_ddi_alloc_node(dev_info_t *pdip, char *node_name, dnode_t nodeid,
+i_ddi_alloc_node(dev_info_t *pdip, char *node_name, pnode_t nodeid,
int instance, ddi_prop_t *sys_prop, int flag)
{
struct dev_info *devi;
@@ -1666,7 +1666,7 @@ ndi_devi_tryenter(dev_info_t *dip, int *circular)
* not allowed to sleep.
*/
int
-ndi_devi_alloc(dev_info_t *parent, char *node_name, dnode_t nodeid,
+ndi_devi_alloc(dev_info_t *parent, char *node_name, pnode_t nodeid,
dev_info_t **ret_dip)
{
ASSERT(node_name != NULL);
@@ -1686,7 +1686,7 @@ ndi_devi_alloc(dev_info_t *parent, char *node_name, dnode_t nodeid,
* This routine may sleep and should not be called at interrupt time
*/
void
-ndi_devi_alloc_sleep(dev_info_t *parent, char *node_name, dnode_t nodeid,
+ndi_devi_alloc_sleep(dev_info_t *parent, char *node_name, pnode_t nodeid,
dev_info_t **ret_dip)
{
ASSERT(node_name != NULL);
@@ -3652,7 +3652,7 @@ init_spec_child(dev_info_t *pdip, struct hwc_spec *specp, uint_t flags)
return;
}
- dip = i_ddi_alloc_node(pdip, node_name, (dnode_t)DEVI_PSEUDO_NODEID,
+ dip = i_ddi_alloc_node(pdip, node_name, (pnode_t)DEVI_PSEUDO_NODEID,
-1, specp->hwc_devi_sys_prop_ptr, KM_SLEEP);
if (dip == NULL)
@@ -5367,7 +5367,7 @@ path_to_major(char *path)
{
dev_info_t *dip;
char *p, *q;
- dnode_t nodeid;
+ pnode_t nodeid;
major_t maj;
/*
@@ -6390,7 +6390,7 @@ mt_config_driver(struct mt_config_handle *hdl)
* NOTE: This function will return NULL for .conf nodeids.
*/
dev_info_t *
-e_ddi_nodeid_to_dip(dnode_t nodeid)
+e_ddi_nodeid_to_dip(pnode_t nodeid)
{
dev_info_t *dip = NULL;
struct devi_nodeid *prev, *elem;
diff --git a/usr/src/uts/common/os/kmem.c b/usr/src/uts/common/os/kmem.c
index 7ae9d5f16b..be7ba8b0cc 100644
--- a/usr/src/uts/common/os/kmem.c
+++ b/usr/src/uts/common/os/kmem.c
@@ -1682,6 +1682,21 @@ kmem_cache_magazine_enable(kmem_cache_t *cp)
}
/*
+ * Reap (almost) everything from the cache 'cp' right now. See
+ * kmem_cache_magazine_purge() for the explanation of why
+ * kmem_depot_ws_update() is called twice in a row.
+ */
+void
+kmem_cache_reap_now(kmem_cache_t *cp)
+{
+	int pass;
+
+	/* Two back-to-back working-set updates precede the reap. */
+	for (pass = 0; pass < 2; pass++)
+		kmem_depot_ws_update(cp);
+
+	/* Queue the depot reap on kmem_taskq, then wait on that taskq. */
+	(void) taskq_dispatch(kmem_taskq,
+	    (task_func_t *)kmem_depot_ws_reap, cp, TQ_SLEEP);
+	taskq_wait(kmem_taskq);
+}
+
+/*
* Recompute a cache's magazine size. The trade-off is that larger magazines
* provide a higher transfer rate with the depot, while smaller magazines
* reduce memory consumption. Magazine resizing is an expensive operation;
@@ -1978,6 +1993,15 @@ kmem_maxavail(void)
return ((size_t)ptob(MAX(MIN(pmem, vmem), 0)));
}
+/*
+ * Indicate whether memory-intensive kmem debugging is enabled, i.e.
+ * whether KMF_AUDIT or KMF_REDZONE is set in kmem_flags. The matching
+ * flag bits themselves are returned, so the result is nonzero when
+ * either is set and 0 otherwise.
+ */
+int
+kmem_debugging(void)
+{
+	int heavy_flags = kmem_flags & (KMF_AUDIT | KMF_REDZONE);
+
+	return (heavy_flags);
+}
+
kmem_cache_t *
kmem_cache_create(
char *name, /* descriptive name for this cache */
diff --git a/usr/src/uts/common/os/list.c b/usr/src/uts/common/os/list.c
index b511d60bdc..8194b79147 100644
--- a/usr/src/uts/common/os/list.c
+++ b/usr/src/uts/common/os/list.c
@@ -20,7 +20,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -180,3 +180,16 @@ list_move_tail(list_t *dst, list_t *src)
/* empty src list */
srcnode->list_next = srcnode->list_prev = srcnode;
}
+
+/*
+ * Returns nonzero if 'link' has a non-NULL list_next pointer, and 0
+ * otherwise. The next and prev pointers are asserted to be either both
+ * NULL or both non-NULL.
+ */
+int
+list_link_active(list_node_t *link)
+{
+	int has_next = (link->list_next != NULL);
+
+	ASSERT(has_next == (link->list_prev != NULL));
+	return (has_next);
+}
+
+/*
+ * Function wrapper that returns the result of list_empty() on 'list'.
+ */
+int
+list_is_empty(list_t *list)
+{
+	int empty = list_empty(list);
+
+	return (empty);
+}
diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c
index 8c0b61a6c5..a5b011a3f8 100644
--- a/usr/src/uts/common/os/policy.c
+++ b/usr/src/uts/common/os/policy.c
@@ -1761,3 +1761,15 @@ secpolicy_gart_map(const cred_t *cr)
}
return (0);
}
+
+/*
+ * secpolicy_zfs
+ *
+ * Decide whether the credential 'cr' may manipulate ZFS datasets (as
+ * opposed to pools). The check is the PRIV_SYS_MOUNT privilege policy;
+ * its result is returned unchanged, with EPERM supplied as the error for
+ * a denied request.
+ */
+int
+secpolicy_zfs(const cred_t *cr)
+{
+	int err = PRIV_POLICY(cr, PRIV_SYS_MOUNT, B_FALSE, EPERM, NULL);
+
+	return (err);
+}
diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c
index d21e8cb029..603da31b62 100644
--- a/usr/src/uts/common/os/printf.c
+++ b/usr/src/uts/common/os/printf.c
@@ -20,7 +20,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -298,6 +298,23 @@ assfail(const char *a, const char *f, int l)
return (0);
}
+/*
+ * Assertion-failure handler for three-part assertions. Reports the
+ * failed expression text 'a' together with the left value 'lv', the
+ * operator text 'op' and the right value 'rv', plus the file 'f' and
+ * line 'l' of the assertion.
+ *
+ * When 'aask' is set the failure is printed and debug_enter() is called.
+ * Afterwards the system panics, unless 'aok' is set or 'panicstr' is
+ * already non-NULL.
+ */
+void
+assfail3(const char *a, uintmax_t lv, const char *op, uintmax_t rv,
+    const char *f, int l)
+{
+	u_longlong_t left = (u_longlong_t)lv;
+	u_longlong_t right = (u_longlong_t)rv;
+
+	if (aask) {
+		printf("ASSERTION CAUGHT: %s (0x%llx %s 0x%llx), file: %s, "
+		    "line: %d", a, left, op, right, f, l);
+		debug_enter(NULL);
+	}
+
+	if (aok || panicstr)
+		return;
+
+	panic("assertion failed: %s (0x%llx %s 0x%llx), file: %s, "
+	    "line: %d", a, left, op, right, f, l);
+}
+
int
strlog(short mid, short sid, char level, ushort_t sl, char *fmt, ...)
{
diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c
index 92f331c157..081e9d9005 100644
--- a/usr/src/uts/common/os/sunddi.c
+++ b/usr/src/uts/common/os/sunddi.c
@@ -4500,7 +4500,7 @@ impl_ddi_bus_prop_op(dev_t dev, dev_info_t *dip, dev_info_t *ch_dip,
if (((dev == DDI_DEV_T_NONE) || (dev == DDI_DEV_T_ANY)) &&
ndi_dev_is_prom_node(ch_dip) &&
((mod_flags & DDI_PROP_NOTPROM) == 0)) {
- len = prom_getproplen((dnode_t)DEVI(ch_dip)->devi_nodeid, name);
+ len = prom_getproplen((pnode_t)DEVI(ch_dip)->devi_nodeid, name);
if (len == -1) {
return (DDI_PROP_NOT_FOUND);
}
@@ -4555,7 +4555,7 @@ impl_ddi_bus_prop_op(dev_t dev, dev_info_t *dip, dev_info_t *ch_dip,
/*
* Call the PROM function to do the copy.
*/
- (void) prom_getprop((dnode_t)DEVI(ch_dip)->devi_nodeid,
+ (void) prom_getprop((pnode_t)DEVI(ch_dip)->devi_nodeid,
name, buffer);
*lengthp = len; /* return the actual length to the caller */
diff --git a/usr/src/uts/common/os/sunndi.c b/usr/src/uts/common/os/sunndi.c
index 4758668769..4def654094 100644
--- a/usr/src/uts/common/os/sunndi.c
+++ b/usr/src/uts/common/os/sunndi.c
@@ -957,7 +957,7 @@ i_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip,
/*
* construct a new dev_info node with a user-provided nodename
*/
- ndi_devi_alloc_sleep(pdip, cname, (dnode_t)DEVI_SID_NODEID, &cdip);
+ ndi_devi_alloc_sleep(pdip, cname, (pnode_t)DEVI_SID_NODEID, &cdip);
/*
* create hardware properties for each member in the property
diff --git a/usr/src/uts/common/os/swapgeneric.c b/usr/src/uts/common/os/swapgeneric.c
index a180ec293c..9da38ab18b 100644
--- a/usr/src/uts/common/os/swapgeneric.c
+++ b/usr/src/uts/common/os/swapgeneric.c
@@ -877,7 +877,7 @@ load_boot_platform_modules(char *drv)
* to locate a given nodeid in the device tree.
*/
struct i_path_findnode {
- dnode_t nodeid;
+ pnode_t nodeid;
dev_info_t *dip;
};
@@ -952,7 +952,7 @@ netboot_over_ib(char *bootpath)
char *temp;
boolean_t ret = B_FALSE;
- dnode_t node = prom_finddevice(bootpath);
+ pnode_t node = prom_finddevice(bootpath);
int len;
char devicetype[OBP_MAXDRVNAME];
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index b4abecaadf..71fe7305a2 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -173,7 +173,7 @@
* the following system calls (all subcodes of the primary "zone"
* system call):
* - zone_create: creates a zone with selected attributes (name,
- * root path, privileges, resource controls)
+ * root path, privileges, resource controls, ZFS datasets)
* - zone_enter: allows the current process to enter a zone
* - zone_getattr: reports attributes of a zone
* - zone_list: lists all zones active in the system
@@ -770,6 +770,23 @@ zone_free_zsd(zone_t *zone)
}
/*
+ * Releases every entry on the zone's dataset list -- both the dataset
+ * name string and the list element itself -- and then destroys the list.
+ */
+static void
+zone_free_datasets(zone_t *zone)
+{
+	list_t *datasets = &zone->zone_datasets;
+	zone_dataset_t *zd;
+
+	/* Pop entries off the front until nothing is left. */
+	while ((zd = list_head(datasets)) != NULL) {
+		list_remove(datasets, zd);
+		/* The freed size covers the name plus its terminating NUL. */
+		kmem_free(zd->zd_dataset, strlen(zd->zd_dataset) + 1);
+		kmem_free(zd, sizeof (*zd));
+	}
+	list_destroy(datasets);
+}
+
+/*
* zone.cpu-shares resource control support.
*/
/*ARGSUSED*/
@@ -1055,6 +1072,7 @@ zone_free(zone_t *zone)
}
zone_free_zsd(zone);
+ zone_free_datasets(zone);
if (zone->zone_rootvp != NULL)
VN_RELE(zone->zone_rootvp);
@@ -2500,6 +2518,55 @@ zone_create_error(int er_error, int er_ext, int *er_out) {
}
/*
+ * Parses a comma-separated list of ZFS dataset names, copied in from the
+ * user buffer 'ubuf' of 'buflen' bytes, and inserts one zone_dataset_t per
+ * name at the head of zone->zone_datasets.
+ *
+ * Returns 0 on success (including when no buffer is supplied), ENOMEM if
+ * the kernel copy of the buffer cannot be allocated, or EFAULT if the
+ * copyin fails.
+ *
+ * The kernel copy is always NUL-terminated here, so a user buffer that
+ * lacks a terminator cannot cause the string scans below to run past the
+ * allocation. Empty names (an empty buffer, doubled commas, or a leading
+ * or trailing comma) are skipped rather than added, because
+ * zone_dataset_visible() indexes the last character of each stored name.
+ */
+static int
+parse_zfs(zone_t *zone, caddr_t ubuf, size_t buflen)
+{
+	char *kbuf;
+	char *dataset, *next;
+	zone_dataset_t *zd;
+	size_t len, kbuflen;
+
+	if (ubuf == NULL || buflen == 0)
+		return (0);
+
+	/* One extra byte for the terminator appended after the copyin. */
+	kbuflen = buflen + 1;
+	if (kbuflen < buflen)
+		return (ENOMEM);	/* size wrapped; cannot be allocated */
+
+	/* The size is caller-supplied, so do not sleep waiting for memory. */
+	if ((kbuf = kmem_alloc(kbuflen, KM_NOSLEEP)) == NULL)
+		return (ENOMEM);
+
+	if (copyin(ubuf, kbuf, buflen) != 0) {
+		kmem_free(kbuf, kbuflen);
+		return (EFAULT);
+	}
+	kbuf[buflen] = '\0';
+
+	for (dataset = kbuf; dataset != NULL; dataset = next) {
+		next = strchr(dataset, ',');
+
+		if (next == NULL) {
+			len = strlen(dataset);
+		} else {
+			len = next - dataset;
+			next++;		/* step over the comma */
+		}
+
+		if (len == 0)
+			continue;
+
+		zd = kmem_alloc(sizeof (zone_dataset_t), KM_SLEEP);
+		zd->zd_dataset = kmem_alloc(len + 1, KM_SLEEP);
+		bcopy(dataset, zd->zd_dataset, len);
+		zd->zd_dataset[len] = '\0';
+
+		list_insert_head(&zone->zone_datasets, zd);
+	}
+
+	kmem_free(kbuf, kbuflen);
+	return (0);
+}
+
+/*
* System call to create/initialize a new zone named 'zone_name', rooted
* at 'zone_root', with a zone-wide privilege limit set of 'zone_privs',
* and initialized with the zone-wide rctls described in 'rctlbuf'.
@@ -2510,7 +2577,7 @@ zone_create_error(int er_error, int er_ext, int *er_out) {
static zoneid_t
zone_create(const char *zone_name, const char *zone_root,
const priv_set_t *zone_privs, caddr_t rctlbuf, size_t rctlbufsz,
- int *extended_error)
+ caddr_t zfsbuf, size_t zfsbufsz, int *extended_error)
{
struct zsched_arg zarg;
nvlist_t *rctls = NULL;
@@ -2543,6 +2610,8 @@ zone_create(const char *zone_name, const char *zone_root,
cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
list_create(&zone->zone_zsd, sizeof (struct zsd_entry),
offsetof(struct zsd_entry, zsd_linkage));
+ list_create(&zone->zone_datasets, sizeof (zone_dataset_t),
+ offsetof(zone_dataset_t, zd_linkage));
if ((error = zone_set_name(zone, zone_name)) != 0) {
zone_free(zone);
@@ -2578,6 +2647,11 @@ zone_create(const char *zone_name, const char *zone_root,
return (zone_create_error(error, 0, extended_error));
}
+ if ((error = parse_zfs(zone, zfsbuf, zfsbufsz)) != 0) {
+ zone_free(zone);
+ return (set_errno(error));
+ }
+
/*
* Stop all lwps since that's what normally happens as part of fork().
* This needs to happen before we grab any locks to avoid deadlock
@@ -3722,7 +3796,7 @@ zone_lookup(const char *zone_name)
/* ARGSUSED */
long
-zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5)
+zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
{
zone_def zs;
@@ -3748,6 +3822,8 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5)
(unsigned long)zs32.zone_privs;
zs.rctlbuf = (caddr_t)(unsigned long)zs32.rctlbuf;
zs.rctlbufsz = zs32.rctlbufsz;
+ zs.zfsbuf = (caddr_t)(unsigned long)zs32.zfsbuf;
+ zs.zfsbufsz = zs32.zfsbufsz;
zs.extended_error =
(int *)(unsigned long)zs32.extended_error;
#else
@@ -3757,6 +3833,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5)
return (zone_create(zs.zone_name, zs.zone_root,
zs.zone_privs, (caddr_t)zs.rctlbuf, zs.rctlbufsz,
+ (caddr_t)zs.zfsbuf, zs.zfsbufsz,
zs.extended_error));
case ZONE_BOOT:
return (zone_boot((zoneid_t)(uintptr_t)arg1,
@@ -4037,3 +4114,61 @@ zone_shutdown_global(void)
zone_status_set(global_zone, ZONE_IS_SHUTTING_DOWN);
mutex_exit(&zone_status_lock);
}
+
+/*
+ * Returns 1 if the named dataset is visible in the current process's
+ * zone (curproc->p_zone), and 0 otherwise. An empty name is never
+ * visible.
+ *
+ * If 'write' is non-NULL and the dataset is visible, *write is set to 1
+ * when the dataset is also writable (it is an exported dataset or lies
+ * underneath one) and to 0 when it is visible only because it is a
+ * parent of an exported dataset. *write is left untouched when 0 is
+ * returned.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+	zone_dataset_t *zd;
+	size_t len, zdlen;
+	zone_t *zone = curproc->p_zone;
+
+	if (dataset[0] == '\0')
+		return (0);
+
+	len = strlen(dataset);
+
+	/*
+	 * Walk the list once, looking for datasets which match exactly, or
+	 * specify a dataset underneath an exported dataset. If found, return
+	 * true and note that it is writable.
+	 */
+	for (zd = list_head(&zone->zone_datasets); zd != NULL;
+	    zd = list_next(&zone->zone_datasets, zd)) {
+
+		zdlen = strlen(zd->zd_dataset);
+
+		/*
+		 * An empty entry cannot name a dataset; skipping it also
+		 * keeps the zd_dataset[zdlen - 1] read below in bounds.
+		 */
+		if (zdlen == 0)
+			continue;
+
+		if (len >= zdlen &&
+		    bcmp(dataset, zd->zd_dataset, zdlen) == 0 &&
+		    (zd->zd_dataset[zdlen - 1] == '/' ||
+		    dataset[zdlen] == '\0' || dataset[zdlen] == '/')) {
+			if (write)
+				*write = 1;
+			return (1);
+		}
+	}
+
+	/*
+	 * Walk the list a second time, searching for datasets which are
+	 * parents of exported datasets. These should be visible, but
+	 * read-only.
+	 *
+	 * Note that we also have to support forms such as 'pool/dataset/',
+	 * with a trailing slash. The name is known to be non-empty here, so
+	 * dataset[len - 1] is a valid index.
+	 */
+	if (dataset[len - 1] == '/')
+		len--;			/* Ignore trailing slash */
+
+	for (zd = list_head(&zone->zone_datasets); zd != NULL;
+	    zd = list_next(&zone->zone_datasets, zd)) {
+
+		if (len < strlen(zd->zd_dataset) &&
+		    bcmp(dataset, zd->zd_dataset, len) == 0 &&
+		    zd->zd_dataset[len] == '/') {
+			if (write)
+				*write = 0;
+			return (1);
+		}
+	}
+
+	return (0);
+}