diff options
author | llai1 <none@none> | 2006-08-25 17:24:25 -0700 |
---|---|---|
committer | llai1 <none@none> | 2006-08-25 17:24:25 -0700 |
commit | facf4a8d7b59fde89a8662b4f4c73a758e6c402c (patch) | |
tree | 4e0024c5508351006df1496ec4be6e7b564c3ce8 /usr/src/uts/common/fs/dev | |
parent | adcafb0fe4c49c4d46c0b393dfba36d4e1b55c0e (diff) | |
download | illumos-gate-facf4a8d7b59fde89a8662b4f4c73a758e6c402c.tar.gz |
PSARC/2003/246 Filesystem Driven Device Naming
5050715 logical device names not created during early boot
6292952 devfsadm mishandles optarg
6362924 devfsadm secondary link generation is not zones aware
6413127 Integrate the Devname Project
6464196 bfu should remove pt_chmod, obsoleted by /dev filesystem
--HG--
rename : usr/src/cmd/pt_chmod/Makefile => deleted_files/usr/src/cmd/pt_chmod/Makefile
rename : usr/src/cmd/pt_chmod/pt_chmod.c => deleted_files/usr/src/cmd/pt_chmod/pt_chmod.c
Diffstat (limited to 'usr/src/uts/common/fs/dev')
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_comm.c | 749 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_ncache.c | 740 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_nsconfig_mod.c | 198 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_profile.c | 983 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_ptsops.c | 398 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_subr.c | 3657 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_vfsops.c | 524 | ||||
-rw-r--r-- | usr/src/uts/common/fs/dev/sdev_vnops.c | 1329 |
8 files changed, 8578 insertions, 0 deletions
diff --git a/usr/src/uts/common/fs/dev/sdev_comm.c b/usr/src/uts/common/fs/dev/sdev_comm.c new file mode 100644 index 0000000000..d82afffd07 --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_comm.c @@ -0,0 +1,749 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * routines to invoke user level name lookup services + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/user.h> +#include <sys/time.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/flock.h> +#include <sys/kmem.h> +#include <sys/uio.h> +#include <sys/errno.h> +#include <sys/stat.h> +#include <sys/cred.h> +#include <sys/dirent.h> +#include <sys/pathname.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/mode.h> +#include <sys/policy.h> +#include <sys/disp.h> +#include <sys/door.h> +#include <fs/fs_subr.h> +#include <sys/mount.h> +#include <sys/fs/snode.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_impl.h> +#include <sys/fs/sdev_node.h> +#include <sys/sunndi.h> +#include <sys/sunddi.h> +#include <sys/sunmdi.h> +#include <sys/conf.h> +#include <sys/modctl.h> +#include <sys/ddi.h> + +/* default timeout to wait for devfsadm response in seconds */ +#define DEV_DEVFSADM_STARTUP (1 * 60) +#define DEV_NODE_WAIT_TIMEOUT (5 * 60) + +/* atomic bitset for devfsadm status */ +volatile uint_t devfsadm_state; + +static kmutex_t devfsadm_lock; +static kcondvar_t devfsadm_cv; + +int devname_nsmaps_loaded = 0; +static int dev_node_wait_timeout = DEV_NODE_WAIT_TIMEOUT; +static int dev_devfsadm_startup = DEV_DEVFSADM_STARTUP; + +/* + * Door used to communicate with devfsadmd + */ +static door_handle_t sdev_upcall_door = NULL; /* Door for upcalls */ +static char *sdev_door_upcall_filename = NULL; +static int sdev_upcall_door_revoked = 0; +static int sdev_door_upcall_filename_size; + +static void sdev_devfsadmd_nsrdr(sdev_nsrdr_work_t *); +static int sdev_devfsadm_revoked(void); +static int sdev_ki_call_devfsadmd(sdev_door_arg_t *, sdev_door_res_t *); + +/* + * nsmap_readdir processing thread + */ +static uint_t sdev_nsrdr_thread_created = 0; +static 
kmutex_t sdev_nsrdr_thread_lock; +static kcondvar_t sdev_nsrdr_thread_cv; +static sdev_nsrdr_work_t *sdev_nsrdr_thread_workq = NULL; +static sdev_nsrdr_work_t *sdev_nsrdr_thread_tail = NULL; + +void +sdev_devfsadm_lockinit(void) +{ + mutex_init(&devfsadm_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&devfsadm_cv, NULL, CV_DEFAULT, NULL); +} + +void +sdev_devfsadm_lockdestroy(void) +{ + mutex_destroy(&devfsadm_lock); + cv_destroy(&devfsadm_cv); +} + +/* + * Wait for node to be created + */ +int +sdev_wait4lookup(struct sdev_node *dv, int cmd) +{ + clock_t expire; + clock_t rv; + int rval = ENOENT; + int is_lookup = (cmd == SDEV_LOOKUP); + + ASSERT(cmd == SDEV_LOOKUP || cmd == SDEV_READDIR); + ASSERT(MUTEX_HELD(&dv->sdev_lookup_lock)); + + /* tick value at which wait expires */ + expire = ddi_get_lbolt() + + drv_usectohz(dev_node_wait_timeout * 1000000); + + sdcmn_err6(("wait4lookup %s %s, %ld %d\n", + is_lookup ? "lookup" : "readdir", + dv->sdev_name, expire - ddi_get_lbolt(), dv->sdev_state)); + + if (SDEV_IS_LGWAITING(dv)) { + /* devfsadm nodes */ + while (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && + !sdev_devfsadm_revoked()) { + /* wait 2 sec and check devfsadm completion */ + rv = cv_timedwait_sig(&dv->sdev_lookup_cv, + &dv->sdev_lookup_lock, ddi_get_lbolt() + + drv_usectohz(2 * 1000000)); + + if (is_lookup && (rv > 0)) { + /* was this node constructed ? */ + if (dv->sdev_state == SDEV_READY) { + rval = 0; + } + sdcmn_err6(("%s: wait done, %screated %d\n", + dv->sdev_name, rval ? 
"not " : "", + dv->sdev_state)); + break; + } else if (rv == 0) { + /* interrupted */ + sdcmn_err6(("%s: wait interrupted\n", + dv->sdev_name)); + break; + } else if ((rv == -1) && + (ddi_get_lbolt() >= expire)) { + sdcmn_err6(("%s: wait time is up\n", + dv->sdev_name)); + break; + } + sdcmn_err6(("%s: wait " + "rv %ld state 0x%x expire %ld\n", + dv->sdev_name, rv, devfsadm_state, + expire - ddi_get_lbolt())); + } + } else { + /* + * for the nodes created by + * devname_lookup_func callback + * or plug-in modules + */ + while (SDEV_IS_LOOKUP(dv) || SDEV_IS_READDIR(dv)) { + cv_wait(&dv->sdev_lookup_cv, &dv->sdev_lookup_lock); + } + rval = 0; + } + + sdcmn_err6(("wait4lookup unblocking %s state 0x%x %d\n", + dv->sdev_name, devfsadm_state, dv->sdev_state)); + + if (is_lookup) { + SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); + } else { + SDEV_UNBLOCK_OTHERS(dv, SDEV_READDIR); + } + + return (rval); +} + +void +sdev_unblock_others(struct sdev_node *dv, uint_t cmd) +{ + ASSERT(MUTEX_HELD(&dv->sdev_lookup_lock)); + + SDEV_CLEAR_LOOKUP_FLAGS(dv, cmd); + if (SDEV_IS_LGWAITING(dv)) { + SDEV_CLEAR_LOOKUP_FLAGS(dv, SDEV_LGWAITING); + } + cv_broadcast(&dv->sdev_lookup_cv); +} + +/* + * In the case devfsadmd is down, it is re-started by syseventd + * upon receiving an event subscribed to by devfsadmd. 
+ */ +static int +sdev_start_devfsadmd() +{ + int se_err = 0; + sysevent_t *ev; + sysevent_id_t eid; + + ev = sysevent_alloc(EC_DEVFS, ESC_DEVFS_START, EP_DDI, SE_SLEEP); + ASSERT(ev); + if ((se_err = log_sysevent(ev, SE_SLEEP, &eid)) != 0) { + switch (se_err) { + case SE_NO_TRANSPORT: + cmn_err(CE_WARN, "unable to start devfsadm - " + "syseventd may not be responding\n"); + break; + default: + cmn_err(CE_WARN, "unable to start devfsadm - " + "sysevent error %d\n", se_err); + break; + } + } + + sysevent_free(ev); + return (se_err); +} + +static int +sdev_open_upcall_door() +{ + int error; + clock_t rv; + clock_t expire; + + ASSERT(sdev_upcall_door == NULL); + + /* tick value at which wait expires */ + expire = ddi_get_lbolt() + + drv_usectohz(dev_devfsadm_startup * 1000000); + + if (sdev_door_upcall_filename == NULL) { + if ((error = sdev_start_devfsadmd()) != 0) { + return (error); + } + + /* wait for devfsadmd start */ + mutex_enter(&devfsadm_lock); + while (sdev_door_upcall_filename == NULL) { + sdcmn_err6(("waiting for dev_door creation, %ld\n", + expire - ddi_get_lbolt())); + rv = cv_timedwait_sig(&devfsadm_cv, &devfsadm_lock, + expire); + sdcmn_err6(("dev_door wait rv %ld\n", rv)); + if (rv <= 0) { + sdcmn_err6(("devfsadmd startup error\n")); + mutex_exit(&devfsadm_lock); + return (EBADF); + } + } + sdcmn_err6(("devfsadmd is ready\n")); + mutex_exit(&devfsadm_lock); + } + + if ((error = door_ki_open(sdev_door_upcall_filename, + &sdev_upcall_door)) != 0) { + sdcmn_err6(("upcall_lookup: door open error %d\n", + error)); + return (error); + } + + return (0); +} + +static void +sdev_release_door() +{ + if (sdev_upcall_door) { + door_ki_rele(sdev_upcall_door); + sdev_upcall_door = NULL; + } + if (sdev_door_upcall_filename) { + kmem_free(sdev_door_upcall_filename, + sdev_door_upcall_filename_size); + sdev_door_upcall_filename = NULL; + } +} + +static int +sdev_ki_call_devfsadmd(sdev_door_arg_t *argp, sdev_door_res_t *resultp) +{ + door_arg_t darg, save_arg; + int 
error; + int retry; + + if (((sdev_upcall_door == NULL) && + ((error = sdev_open_upcall_door()) != 0)) || + sdev_devfsadm_revoked()) { + sdcmn_err6(("call_devfsadm: upcall lookup error\n")); + return (error); + } + + ASSERT(argp); + darg.data_ptr = (char *)argp; + darg.data_size = sizeof (struct sdev_door_arg); + darg.desc_ptr = NULL; + darg.desc_num = 0; + darg.rbuf = (char *)(resultp); + darg.rsize = sizeof (struct sdev_door_res); + + ASSERT(sdev_upcall_door); + save_arg = darg; + for (retry = 0; ; retry++) { + sdcmn_err6(("call devfsadm: upcall lookup, retry %d\n", retry)); + if ((error = door_ki_upcall(sdev_upcall_door, &darg)) == 0) { + sdcmn_err6(("call devfsadm: upcall lookup ok\n")); + break; + } + + /* + * handle door call errors + */ + if (sdev_devfsadm_revoked()) { + sdcmn_err6(("upcall lookup door revoked, " + "error %d\n", error)); + return (error); + } + + switch (error) { + case EINTR: + /* return error here? */ + sdcmn_err6(("sdev_ki_call_devfsadm: EINTR\n")); + delay(hz); + break; + case EAGAIN: + sdcmn_err6(("sdev_ki_call_devfsadm: EAGAIN\n")); + delay(2 * hz); + break; + case EBADF: + if (retry > 4) { + sdcmn_err6(("sdev_ki_call_devfsadm: EBADF\n")); + return (EBADF); + } + sdcmn_err6(( + "sdev_ki_call_devfsadm: EBADF, re-binding\n")); + sdev_release_door(); + delay(retry * hz); + error = sdev_open_upcall_door(); + if (error != 0) { + sdcmn_err6(("sdev_ki_call_devfsadm: " + "EBADF lookup error %d\n", error)); + if (!sdev_devfsadm_revoked()) + cmn_err(CE_NOTE, + "?unable to invoke devfsadm - " + "please run manually\n"); + return (EBADF); + } + break; + case EINVAL: + default: + cmn_err(CE_CONT, + "?sdev: door_ki_upcall unexpected result %d\n", + error); + return (error); + } + + darg = save_arg; + } + + if (!error) { + ASSERT((struct sdev_door_res *)darg.rbuf == resultp); + if (resultp->devfsadm_error != 0) { + sdcmn_err6(("sdev_ki_call_devfsadmd: result %d\n", + resultp->devfsadm_error)); + error = resultp->devfsadm_error; + } + } else { + 
sdcmn_err6(("sdev_ki_call_devfsadmd with error %d\n", error)); + } + + return (error); +} + +static int +sdev_devfsadm_revoked(void) +{ + struct door_info info; + int rv; + extern int sys_shutdown; + + if (sys_shutdown) { + sdcmn_err6(("dev: shutdown observed\n")); + return (1); + } + + if (sdev_upcall_door && !sdev_upcall_door_revoked) { + rv = door_ki_info(sdev_upcall_door, &info); + if ((rv == 0) && info.di_attributes & DOOR_REVOKED) { + sdcmn_err6(("lookup door: revoked\n")); + sdev_upcall_door_revoked = 1; + } + } + + return (sdev_upcall_door_revoked); +} + +/*ARGSUSED*/ +static void +sdev_config_all_thread(struct sdev_node *dv) +{ + int32_t error = 0; + sdev_door_arg_t *argp; + sdev_door_res_t result; + + argp = kmem_zalloc(sizeof (sdev_door_arg_t), KM_SLEEP); + argp->devfsadm_cmd = DEVFSADMD_RUN_ALL; + + error = sdev_ki_call_devfsadmd(argp, &result); + if (!error) { + sdcmn_err6(("devfsadm result error: %d\n", + result.devfsadm_error)); + if (!result.devfsadm_error) { + DEVNAME_DEVFSADM_SET_RUN(devfsadm_state); + } else { + DEVNAME_DEVFSADM_SET_STOP(devfsadm_state); + } + } else { + DEVNAME_DEVFSADM_SET_STOP(devfsadm_state); + } + + kmem_free(argp, sizeof (sdev_door_arg_t)); +done: + sdcmn_err6(("sdev_config_all_thread: stopping, devfsadm state 0x%x\n", + devfsadm_state)); + thread_exit(); +} + +/* + * launch an asynchronous thread to do the devfsadm dev_config_all + */ +/*ARGSUSED*/ +void +sdev_devfsadmd_thread(struct sdev_node *ddv, struct sdev_node *dv, + struct cred *cred) +{ + ASSERT(i_ddi_io_initialized()); + DEVNAME_DEVFSADM_SET_RUNNING(devfsadm_state); + (void) thread_create(NULL, 0, sdev_config_all_thread, dv, 0, + &p0, TS_RUN, MINCLSYSPRI); +} + +int +devname_filename_register(int cmd, char *name) +{ + int error = 0; + char *strbuf; + char *namep; + int n; + + ASSERT(cmd == MODDEVNAME_LOOKUPDOOR || + cmd == MODDEVNAME_DEVFSADMNODE); + + strbuf = kmem_zalloc(MOD_MAXPATH, KM_SLEEP); + + if (copyinstr(name, strbuf, MOD_MAXPATH, 0)) { + 
sdcmn_err6(("error copyin \n")); + error = EFAULT; + } else { + sdcmn_err6(("file %s is registering\n", strbuf)); + switch (cmd) { + case MODDEVNAME_LOOKUPDOOR: + /* handling the daemon re-start situations */ + n = strlen(strbuf) + 1; + namep = i_ddi_strdup(strbuf, KM_SLEEP); + mutex_enter(&devfsadm_lock); + sdev_release_door(); + sdev_door_upcall_filename_size = n; + sdev_door_upcall_filename = namep; + sdcmn_err6(("size %d file name %s\n", + sdev_door_upcall_filename_size, + sdev_door_upcall_filename)); + cv_broadcast(&devfsadm_cv); + mutex_exit(&devfsadm_lock); + break; + case MODDEVNAME_DEVFSADMNODE: + break; + } + } + + kmem_free(strbuf, MOD_MAXPATH); + return (error); +} +static void +sdev_nsrdr_thread(void) +{ + sdev_nsrdr_work_t *work; + + for (;;) { + mutex_enter(&sdev_nsrdr_thread_lock); + if (sdev_nsrdr_thread_workq == NULL) { + cv_wait(&sdev_nsrdr_thread_cv, &sdev_nsrdr_thread_lock); + } + work = sdev_nsrdr_thread_workq; + sdev_nsrdr_thread_workq = work->next; + if (sdev_nsrdr_thread_tail == work) + sdev_nsrdr_thread_tail = work->next; + mutex_exit(&sdev_nsrdr_thread_lock); + sdev_devfsadmd_nsrdr(work); + } + /*NOTREACHED*/ +} + +int +devname_nsmaps_register(char *nvlbuf, size_t nvlsize) +{ + int error = 0; + nvlist_t *nvl, *attrs; + nvpair_t *nvp = NULL; + nvpair_t *kvp = NULL; + char *buf; + char *key; + char *dirname = NULL; + char *dirmodule = NULL; + char *dirmap = NULL; + char *orig_module; + char *orig_map; + int len = 0; + char *tmpmap; + int mapcount = 0; + + buf = kmem_zalloc(nvlsize, KM_SLEEP); + if ((error = ddi_copyin(nvlbuf, buf, nvlsize, 0)) != 0) { + kmem_free(buf, nvlsize); + return (error); + } + + ASSERT(buf); + sdcmn_err6(("devname_nsmaps_register: nsmap buf %p\n", (void *)buf)); + nvl = NULL; + error = nvlist_unpack(buf, nvlsize, &nvl, KM_SLEEP); + kmem_free(buf, nvlsize); + if (error || (nvl == NULL)) + return (error); + + /* invalidate all the nsmaps */ + mutex_enter(&devname_nsmaps_lock); + sdev_invalidate_nsmaps(); + for (nvp = 
nvlist_next_nvpair(nvl, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + dirname = nvpair_name(nvp); + if (dirname == NULL) { + nvlist_free(nvl); + mutex_exit(&devname_nsmaps_lock); + return (-1); + } + + sdcmn_err6(("dirname %s\n", dirname)); + (void) nvpair_value_nvlist(nvp, &attrs); + for (kvp = nvlist_next_nvpair(attrs, NULL); kvp; + kvp = nvlist_next_nvpair(attrs, kvp)) { + key = nvpair_name(kvp); + sdcmn_err6(("key %s\n", key)); + if (strcmp(key, "module") == 0) { + (void) nvpair_value_string(kvp, &orig_module); + sdcmn_err6(("module %s\n", orig_module)); + dirmodule = i_ddi_strdup(orig_module, KM_SLEEP); + if (strcmp(dirmodule, "devname_null") == 0) + dirmodule = NULL; + } + + if (strcmp(key, "nsconfig") == 0) { + (void) nvpair_value_string(kvp, &orig_map); + sdcmn_err6(("dirmap %s\n", orig_map)); + dirmap = i_ddi_strdup(orig_map, KM_SLEEP); + if (strcmp(dirmap, "devname_null") == 0) + dirmap = NULL; + else if (dirmap[0] != '/') { + len = strlen(dirmap) + + strlen(ETC_DEV_DIR) + 2; + tmpmap = i_ddi_strdup(dirmap, KM_SLEEP); + (void) snprintf(dirmap, len, "%s/%s", + ETC_DEV_DIR, tmpmap); + kmem_free(tmpmap, strlen(tmpmap) + 1); + } + } + } + + if (dirmodule == NULL && dirmap == NULL) { + nvlist_free(nvl); + mutex_exit(&devname_nsmaps_lock); + return (-1); + } + + sdcmn_err6(("sdev_nsmaps_register: dir %s module %s map %s\n", + dirname, dirmodule, dirmap)); + sdev_insert_nsmap(dirname, dirmodule, dirmap); + mapcount++; + } + + if (mapcount > 0) + devname_nsmaps_loaded = 1; + + /* clean up obsolete nsmaps */ + sdev_validate_nsmaps(); + mutex_exit(&devname_nsmaps_lock); + if (nvl) + nvlist_free(nvl); + + if (sdev_nsrdr_thread_created) { + return (0); + } + + mutex_init(&sdev_nsrdr_thread_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&sdev_nsrdr_thread_cv, NULL, CV_DEFAULT, NULL); + (void) thread_create(NULL, 0, (void (*)())sdev_nsrdr_thread, NULL, 0, + &p0, TS_RUN, minclsyspri); + sdev_nsrdr_thread_created = 1; + + return (0); +} + +void 
+sdev_dispatch_to_nsrdr_thread(struct sdev_node *ddv, char *dir_map, + devname_rdr_result_t *result) +{ + sdev_nsrdr_work_t *new_work; + + new_work = kmem_zalloc(sizeof (sdev_nsrdr_work_t), KM_SLEEP); + new_work->dir_name = i_ddi_strdup(ddv->sdev_name, KM_SLEEP); + new_work->dir_map = i_ddi_strdup(dir_map, KM_SLEEP); + new_work->dir_dv = ddv; + new_work->result = &result; + mutex_enter(&sdev_nsrdr_thread_lock); + if (sdev_nsrdr_thread_workq == NULL) { + sdev_nsrdr_thread_workq = new_work; + sdev_nsrdr_thread_tail = new_work; + new_work->next = NULL; + } else { + sdev_nsrdr_thread_tail->next = new_work; + sdev_nsrdr_thread_tail = new_work; + new_work->next = NULL; + } + cv_signal(&sdev_nsrdr_thread_cv); + mutex_exit(&sdev_nsrdr_thread_lock); +} + +static void +sdev_devfsadmd_nsrdr(sdev_nsrdr_work_t *work) +{ + int32_t error; + struct sdev_door_arg *argp; + struct sdev_door_res res; + struct sdev_node *ddv = work->dir_dv; + uint32_t mapcount; + + argp = kmem_zalloc(sizeof (sdev_door_arg_t), KM_SLEEP); + argp->devfsadm_cmd = DEVFSADMD_NS_READDIR; + + (void) snprintf(argp->ns_hdl.ns_name, + strlen(work->dir_dv->sdev_path) + 1, "%s", work->dir_dv->sdev_path); + (void) snprintf(argp->ns_hdl.ns_map, strlen(work->dir_map) + 1, "%s", + work->dir_map); + + sdcmn_err6(("sdev_devfsadmd_nsrdr: ns_name %s, ns_map %s\n", + argp->ns_hdl.ns_name, argp->ns_hdl.ns_map)); + error = sdev_ki_call_devfsadmd(argp, &res); + sdcmn_err6(("sdev_devfsadmd_nsrdr error %d\n", error)); + if (error == 0) { + error = res.devfsadm_error; + if (error) { + goto out; + } + + mapcount = (uint32_t)res.ns_rdr_hdl.ns_mapcount; + sdcmn_err6(("nsmapcount %d\n", mapcount)); + if (mapcount > 0) { + struct devname_nsmap *map = + ddv->sdev_mapinfo; + ASSERT(map && map->dir_map); + rw_enter(&map->dir_lock, RW_WRITER); + map->dir_maploaded = 1; + rw_exit(&map->dir_lock); + } + } + +out: + mutex_enter(&ddv->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(ddv, SDEV_READDIR); + mutex_exit(&ddv->sdev_lookup_lock); + + 
kmem_free(argp, sizeof (sdev_door_arg_t)); +} + + +int +devname_nsmap_lookup(devname_lkp_arg_t *args, devname_lkp_result_t **result) +{ + int32_t error = 0; + struct sdev_door_arg *argp; + struct sdev_door_res resp; + char *link; + uint8_t spec; + + argp = kmem_zalloc(sizeof (sdev_door_arg_t), KM_SLEEP); + argp->devfsadm_cmd = DEVFSADMD_NS_LOOKUP; + + (void) snprintf(argp->ns_hdl.ns_name, strlen(args->devname_name) + 1, + "%s", args->devname_name); + (void) snprintf(argp->ns_hdl.ns_map, strlen(args->devname_map) + 1, + "%s", args->devname_map); + + error = sdev_ki_call_devfsadmd(argp, &resp); + if (error == 0) { + error = resp.devfsadm_error; + sdcmn_err6(("devfsadm: error %d\n", error)); + if (error) { + goto done; + } + link = resp.ns_lkp_hdl.devfsadm_link; + if (link == NULL) { + error = ENOENT; + goto done; + } + spec = resp.ns_lkp_hdl.devfsadm_spec; + sdcmn_err6(("devfsadm_link %s spec %d\n", link, spec)); + + + (*result)->devname_spec = (devname_spec_t)spec; + (*result)->devname_link = i_ddi_strdup(link, KM_SLEEP); + } else { + (*result)->devname_spec = DEVNAME_NS_NONE; + (*result)->devname_link = NULL; + } +done: + kmem_free(argp, sizeof (sdev_door_arg_t)); + return (error); +} diff --git a/usr/src/uts/common/fs/dev/sdev_ncache.c b/usr/src/uts/common/fs/dev/sdev_ncache.c new file mode 100644 index 0000000000..f66532508d --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_ncache.c @@ -0,0 +1,740 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * negative cache handling for the /dev fs + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/user.h> +#include <sys/time.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/flock.h> +#include <sys/kmem.h> +#include <sys/uio.h> +#include <sys/errno.h> +#include <sys/stat.h> +#include <sys/cred.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/mode.h> +#include <sys/policy.h> +#include <fs/fs_subr.h> +#include <sys/mount.h> +#include <sys/fs/snode.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_node.h> +#include <sys/sunndi.h> +#include <sys/sunmdi.h> +#include <sys/ddi.h> +#include <sys/modctl.h> +#include <sys/devctl_impl.h> + + +/* + * ncache is a negative cache of failed lookups. An entry + * is added after an attempt to configure a device by that + * name failed. An accumulation of these entries over time + * gives us a set of device names for which implicit reconfiguration + * does not need to be attempted. If a name is created matching + * an entry in ncache, that entry is removed, with the + * persistent store updated. + * + * Implicit reconfig is initiated for any name during lookup that + * can't be resolved from the backing store and that isn't + * present in the negative cache. 
This functionality is + * enabled during system startup once communication with devfsadm + * can be achieved. Since readdir is more general, implicit + * reconfig initiated by reading a directory isn't enabled until + * the system is more fully booted, at the time of the multi-user + * milestone, corresponding to init state 2. + * + * A maximum is imposed on the number of entries in the cache + * to limit some script going wild and as a defense against attack. + * The default limit is 64 and can be adjusted via sdev_nc_max_entries. + * + * Each entry also has an expiration count. When a name in the cache + * is looked up, its count is reset to the default. Subsequent boots will decrement + * the count if a name isn't referenced. This permits a once-only + * entry to eventually be removed over time. + * + * sdev_reconfig_delay implements a "debounce" of the timing beyond + * system available indication, providing what the filesystem considers + * to be the system-is-fully-booted state. This is provided to adjust + * the timing if some application startup is performing a readdir + * in /dev that initiates a troublesome implicit reconfig on every boot. + * + * sdev_nc_disable_reset can be used to disable clearing the negative cache + * on reconfig boot. The default is to clear the cache on reconfig boot. + * sdev_nc_disable can be used to disable the negative cache itself. + * + * sdev_reconfig_disable can be used to disable implicit reconfig. + * The default is that implicit reconfig is enabled. 
+ */ + +/* tunables and defaults */ +#define SDEV_NC_EXPIRECNT 4 +#define SDEV_NC_MAX_ENTRIES 64 +#define SEV_RECONFIG_DELAY 6 /* seconds */ + +int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT; +int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES; +int sdev_reconfig_delay = SEV_RECONFIG_DELAY; +int sdev_reconfig_verbose = 0; +int sdev_reconfig_disable = 0; +int sdev_nc_disable = 0; +int sdev_nc_disable_reset = 0; +int sdev_nc_verbose = 0; + +/* globals */ +sdev_nc_list_t *sdev_ncache; +int sdev_boot_state = SDEV_BOOT_STATE_INITIAL; +int sdev_reconfig_boot = 0; +static timeout_id_t sdev_timeout_id = 0; + +/* static prototypes */ +static void sdev_ncache_write_complete(nvfd_t *); +static void sdev_ncache_write(void); +static void sdev_ncache_process_store(void); +static sdev_nc_list_t *sdev_nc_newlist(void); +static void sdev_nc_free_unlinked_node(sdev_nc_node_t *); +static void sdev_nc_free_all_nodes(sdev_nc_list_t *); +static void sdev_nc_freelist(sdev_nc_list_t *); +static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *); +static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *); +static void sdev_nc_free_bootonly(void); + + +/* + * called once at filesystem initialization + */ +void +sdev_ncache_init(void) +{ + sdev_ncache = sdev_nc_newlist(); +} + +/* + * called at mount of the global instance + * currently the global instance is never unmounted + */ +void +sdev_ncache_setup(void) +{ + nvfd_t *nvf = sdevfd; + + nvf_register_write_complete(nvf, sdev_ncache_write_complete); + + i_ddi_read_devname_file(); + sdev_ncache_process_store(); + sdev_devstate_change(); +} + +static void +sdev_nvp_cache_free(nvfd_t *nvf) +{ + nvp_devname_t *np; + nvp_devname_t *next; + + for (np = NVF_DEVNAME_LIST(nvf); np; np = next) { + next = NVP_DEVNAME_NEXT(np); + nfd_nvp_free_and_unlink(nvf, NVPLIST(np)); + } +} + +static void +sdev_ncache_process_store(void) +{ + nvfd_t *nvf = sdevfd; + sdev_nc_list_t *ncl = sdev_ncache; + nvp_devname_t *np; + sdev_nc_node_t *lp; + char *path; 
+ int i, n; + + if (sdev_nc_disable) + return; + + for (np = NVF_DEVNAME_LIST(nvf); np; np = NVP_DEVNAME_NEXT(np)) { + for (i = 0; i < np->nvp_npaths; i++) { + sdcmn_err5((" %s %d\n", + np->nvp_paths[i], np->nvp_expirecnts[i])); + if (ncl->ncl_nentries < sdev_nc_max_entries) { + path = np->nvp_paths[i]; + n = strlen(path) + 1; + lp = kmem_alloc(sizeof (sdev_nc_node_t), + KM_SLEEP); + lp->ncn_name = kmem_alloc(n, KM_SLEEP); + bcopy(path, lp->ncn_name, n); + lp->ncn_flags = NCN_SRC_STORE; + lp->ncn_expirecnt = np->nvp_expirecnts[i]; + sdev_nc_insertnode(ncl, lp); + } else if (sdev_nc_verbose) { + cmn_err(CE_CONT, + "?%s: truncating from ncache (max %d)\n", + np->nvp_paths[i], sdev_nc_max_entries); + } + } + } +} + +static void +sdev_ncache_write_complete(nvfd_t *nvf) +{ + sdev_nc_list_t *ncl = sdev_ncache; + + mutex_enter(&ncl->ncl_mutex); + + ASSERT(ncl->ncl_flags & NCL_LIST_WRITING); + + if (ncl->ncl_flags & NCL_LIST_DIRTY) { + sdcmn_err5(("ncache write complete but dirty again\n")); + ncl->ncl_flags &= ~NCL_LIST_DIRTY; + mutex_exit(&ncl->ncl_mutex); + sdev_ncache_write(); + } else { + sdcmn_err5(("ncache write complete\n")); + ncl->ncl_flags &= ~NCL_LIST_WRITING; + mutex_exit(&ncl->ncl_mutex); + rw_enter(&nvf->nvf_lock, RW_WRITER); + sdev_nvp_cache_free(nvf); + rw_exit(&nvf->nvf_lock); + } +} + +static void +sdev_ncache_write(void) +{ + nvfd_t *nvf = sdevfd; + sdev_nc_list_t *ncl = sdev_ncache; + nvp_devname_t *np; + sdev_nc_node_t *lp; + int n, i; + + if (sdev_cache_write_disable) { + mutex_enter(&ncl->ncl_mutex); + ncl->ncl_flags &= ~NCL_LIST_WRITING; + mutex_exit(&ncl->ncl_mutex); + return; + } + + /* proper lock ordering here is essential */ + rw_enter(&nvf->nvf_lock, RW_WRITER); + sdev_nvp_cache_free(nvf); + + rw_enter(&ncl->ncl_lock, RW_READER); + n = ncl->ncl_nentries; + ASSERT(n <= sdev_nc_max_entries); + + np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); + np->nvp_npaths = n; + np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP); + 
np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP); + + i = 0; + for (lp = list_head(&ncl->ncl_list); lp; + lp = list_next(&ncl->ncl_list, lp)) { + np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP); + np->nvp_expirecnts[i] = lp->ncn_expirecnt; + sdcmn_err5((" %s %d\n", + np->nvp_paths[i], np->nvp_expirecnts[i])); + i++; + } + + rw_exit(&ncl->ncl_lock); + + NVF_MARK_DIRTY(nvf); + nfd_nvp_link(nvf, NVPLIST(np)); + rw_exit(&nvf->nvf_lock); + + wake_nvpflush_daemon(); +} + +static void +sdev_nc_flush_updates(void) +{ + sdev_nc_list_t *ncl = sdev_ncache; + + if (sdev_nc_disable || sdev_cache_write_disable) + return; + + mutex_enter(&ncl->ncl_mutex); + if (((ncl->ncl_flags & + (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) == + (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) { + ncl->ncl_flags &= ~NCL_LIST_DIRTY; + ncl->ncl_flags |= NCL_LIST_WRITING; + mutex_exit(&ncl->ncl_mutex); + sdev_ncache_write(); + } else { + mutex_exit(&ncl->ncl_mutex); + } +} + +static void +sdev_nc_flush_boot_update(void) +{ + sdev_nc_list_t *ncl = sdev_ncache; + + if (sdev_nc_disable || sdev_cache_write_disable || + (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) { + return; + } + mutex_enter(&ncl->ncl_mutex); + if (ncl->ncl_flags & NCL_LIST_WENABLE) { + mutex_exit(&ncl->ncl_mutex); + sdev_nc_flush_updates(); + } else { + mutex_exit(&ncl->ncl_mutex); + } + +} + +static void +sdev_state_boot_complete() +{ + sdev_nc_list_t *ncl = sdev_ncache; + sdev_nc_node_t *lp, *next; + + /* + * Once boot is complete, decrement the expire count of each entry + * in the cache not touched by a reference. Remove any that + * goes to zero. This effectively removes random entries over + * time. 
+ */ + rw_enter(&ncl->ncl_lock, RW_WRITER); + mutex_enter(&ncl->ncl_mutex); + + for (lp = list_head(&ncl->ncl_list); lp; lp = next) { + next = list_next(&ncl->ncl_list, lp); + if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) { + if (lp->ncn_flags & NCN_ACTIVE) { + if (lp->ncn_expirecnt != sdev_nc_expirecnt) { + lp->ncn_expirecnt = sdev_nc_expirecnt; + ncl->ncl_flags |= NCL_LIST_DIRTY; + } + } else { + if (--lp->ncn_expirecnt == 0) { + list_remove(&ncl->ncl_list, lp); + sdev_nc_free_unlinked_node(lp); + ncl->ncl_nentries--; + } + ncl->ncl_flags |= NCL_LIST_DIRTY; + } + } + } + + mutex_exit(&ncl->ncl_mutex); + rw_exit(&ncl->ncl_lock); + + sdev_nc_flush_boot_update(); + sdev_boot_state = SDEV_BOOT_STATE_COMPLETE; +} + +/* + * Upon transition to the login state on a reconfigure boot, + * a debounce timer is set up so that we cache all the nonsense + * lookups we're hit with by the windowing system startup. + */ + +/*ARGSUSED*/ +static void +sdev_state_timeout(void *arg) +{ + sdev_timeout_id = 0; + sdev_state_boot_complete(); +} + +static void +sdev_state_sysavail() +{ + sdev_nc_list_t *ncl = sdev_ncache; + clock_t nticks; + int nsecs; + + mutex_enter(&ncl->ncl_mutex); + ncl->ncl_flags |= NCL_LIST_WENABLE; + mutex_exit(&ncl->ncl_mutex); + + nsecs = sdev_reconfig_delay; + if (nsecs == 0) { + sdev_state_boot_complete(); + } else { + nticks = drv_usectohz(1000000 * nsecs); + sdcmn_err5(("timeout initiated %ld\n", nticks)); + sdev_timeout_id = timeout(sdev_state_timeout, NULL, nticks); + sdev_nc_flush_boot_update(); + } +} + +/* + * Called to inform the filesystem of progress during boot, + * either a notice of reconfiguration boot or an indication of + * system boot complete. At system boot complete, set up a + * timer at the expiration of which no further failed lookups + * will be added to the negative cache. 
+ * + * The dev filesystem infers from reconfig boot that implicit + * reconfig need not be invoked at all as all available devices + * will have already been named. + * + * The dev filesystem infers from "system available" that devfsadmd + * can now be run and hence implicit reconfiguration may be initiated. + * During early stages of system startup, implicit reconfig is + * not done to avoid impacting boot performance. + */ +void +sdev_devstate_change(void) +{ + int new_state; + + /* + * Track system state and manage interesting transitions + */ + new_state = SDEV_BOOT_STATE_INITIAL; + if (i_ddi_reconfig()) + new_state = SDEV_BOOT_STATE_RECONFIG; + if (i_ddi_sysavail()) + new_state = SDEV_BOOT_STATE_SYSAVAIL; + + if (sdev_boot_state < new_state) { + switch (new_state) { + case SDEV_BOOT_STATE_RECONFIG: + sdcmn_err5(("state change: reconfigure boot\n")); + sdev_boot_state = new_state; + sdev_reconfig_boot = 1; + if (!sdev_nc_disable_reset) + sdev_nc_free_bootonly(); + break; + case SDEV_BOOT_STATE_SYSAVAIL: + sdcmn_err5(("system available\n")); + sdev_boot_state = new_state; + sdev_state_sysavail(); + break; + } + } +} + +/* + * Lookup: filter out entries in the negative cache + * Return 1 if the lookup should not cause a reconfig. 
+ */ +int +sdev_lookup_filter(sdev_node_t *dv, char *nm) +{ + int n; + sdev_nc_list_t *ncl = sdev_ncache; + sdev_nc_node_t *lp; + char *path; + int rval = 0; + int changed = 0; + + ASSERT(i_ddi_io_initialized()); + ASSERT(SDEVTOV(dv)->v_type == VDIR); + + if (sdev_nc_disable) + return (0); + + n = strlen(dv->sdev_path) + strlen(nm) + 2; + path = kmem_alloc(n, KM_SLEEP); + (void) sprintf(path, "%s/%s", dv->sdev_path, nm); + + rw_enter(&ncl->ncl_lock, RW_READER); + if ((lp = sdev_nc_findpath(ncl, path)) != NULL) { + sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n", + dv->sdev_name, nm, curproc->p_user.u_comm)); + if (sdev_nc_verbose) { + cmn_err(CE_CONT, + "?%s/%s: lookup by %s cached, no reconfig\n", + dv->sdev_name, nm, curproc->p_user.u_comm); + } + mutex_enter(&ncl->ncl_mutex); + lp->ncn_flags |= NCN_ACTIVE; + if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 && + lp->ncn_expirecnt < sdev_nc_expirecnt) { + lp->ncn_expirecnt = sdev_nc_expirecnt; + ncl->ncl_flags |= NCL_LIST_DIRTY; + changed = 1; + } + mutex_exit(&ncl->ncl_mutex); + rval = 1; + } + rw_exit(&ncl->ncl_lock); + kmem_free(path, n); + if (changed) + sdev_nc_flush_boot_update(); + return (rval); +} + +void +sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags) +{ + if (sdev_nc_disable) + return; + + /* + * If we're still in the initial boot stage, always update + * the cache - we may not have received notice of the + * reconfig boot state yet. On a reconfigure boot, entries + * from the backing store are not re-persisted on update, + * but new entries are marked as needing an update. + * Never cache dynamic or non-global nodes. 
+ */ + if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && + !SDEV_IS_NO_NCACHE(dv) && + ((failed_flags & SLF_NO_NCACHE) == 0) && + ((sdev_reconfig_boot && + (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) || + (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) { + sdev_nc_addname(sdev_ncache, + dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE); + } +} + +static sdev_nc_list_t * +sdev_nc_newlist(void) +{ + sdev_nc_list_t *ncl; + + ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP); + + rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL); + mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL); + list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t), + offsetof(sdev_nc_node_t, ncn_link)); + + return (ncl); +} + +static void +sdev_nc_free_unlinked_node(sdev_nc_node_t *lp) +{ + kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1); + kmem_free(lp, sizeof (sdev_nc_node_t)); +} + +static void +sdev_nc_free_all_nodes(sdev_nc_list_t *ncl) +{ + sdev_nc_node_t *lp; + + while ((lp = list_head(&ncl->ncl_list)) != NULL) { + list_remove(&ncl->ncl_list, lp); + sdev_nc_free_unlinked_node(lp); + ncl->ncl_nentries--; + } + ASSERT(ncl->ncl_nentries == 0); +} + +static void +sdev_nc_freelist(sdev_nc_list_t *ncl) +{ + if (!list_is_empty(&ncl->ncl_list)) + sdev_nc_free_all_nodes(ncl); + ASSERT(list_is_empty(&ncl->ncl_list)); + ASSERT(ncl->ncl_nentries == 0); + + mutex_destroy(&ncl->ncl_mutex); + rw_destroy(&ncl->ncl_lock); + list_destroy(&ncl->ncl_list); + kmem_free(ncl, sizeof (sdev_nc_list_t)); +} + +static sdev_nc_node_t * +sdev_nc_findpath(sdev_nc_list_t *ncl, char *path) +{ + sdev_nc_node_t *lp; + + ASSERT(RW_LOCK_HELD(&ncl->ncl_lock)); + + for (lp = list_head(&ncl->ncl_list); lp; + lp = list_next(&ncl->ncl_list, lp)) { + if (strcmp(path, lp->ncn_name) == 0) + return (lp); + } + + return (NULL); +} + +static void +sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new) +{ + sdev_nc_node_t *lp; + + rw_enter(&ncl->ncl_lock, RW_WRITER); + + lp = sdev_nc_findpath(ncl, new->ncn_name); + if (lp == 
NULL) { + if (ncl->ncl_nentries == sdev_nc_max_entries) { + sdcmn_err5(( + "%s by %s: not adding to ncache (max %d)\n", + new->ncn_name, curproc->p_user.u_comm, + ncl->ncl_nentries)); + if (sdev_nc_verbose) { + cmn_err(CE_CONT, "?%s by %s: " + "not adding to ncache (max %d)\n", + new->ncn_name, curproc->p_user.u_comm, + ncl->ncl_nentries); + } + rw_exit(&ncl->ncl_lock); + sdev_nc_free_unlinked_node(new); + } else { + + list_insert_tail(&ncl->ncl_list, new); + ncl->ncl_nentries++; + + /* don't mark list dirty for nodes from store */ + mutex_enter(&ncl->ncl_mutex); + if ((new->ncn_flags & NCN_SRC_STORE) == 0) { + sdcmn_err5(("%s by %s: add to ncache\n", + new->ncn_name, curproc->p_user.u_comm)); + if (sdev_nc_verbose) { + cmn_err(CE_CONT, + "?%s by %s: add to ncache\n", + new->ncn_name, + curproc->p_user.u_comm); + } + ncl->ncl_flags |= NCL_LIST_DIRTY; + } + mutex_exit(&ncl->ncl_mutex); + rw_exit(&ncl->ncl_lock); + lp = new; + sdev_nc_flush_boot_update(); + } + } else { + mutex_enter(&ncl->ncl_mutex); + lp->ncn_flags |= new->ncn_flags; + mutex_exit(&ncl->ncl_mutex); + rw_exit(&ncl->ncl_lock); + sdev_nc_free_unlinked_node(new); + } +} + +void +sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags) +{ + int n; + sdev_nc_node_t *lp; + + ASSERT(SDEVTOV(dv)->v_type == VDIR); + + lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP); + + n = strlen(dv->sdev_path) + strlen(nm) + 2; + lp->ncn_name = kmem_alloc(n, KM_SLEEP); + (void) sprintf(lp->ncn_name, "%s/%s", + dv->sdev_path, nm); + lp->ncn_flags = flags; + lp->ncn_expirecnt = sdev_nc_expirecnt; + sdev_nc_insertnode(ncl, lp); +} + +void +sdev_nc_node_exists(sdev_node_t *dv) +{ + /* dynamic and non-global nodes are never cached */ + if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && + !SDEV_IS_NO_NCACHE(dv)) { + sdev_nc_path_exists(sdev_ncache, dv->sdev_path); + } +} + +void +sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path) +{ + sdev_nc_node_t *lp; + + if (sdev_nc_disable) + return; + + 
rw_enter(&ncl->ncl_lock, RW_READER); + if ((lp = sdev_nc_findpath(ncl, path)) == NULL) { + rw_exit(&ncl->ncl_lock); + return; + } + if (rw_tryupgrade(&ncl->ncl_lock) == 0) { + rw_exit(&ncl->ncl_lock); + rw_enter(&ncl->ncl_lock, RW_WRITER); + lp = sdev_nc_findpath(ncl, path); + } + if (lp) { + list_remove(&ncl->ncl_list, lp); + ncl->ncl_nentries--; + mutex_enter(&ncl->ncl_mutex); + ncl->ncl_flags |= NCL_LIST_DIRTY; + if (ncl->ncl_flags & NCL_LIST_WENABLE) { + mutex_exit(&ncl->ncl_mutex); + rw_exit(&ncl->ncl_lock); + sdev_nc_flush_updates(); + } else { + mutex_exit(&ncl->ncl_mutex); + rw_exit(&ncl->ncl_lock); + } + sdev_nc_free_unlinked_node(lp); + sdcmn_err5(("%s by %s: removed from ncache\n", + path, curproc->p_user.u_comm)); + if (sdev_nc_verbose) { + cmn_err(CE_CONT, "?%s by %s: removed from ncache\n", + path, curproc->p_user.u_comm); + } + } else + rw_exit(&ncl->ncl_lock); +} + +static void +sdev_nc_free_bootonly(void) +{ + sdev_nc_list_t *ncl = sdev_ncache; + sdev_nc_node_t *lp; + sdev_nc_node_t *next; + + ASSERT(sdev_reconfig_boot); + + rw_enter(&ncl->ncl_lock, RW_WRITER); + + for (lp = list_head(&ncl->ncl_list); lp; lp = next) { + next = list_next(&ncl->ncl_list, lp); + if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) { + sdcmn_err5(("freeing %s\n", lp->ncn_name)); + mutex_enter(&ncl->ncl_mutex); + ncl->ncl_flags |= NCL_LIST_DIRTY; + mutex_exit(&ncl->ncl_mutex); + list_remove(&ncl->ncl_list, lp); + sdev_nc_free_unlinked_node(lp); + ncl->ncl_nentries--; + } + } + + rw_exit(&ncl->ncl_lock); +} diff --git a/usr/src/uts/common/fs/dev/sdev_nsconfig_mod.c b/usr/src/uts/common/fs/dev/sdev_nsconfig_mod.c new file mode 100644 index 0000000000..476eb2472d --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_nsconfig_mod.c @@ -0,0 +1,198 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * this module implements the devname_ops to fetch + * a specific entry from a /etc/dev/devname_map file or + * a name service map. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/stat.h> +#include <sys/cmn_err.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/modctl.h> +#include <sys/debug.h> +#include <sys/fs/sdev_impl.h> +#include <sys/fs/sdev_node.h> + +static int devname_lookup(char *, devname_handle_t *, struct cred *); +static int devname_remove(devname_handle_t *); +static int devname_rename(devname_handle_t *, char *); +static int devname_readdir(devname_handle_t *, struct cred *); +static int devname_getattr(devname_handle_t *, struct vattr *, + struct cred *); +static void devname_inactive(devname_handle_t *, struct cred *); + +static struct devname_ops devname_ops = { + DEVNOPS_REV, /* devnops_rev, */ + devname_lookup, /* devnops_lookup */ + devname_remove, /* devnops_remove */ + devname_rename, /* devnops_rename */ + devname_getattr, /* devnops_getattr */ + devname_readdir, /* devname_readdir */ + devname_inactive /* devname_inactive */ +}; + +/* + * Module linkage information for the kernel. 
+ */ +static struct modldev modldev = { + &mod_devfsops, + "devname name service mod 1.0", + &devname_ops, +}; + +static struct modlinkage modlinkage = { + MODREV_1, &modldev, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +/*ARGSUSED2*/ +static int +devname_lookup(char *nm, devname_handle_t *dhl, struct cred *cred) +{ + int error = 0; + char *dir = NULL; + devname_lkp_arg_t *args = NULL; + devname_lkp_result_t *result = NULL; + struct devname_nsmap *map = NULL; + + args = kmem_zalloc(sizeof (struct devname_lkp_arg), KM_SLEEP); + if (args == NULL) { + error = ENOENT; + goto errout; + } + + args->devname_name = i_ddi_strdup(nm, KM_SLEEP); + error = devname_get_dir_path(dhl, &dir); + if (error) { + error = ENOENT; + goto errout; + } + + args->devname_dir = i_ddi_strdup(dir, KM_SLEEP); + error = devname_get_dir_nsmap(dhl, &map); + if (map && map->dir_map) + args->devname_map = i_ddi_strdup(map->dir_map, KM_SLEEP); + + result = kmem_zalloc(sizeof (struct devname_lkp_result), KM_SLEEP); + if (result == NULL) { + error = ENOENT; + goto errout; + } + + + error = devname_nsmap_lookup(args, &result); + if (error) { + error = ENOENT; + goto errout; + } + + devname_set_nodetype(dhl, (void *)result->devname_link, + (int)result->devname_spec); + +errout: + if (args->devname_name) + kmem_free(args->devname_name, strlen(args->devname_name) + 1); + if (args->devname_dir) + kmem_free(args->devname_dir, strlen(args->devname_dir) + 1); + if (args->devname_map) + kmem_free(args->devname_map, strlen(args->devname_map) + 1); + if (args) + kmem_free(args, sizeof (struct devname_lkp_arg)); + if (result) + kmem_free(result, sizeof (struct devname_lkp_result)); + return (error); +} + +/*ARGSUSED*/ +static int +devname_readdir(devname_handle_t *hdl, struct cred *cred) +{ + char *entry; + char *dir; + + (void) 
devname_get_name(hdl, &entry); + (void) devname_get_dir_name(hdl, &dir); + + /* do not waste to do the map check */ + return (0); +} + +/*ARGSUSED*/ +static int +devname_remove(devname_handle_t *hdl) +{ + char *entry; + + (void) devname_get_name(hdl, &entry); + return (EROFS); +} + +/*ARGSUSED*/ +static int +devname_rename(devname_handle_t *ohdl, char *new_name) +{ + char *oname; + + (void) devname_get_name(ohdl, &oname); + return (ENOTSUP); +} + +/*ARGSUSED*/ +static int +devname_getattr(devname_handle_t *hdl, vattr_t *vap, struct cred *cred) +{ + return (0); +} + +/*ARGSUSED*/ +static void +devname_inactive(devname_handle_t *hdl, struct cred *cred) +{ +} diff --git a/usr/src/uts/common/fs/dev/sdev_profile.c b/usr/src/uts/common/fs/dev/sdev_profile.c new file mode 100644 index 0000000000..009dc4f8d5 --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_profile.c @@ -0,0 +1,983 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This file implements /dev filesystem operations for non-global + * instances. 
Three major entry points: + * devname_profile_update() + * Update matching rules determining which names to export + * prof_readdir() + * Return the list of exported names + * prof_lookup() + * Implements lookup + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/vnode.h> +#include <sys/uio.h> +#include <sys/dirent.h> +#include <sys/pathname.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_impl.h> +#include <sys/sunndi.h> +#include <sys/modctl.h> + +enum { + PROFILE_TYPE_INCLUDE, + PROFILE_TYPE_EXCLUDE, + PROFILE_TYPE_MAP, + PROFILE_TYPE_SYMLINK +}; + +enum { + WALK_DIR_CONTINUE = 0, + WALK_DIR_TERMINATE +}; + +static const char *sdev_nvp_val_err = "nvpair_value error %d, %s\n"; + +static void process_rule(struct sdev_node *, struct sdev_node *, + char *, char *, int); +static void walk_dir(struct vnode *, void *, int (*)(char *, void *)); + +static void +prof_getattr(struct sdev_node *dir, char *name, struct vnode *gdv, + struct vattr *vap, struct vnode **avpp, int *no_fs_perm) +{ + struct vnode *advp; + + /* get attribute from shadow, if present; else get default */ + advp = dir->sdev_attrvp; + if (advp && VOP_LOOKUP(advp, name, avpp, NULL, 0, NULL, kcred) == 0) { + (void) VOP_GETATTR(*avpp, vap, 0, kcred); + } else if (gdv == NULL || gdv->v_type == VDIR) { + /* always create shadow directory */ + *vap = sdev_vattr_dir; + if (advp && VOP_MKDIR(advp, name, + &sdev_vattr_dir, avpp, kcred) != 0) { + *avpp = NULLVP; + sdcmn_err10(("prof_getattr: failed to create " + "shadow directory %s/%s\n", dir->sdev_path, name)); + } + } else { + /* + * get default permission from devfs + * Before calling devfs_get_defattr, we need to get + * the realvp (the dv_node). If realvp is not a dv_node, + * devfs_get_defattr() will return a system-wide default + * attr for device nodes. 
+ */ + struct vnode *rvp; + if (VOP_REALVP(gdv, &rvp) != 0) + rvp = gdv; + devfs_get_defattr(rvp, vap, no_fs_perm); + *avpp = NULLVP; + } + + /* ignore dev_t and vtype from backing store */ + if (gdv) { + vap->va_type = gdv->v_type; + vap->va_rdev = gdv->v_rdev; + } +} + +static void +apply_glob_pattern(struct sdev_node *pdir, struct sdev_node *cdir) +{ + char *name; + nvpair_t *nvp = NULL; + nvlist_t *nvl; + struct vnode *vp = SDEVTOV(cdir); + int rv = 0; + + if (vp->v_type != VDIR) + return; + name = cdir->sdev_name; + nvl = pdir->sdev_prof.dev_glob_incdir; + while (nvp = nvlist_next_nvpair(nvl, nvp)) { + char *pathleft; + char *expr = nvpair_name(nvp); + if (!gmatch(name, expr)) + continue; + rv = nvpair_value_string(nvp, &pathleft); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + break; + } + process_rule(cdir, cdir->sdev_origin, + pathleft, NULL, PROFILE_TYPE_INCLUDE); + } +} + +/* + * Some commonality here with sdev_mknode(), could be simplified. + * NOTE: prof_mknode returns with *newdv held once, if success. + */ +static int +prof_mknode(struct sdev_node *dir, char *name, struct sdev_node **newdv, + vattr_t *vap, vnode_t *avp, void *arg, cred_t *cred) +{ + struct sdev_node *dv; + int rv; + + ASSERT(RW_WRITE_HELD(&dir->sdev_contents)); + + /* check cache first */ + if (dv = sdev_cache_lookup(dir, name)) { + *newdv = dv; + return (0); + } + + /* allocate node and insert into cache */ + rv = sdev_nodeinit(dir, name, &dv, NULL); + if (rv != 0) { + *newdv = NULL; + return (rv); + } + + rv = sdev_cache_update(dir, &dv, name, SDEV_CACHE_ADD); + *newdv = dv; + + /* put it in ready state */ + rv = sdev_nodeready(*newdv, vap, avp, arg, cred); + + /* handle glob pattern in the middle of a path */ + if (rv == 0) { + if (SDEVTOV(*newdv)->v_type == VDIR) + sdcmn_err10(("sdev_origin for %s set to 0x%p\n", + name, arg)); + apply_glob_pattern(dir, *newdv); + } + return (rv); +} + +/* + * Create a directory node in a non-global dev instance. 
+ * Always create shadow vnode. Set sdev_origin to the corresponding + * global directory sdev_node if it exists. This facilitates the + * lookup operation. + */ +static int +prof_make_dir(char *name, struct sdev_node **gdirp, struct sdev_node **dirp) +{ + struct sdev_node *dir = *dirp; + struct sdev_node *gdir = *gdirp; + struct sdev_node *newdv; + struct vnode *avp, *gnewdir = NULL; + struct vattr vattr; + int error; + + /* see if name already exists */ + rw_enter(&dir->sdev_contents, RW_READER); + if (newdv = sdev_cache_lookup(dir, name)) { + *dirp = newdv; + *gdirp = newdv->sdev_origin; + SDEV_RELE(dir); + rw_exit(&dir->sdev_contents); + return (0); + } + rw_exit(&dir->sdev_contents); + + /* find corresponding dir node in global dev */ + if (gdir) { + error = VOP_LOOKUP(SDEVTOV(gdir), name, &gnewdir, + NULL, 0, NULL, kcred); + if (error == 0) { + *gdirp = VTOSDEV(gnewdir); + } else { /* it's ok if there no global dir */ + *gdirp = NULL; + } + } + + /* get attribute from shadow, also create shadow dir */ + prof_getattr(dir, name, gnewdir, &vattr, &avp, NULL); + + /* create dev directory vnode */ + rw_enter(&dir->sdev_contents, RW_WRITER); + error = prof_mknode(dir, name, &newdv, &vattr, avp, (void *)*gdirp, + kcred); + rw_exit(&dir->sdev_contents); + if (error == 0) { + ASSERT(newdv); + *dirp = newdv; + } + SDEV_RELE(dir); + return (error); +} + +/* + * Look up a logical name in the global zone. + * Provides the ability to map the global zone's device name + * to an alternate name within a zone. The primary example + * is the virtual console device /dev/zcons/[zonename]/zconsole + * mapped to /[zonename]/root/dev/zconsole. 
+ */ +static void +prof_lookup_globaldev(struct sdev_node *dir, struct sdev_node *gdir, + char *name, char *rename) +{ + /* global OS rootdir */ + extern vnode_t *rootdir; + + int error; + struct vnode *avp, *gdv, *gddv; + struct sdev_node *newdv; + struct vattr vattr = {0}; + struct pathname pn; + + /* check if node already exists */ + newdv = sdev_cache_lookup(dir, rename); + if (newdv) { + ASSERT(newdv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(newdv); + return; + } + + /* sanity check arguments */ + if (!gdir || pn_get(name, UIO_SYSSPACE, &pn)) + return; + + /* perform a relative lookup of the global /dev instance */ + gddv = SDEVTOV(gdir); + VN_HOLD(gddv); + VN_HOLD(rootdir); + error = lookuppnvp(&pn, NULL, FOLLOW, NULLVPP, &gdv, + rootdir, gddv, kcred); + pn_free(&pn); + if (error) { + sdcmn_err10(("prof_lookup_globaldev: %s not found\n", name)); + return; + } + ASSERT(gdv && gdv->v_type != VLNK); + + /* + * Found the entry in global /dev, figure out attributes + * by looking at backing store. Call into devfs for default. 
+ */ + prof_getattr(dir, name, gdv, &vattr, &avp, NULL); + + if (gdv->v_type != VDIR) { + VN_RELE(gdv); + gdir = NULL; + } else + gdir = VTOSDEV(gdv); + + if (prof_mknode(dir, rename, &newdv, &vattr, avp, + (void *)gdir, kcred) == 0) { + ASSERT(newdv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(newdv); + } +} + +static void +prof_make_sym(struct sdev_node *dir, char *lnm, char *tgt) +{ + struct sdev_node *newdv; + + if (prof_mknode(dir, lnm, &newdv, &sdev_vattr_lnk, NULL, + (void *)tgt, kcred) == 0) { + ASSERT(newdv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(newdv); + } +} + +/* + * Create symlinks in the current directory based on profile + */ +static void +prof_make_symlinks(struct sdev_node *dir) +{ + char *tgt, *lnm; + nvpair_t *nvp = NULL; + nvlist_t *nvl = dir->sdev_prof.dev_symlink; + int rv; + + ASSERT(RW_WRITE_HELD(&dir->sdev_contents)); + + if (nvl == NULL) + return; + + while (nvp = nvlist_next_nvpair(nvl, nvp)) { + lnm = nvpair_name(nvp); + rv = nvpair_value_string(nvp, &tgt); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + break; + } + prof_make_sym(dir, lnm, tgt); + } +} + +static void +prof_make_maps(struct sdev_node *dir) +{ + nvpair_t *nvp = NULL; + nvlist_t *nvl = dir->sdev_prof.dev_map; + int rv; + + ASSERT(RW_WRITE_HELD(&dir->sdev_contents)); + + if (nvl == NULL) + return; + + while (nvp = nvlist_next_nvpair(nvl, nvp)) { + char *name; + char *rename = nvpair_name(nvp); + rv = nvpair_value_string(nvp, &name); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + break; + } + sdcmn_err10(("map %s -> %s\n", name, rename)); + (void) prof_lookup_globaldev(dir, sdev_origins->sdev_root, + name, rename); + } +} + +struct match_arg { + char *expr; + int match; +}; + +static int +match_name(char *name, void *arg) +{ + struct match_arg *margp = (struct match_arg *)arg; + + if (gmatch(name, margp->expr)) { + margp->match = 1; + return (WALK_DIR_TERMINATE); + } + return (WALK_DIR_CONTINUE); 
+} + +static int +is_nonempty_dir(char *name, char *pathleft, struct sdev_node *dir) +{ + struct match_arg marg; + struct pathname pn; + struct vnode *gvp; + struct sdev_node *gdir = dir->sdev_origin; + + if (VOP_LOOKUP(SDEVTOV(gdir), name, &gvp, NULL, 0, NULL, kcred) != 0) + return (0); + + if (gvp->v_type != VDIR) { + VN_RELE(gvp); + return (0); + } + + if (pn_get(pathleft, UIO_SYSSPACE, &pn) != 0) { + VN_RELE(gvp); + return (0); + } + + marg.expr = kmem_alloc(MAXNAMELEN, KM_SLEEP); + (void) pn_getcomponent(&pn, marg.expr); + marg.match = 0; + + walk_dir(gvp, &marg, match_name); + VN_RELE(gvp); + kmem_free(marg.expr, MAXNAMELEN); + pn_free(&pn); + + return (marg.match); +} + + +/* Check if name passes matching rules */ +static int +prof_name_matched(char *name, struct sdev_node *dir) +{ + int type, match = 0; + char *expr; + nvlist_t *nvl; + nvpair_t *nvp = NULL; + int rv; + + /* check against nvlist for leaf include/exclude */ + nvl = dir->sdev_prof.dev_name; + while (nvp = nvlist_next_nvpair(nvl, nvp)) { + expr = nvpair_name(nvp); + rv = nvpair_value_int32(nvp, &type); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + break; + } + + if (type == PROFILE_TYPE_EXCLUDE) { + if (gmatch(name, expr)) + return (0); /* excluded */ + } else if (!match) { + match = gmatch(name, expr); + } + } + if (match) { + sdcmn_err10(("prof_name_matched: %s\n", name)); + return (match); + } + + /* check for match against directory globbing pattern */ + nvl = dir->sdev_prof.dev_glob_incdir; + while (nvp = nvlist_next_nvpair(nvl, nvp)) { + char *pathleft; + expr = nvpair_name(nvp); + if (gmatch(name, expr) == 0) + continue; + rv = nvpair_value_string(nvp, &pathleft); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + break; + } + if (is_nonempty_dir(name, pathleft, dir)) { + sdcmn_err10(("prof_name_matched: dir %s\n", name)); + return (1); + } + } + + return (0); +} + +static void +walk_dir(struct vnode *dvp, void *arg, int 
(*callback)(char *, void *)) +{ + char *nm; + int eof, error; + struct iovec iov; + struct uio uio; + struct dirent64 *dp; + dirent64_t *dbuf; + size_t dbuflen, dlen; + + ASSERT(dvp); + + dlen = 4096; + dbuf = kmem_zalloc(dlen, KM_SLEEP); + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_llimit = MAXOFFSET_T; + + eof = 0; + error = 0; + while (!error && !eof) { + uio.uio_resid = dlen; + iov.iov_base = (char *)dbuf; + iov.iov_len = dlen; + (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); + error = VOP_READDIR(dvp, &uio, kcred, &eof); + VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); + + dbuflen = dlen - uio.uio_resid; + if (error || dbuflen == 0) + break; + for (dp = dbuf; ((intptr_t)dp < + (intptr_t)dbuf + dbuflen); + dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { + nm = dp->d_name; + + if (strcmp(nm, ".") == 0 || + strcmp(nm, "..") == 0) + continue; + + if (callback(nm, arg) == WALK_DIR_TERMINATE) + goto end; + } + } + +end: + kmem_free(dbuf, dlen); +} + +static int +prof_make_name(char *nm, void *arg) +{ + struct sdev_node *ddv = (struct sdev_node *)arg; + + if (prof_name_matched(nm, ddv)) + prof_lookup_globaldev(ddv, ddv->sdev_origin, nm, nm); + return (WALK_DIR_CONTINUE); +} + +static void +prof_make_names_glob(struct sdev_node *ddv) +{ + struct sdev_node *gdir; + + gdir = ddv->sdev_origin; + if (gdir == NULL) + return; + walk_dir(SDEVTOV(gdir), (void *)ddv, prof_make_name); +} + +static void +prof_make_names(struct sdev_node *dir) +{ + char *name; + nvpair_t *nvp = NULL; + nvlist_t *nvl = dir->sdev_prof.dev_name; + int rv; + + ASSERT(RW_WRITE_HELD(&dir->sdev_contents)); + + if (nvl == NULL) + return; + + if (dir->sdev_prof.has_glob) { + prof_make_names_glob(dir); + return; + } + + /* Walk nvlist and lookup corresponding device in global inst */ + while (nvp = nvlist_next_nvpair(nvl, nvp)) { + int type; + rv = nvpair_value_int32(nvp, &type); + 
if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + break; + } + if (type == PROFILE_TYPE_EXCLUDE) + continue; + name = nvpair_name(nvp); + (void) prof_lookup_globaldev(dir, dir->sdev_origin, + name, name); + } +} + +/* + * Build directory vnodes based on the profile and the global + * dev instance. + */ +void +prof_filldir(struct sdev_node *ddv) +{ + int firsttime = 1; + struct sdev_node *gdir = ddv->sdev_origin; + + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + + /* + * We need to rebuild the directory content if + * - SDEV_BUILD is set + * - The device tree generation number has changed + * - The corresponding /dev namespace has been updated + */ +check_build: + if ((ddv->sdev_flags & SDEV_BUILD) == 0 && + ddv->sdev_devtree_gen == devtree_gen && + (gdir == NULL || ddv->sdev_ldir_gen + == gdir->sdev_gdir_gen)) + return; /* already up to date */ + + if (firsttime && rw_tryupgrade(&ddv->sdev_contents) == 0) { + rw_exit(&ddv->sdev_contents); + firsttime = 0; + rw_enter(&ddv->sdev_contents, RW_WRITER); + goto check_build; + } + sdcmn_err10(("devtree_gen (%s): %ld -> %ld\n", + ddv->sdev_path, ddv->sdev_devtree_gen, devtree_gen)); + if (gdir) + sdcmn_err10(("sdev_dir_gen (%s): %ld -> %ld\n", + ddv->sdev_path, ddv->sdev_ldir_gen, + gdir->sdev_gdir_gen)); + + /* update flags and generation number so next filldir is quick */ + ddv->sdev_flags &= ~SDEV_BUILD; + ddv->sdev_devtree_gen = devtree_gen; + if (gdir) + ddv->sdev_ldir_gen = gdir->sdev_gdir_gen; + + prof_make_symlinks(ddv); + prof_make_maps(ddv); + prof_make_names(ddv); + rw_downgrade(&ddv->sdev_contents); +} + +/* apply include/exclude pattern to existing directory content */ +static void +apply_dir_pattern(struct sdev_node *dir, char *expr, char *pathleft, int type) +{ + struct sdev_node *dv; + + /* leaf pattern */ + if (pathleft == NULL) { + if (type == PROFILE_TYPE_INCLUDE) + return; /* nothing to do for include */ + (void) sdev_cleandir(dir, expr, SDEV_ENFORCE); + return; + } + + /* 
directory pattern */ + rw_enter(&dir->sdev_contents, RW_WRITER); + for (dv = dir->sdev_dot; dv; dv = dv->sdev_next) { + if (gmatch(dv->sdev_name, expr) == 0 || + SDEVTOV(dv)->v_type != VDIR) + continue; + process_rule(dv, dv->sdev_origin, + pathleft, NULL, type); + } + rw_exit(&dir->sdev_contents); +} + +/* + * Add a profile rule. + * tgt represents a device name matching expression, + * matching device names are to be either included or excluded. + */ +static void +prof_add_rule(char *name, char *tgt, struct sdev_node *dir, int type) +{ + int error; + nvlist_t **nvlp = NULL; + int rv; + + ASSERT(SDEVTOV(dir)->v_type == VDIR); + + rw_enter(&dir->sdev_contents, RW_WRITER); + + switch (type) { + case PROFILE_TYPE_INCLUDE: + if (tgt) + nvlp = &(dir->sdev_prof.dev_glob_incdir); + else + nvlp = &(dir->sdev_prof.dev_name); + break; + case PROFILE_TYPE_EXCLUDE: + if (tgt) + nvlp = &(dir->sdev_prof.dev_glob_excdir); + else + nvlp = &(dir->sdev_prof.dev_name); + break; + case PROFILE_TYPE_MAP: + nvlp = &(dir->sdev_prof.dev_map); + break; + case PROFILE_TYPE_SYMLINK: + nvlp = &(dir->sdev_prof.dev_symlink); + break; + }; + + /* initialize nvlist */ + if (*nvlp == NULL) { + error = nvlist_alloc(nvlp, NV_UNIQUE_NAME, KM_SLEEP); + ASSERT(error == 0); + } + + if (tgt) { + rv = nvlist_add_string(*nvlp, name, tgt); + } else { + rv = nvlist_add_int32(*nvlp, name, type); + } + ASSERT(rv == 0); + /* rebuild directory content */ + dir->sdev_flags |= SDEV_BUILD; + + if ((type == PROFILE_TYPE_INCLUDE) && + (strpbrk(name, "*?[]") != NULL)) { + dir->sdev_prof.has_glob = 1; + } + + rw_exit(&dir->sdev_contents); + + /* additional details for glob pattern and exclusion */ + switch (type) { + case PROFILE_TYPE_INCLUDE: + case PROFILE_TYPE_EXCLUDE: + apply_dir_pattern(dir, name, tgt, type); + break; + }; +} + +/* + * Parse path components and apply requested matching rule at + * directory level. 
+ */ +static void +process_rule(struct sdev_node *dir, struct sdev_node *gdir, + char *path, char *tgt, int type) +{ + char *name; + struct pathname pn; + int rv = 0; + + if ((strlen(path) > 5) && (strncmp(path, "/dev/", 5) == 0)) { + path += 5; + } + + if (pn_get(path, UIO_SYSSPACE, &pn) != 0) + return; + + name = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) pn_getcomponent(&pn, name); + pn_skipslash(&pn); + SDEV_HOLD(dir); + + while (pn_pathleft(&pn)) { + /* If this is pattern, just add the pattern */ + if (strpbrk(name, "*?[]") != NULL && + (type == PROFILE_TYPE_INCLUDE || + type == PROFILE_TYPE_EXCLUDE)) { + ASSERT(tgt == NULL); + tgt = pn.pn_path; + break; + } + if ((rv = prof_make_dir(name, &gdir, &dir)) != 0) { + cmn_err(CE_CONT, "process_rule: %s error %d\n", + path, rv); + break; + } + (void) pn_getcomponent(&pn, name); + pn_skipslash(&pn); + } + + /* process the leaf component */ + if (rv == 0) { + prof_add_rule(name, tgt, dir, type); + SDEV_SIMPLE_RELE(dir); + } + + kmem_free(name, MAXPATHLEN); + pn_free(&pn); +} + +static int +copyin_nvlist(char *packed_usr, size_t packed_sz, nvlist_t **nvlp) +{ + int err = 0; + char *packed; + nvlist_t *profile = NULL; + + /* simple sanity check */ + if (packed_usr == NULL || packed_sz == 0) + return (NULL); + + /* copyin packed profile nvlist */ + packed = kmem_alloc(packed_sz, KM_NOSLEEP); + if (packed == NULL) + return (ENOMEM); + err = copyin(packed_usr, packed, packed_sz); + + /* unpack packed profile nvlist */ + if (err) + cmn_err(CE_WARN, "copyin_nvlist: copyin failed with " + "err %d\n", err); + else if (err = nvlist_unpack(packed, packed_sz, &profile, KM_NOSLEEP)) + cmn_err(CE_WARN, "copyin_nvlist: nvlist_unpack " + "failed with err %d\n", err); + + kmem_free(packed, packed_sz); + if (err == 0) + *nvlp = profile; + return (err); +} + +/* + * Process profile passed down from libdevinfo. 
There are four types + * of matching rules: + * include: export a name or names matching a pattern + * exclude: exclude a name or names matching a pattern + * symlink: create a local symlink + * map: export a device with a name different from the global zone + * Note: We may consider supporting VOP_SYMLINK in non-global instances, + * because it does not present any security risk. For now, the fs + * instance is read only. + */ +static void +sdev_process_profile(struct sdev_data *sdev_data, nvlist_t *profile) +{ + nvpair_t *nvpair; + char *nvname, *dname; + struct sdev_node *dir, *gdir; + char **pair; /* for symlinks and maps */ + uint_t nelem; + int rv; + + gdir = sdev_origins->sdev_root; /* root of global /dev */ + dir = sdev_data->sdev_root; /* root of current instance */ + + ASSERT(profile); + + /* process nvpairs in the list */ + nvpair = NULL; + while (nvpair = nvlist_next_nvpair(profile, nvpair)) { + nvname = nvpair_name(nvpair); + ASSERT(nvname != NULL); + + if (strcmp(nvname, SDEV_NVNAME_INCLUDE) == 0) { + rv = nvpair_value_string(nvpair, &dname); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvpair)); + break; + } + process_rule(dir, gdir, dname, NULL, + PROFILE_TYPE_INCLUDE); + } else if (strcmp(nvname, SDEV_NVNAME_EXCLUDE) == 0) { + rv = nvpair_value_string(nvpair, &dname); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvpair)); + break; + } + process_rule(dir, gdir, dname, NULL, + PROFILE_TYPE_EXCLUDE); + } else if (strcmp(nvname, SDEV_NVNAME_SYMLINK) == 0) { + rv = nvpair_value_string_array(nvpair, &pair, &nelem); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvpair)); + break; + } + ASSERT(nelem == 2); + process_rule(dir, gdir, pair[0], pair[1], + PROFILE_TYPE_SYMLINK); + } else if (strcmp(nvname, SDEV_NVNAME_MAP) == 0) { + rv = nvpair_value_string_array(nvpair, &pair, &nelem); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvpair)); + break; + } + 
process_rule(dir, gdir, pair[1], pair[0], + PROFILE_TYPE_MAP); + } else if (strcmp(nvname, SDEV_NVNAME_MOUNTPT) != 0) { + cmn_err(CE_WARN, "sdev_process_profile: invalid " + "nvpair %s\n", nvname); + } + } +} + +/*ARGSUSED*/ +int +prof_lookup(vnode_t *dvp, char *nm, struct vnode **vpp, struct cred *cred) +{ + struct sdev_node *ddv = VTOSDEV(dvp); + struct sdev_node *dv; + int nmlen; + + /* + * Empty name or ., return node itself. + */ + nmlen = strlen(nm); + if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { + *vpp = SDEVTOV(ddv); + VN_HOLD(*vpp); + return (0); + } + + /* + * .., return the parent directory + */ + if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { + *vpp = SDEVTOV(ddv->sdev_dotdot); + VN_HOLD(*vpp); + return (0); + } + + rw_enter(&ddv->sdev_contents, RW_READER); + dv = sdev_cache_lookup(ddv, nm); + if (dv == NULL) { + prof_filldir(ddv); + dv = sdev_cache_lookup(ddv, nm); + } + rw_exit(&ddv->sdev_contents); + if (dv == NULL) { + sdcmn_err10(("prof_lookup: %s not found\n", nm)); + return (ENOENT); + } + + return (sdev_to_vp(dv, vpp)); +} + +/* + * This is invoked after a new filesystem is mounted to define the + * name space. It is also invoked during normal system operation + * to update the name space. + * + * Applications call di_prof_commit() in libdevinfo, which invokes + * modctl(). modctl calls this function. The input is a packed nvlist. 
+ */ +int +devname_profile_update(char *packed, size_t packed_sz) +{ + char *mntpt; + nvlist_t *nvl; + nvpair_t *nvp; + struct sdev_data *mntinfo; + int err; + int rv; + + nvl = NULL; + if ((err = copyin_nvlist(packed, packed_sz, &nvl)) != 0) + return (err); + ASSERT(nvl); + + /* The first nvpair must be the mount point */ + nvp = nvlist_next_nvpair(nvl, NULL); + if (strcmp(nvpair_name(nvp), SDEV_NVNAME_MOUNTPT) != 0) { + cmn_err(CE_NOTE, + "devname_profile_update: mount point not specified"); + nvlist_free(nvl); + return (EINVAL); + } + + /* find the matching filesystem instance */ + rv = nvpair_value_string(nvp, &mntpt); + if (rv != 0) { + cmn_err(CE_WARN, sdev_nvp_val_err, + rv, nvpair_name(nvp)); + } else { + mntinfo = sdev_find_mntinfo(mntpt); + if (mntinfo == NULL) { + cmn_err(CE_NOTE, "devname_profile_update: " + " mount point %s not found", mntpt); + nvlist_free(nvl); + return (EINVAL); + } + + /* now do the hardwork to process the profile */ + sdev_process_profile(mntinfo, nvl); + + sdev_mntinfo_rele(mntinfo); + } + + nvlist_free(nvl); + return (0); +} diff --git a/usr/src/uts/common/fs/dev/sdev_ptsops.c b/usr/src/uts/common/fs/dev/sdev_ptsops.c new file mode 100644 index 0000000000..7ec53cf417 --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_ptsops.c @@ -0,0 +1,398 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * vnode ops for the /dev/pts directory + * The lookup is based on the internal pty table. We also + * override readdir in order to delete pts nodes no longer + * in use. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/sunndi.h> +#include <fs/fs_subr.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_impl.h> +#include <sys/policy.h> +#include <sys/ptms.h> +#include <sys/stat.h> + +#define DEVPTS_UID_DEFAULT 0 +#define DEVPTS_GID_DEFAULT 3 +#define DEVPTS_DEVMODE_DEFAULT (0620) + +#define isdigit(ch) ((ch) >= '0' && (ch) <= '9') + +static vattr_t devpts_vattr = { + AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ + VCHR, /* va_type */ + S_IFCHR | DEVPTS_DEVMODE_DEFAULT, /* va_mode */ + DEVPTS_UID_DEFAULT, /* va_uid */ + DEVPTS_GID_DEFAULT, /* va_gid */ + 0 /* 0 hereafter */ +}; + +struct vnodeops *devpts_vnodeops; + +struct vnodeops * +devpts_getvnodeops(void) +{ + return (devpts_vnodeops); +} + +/* + * Convert string to minor number. Some care must be taken + * as we are processing user input. Catch cases like + * /dev/pts/4foo and /dev/pts/-1 + */ +static int +devpts_strtol(const char *nm, minor_t *mp) +{ + long uminor = 0; + char *endptr = NULL; + + if (nm == NULL || !isdigit(*nm)) + return (EINVAL); + + *mp = 0; + if (ddi_strtol(nm, &endptr, 10, &uminor) != 0 || + *endptr != '\0' || uminor < 0) { + return (EINVAL); + } + + *mp = uminor; + return (0); +} + +/* + * Check if a pts sdev_node is still valid - i.e. it represents a current pty. 
+ * This serves two purposes + * - only valid pts nodes are returned during lookup() and readdir(). + * - since pts sdev_nodes are not actively destroyed when a pty goes + * away, we use the validator to do deferred cleanup i.e. when such + * nodes are encountered during subsequent lookup() and readdir(). + */ +/*ARGSUSED*/ +int +devpts_validate(struct sdev_node *dv) +{ + minor_t min; + uid_t uid; + gid_t gid; + timestruc_t now; + char *nm = dv->sdev_name; + + ASSERT(!(dv->sdev_flags & SDEV_STALE)); + ASSERT(dv->sdev_state == SDEV_READY); + + /* validate only READY nodes */ + if (dv->sdev_state != SDEV_READY) { + sdcmn_err(("dev fs: skipping: node not ready %s(%p)", + nm, (void *)dv)); + return (SDEV_VTOR_SKIP); + } + + if (devpts_strtol(nm, &min) != 0) { + sdcmn_err7(("devpts_validate: not a valid minor: %s\n", nm)); + return (SDEV_VTOR_INVALID); + } + + /* + * Check if pts driver is attached + */ + if (ptms_slave_attached() == (major_t)-1) { + sdcmn_err7(("devpts_validate: slave not attached\n")); + return (SDEV_VTOR_INVALID); + } + + if (ptms_minor_valid(min, &uid, &gid) == 0) { + if (ptms_minor_exists(min)) { + sdcmn_err7(("devpts_validate: valid in different zone " + "%s\n", nm)); + return (SDEV_VTOR_SKIP); + } else { + sdcmn_err7(("devpts_validate: %s not valid pty\n", + nm)); + return (SDEV_VTOR_INVALID); + } + } + + ASSERT(dv->sdev_attr); + if (dv->sdev_attr->va_uid != uid || dv->sdev_attr->va_gid != gid) { + ASSERT(uid >= 0); + ASSERT(gid >= 0); + dv->sdev_attr->va_uid = uid; + dv->sdev_attr->va_gid = gid; + gethrestime(&now); + dv->sdev_attr->va_atime = now; + dv->sdev_attr->va_mtime = now; + dv->sdev_attr->va_ctime = now; + sdcmn_err7(("devpts_validate: update uid/gid/times%s\n", nm)); + } + + return (SDEV_VTOR_VALID); +} + +/* + * This callback is invoked from devname_lookup_func() to create + * a pts entry when the node is not found in the cache. 
+ */ +/*ARGSUSED*/ +static int +devpts_create_rvp(struct sdev_node *ddv, char *nm, + void **arg, cred_t *cred, void *whatever, char *whichever) +{ + minor_t min; + major_t maj; + uid_t uid; + gid_t gid; + timestruc_t now; + struct vattr *vap = (struct vattr *)arg; + + if (devpts_strtol(nm, &min) != 0) { + sdcmn_err7(("devpts_create_rvp: not a valid minor: %s\n", nm)); + return (-1); + } + + /* + * Check if pts driver is attached and if it is + * get the major number. + */ + maj = ptms_slave_attached(); + if (maj == (major_t)-1) { + sdcmn_err7(("devpts_create_rvp: slave not attached\n")); + return (-1); + } + + /* + * Only allow creation of ptys allocated to our zone + */ + if (!ptms_minor_valid(min, &uid, &gid)) { + sdcmn_err7(("devpts_create_rvp: %s not valid pty" + "or not valid in this zone\n", nm)); + return (-1); + } + + + /* + * This is a valid pty (at least at this point in time). + * Create the node by setting the attribute. The rest + * is taken care of by devname_lookup_func(). + */ + *vap = devpts_vattr; + vap->va_rdev = makedevice(maj, min); + ASSERT(uid >= 0); + ASSERT(gid >= 0); + vap->va_uid = uid; + vap->va_gid = gid; + gethrestime(&now); + vap->va_atime = now; + vap->va_mtime = now; + vap->va_ctime = now; + + return (0); +} + +/* + * Clean pts sdev_nodes that are no longer valid. 
+ */ +static void +devpts_prunedir(struct sdev_node *ddv) +{ + struct vnode *vp; + struct sdev_node *dv, *next = NULL; + int (*vtor)(struct sdev_node *) = NULL; + + ASSERT(ddv->sdev_flags & SDEV_VTOR); + + vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); + ASSERT(vtor); + + if (rw_tryupgrade(&ddv->sdev_contents) == NULL) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + + for (dv = ddv->sdev_dot; dv; dv = next) { + next = dv->sdev_next; + + /* skip stale nodes */ + if (dv->sdev_flags & SDEV_STALE) + continue; + + /* validate and prune only ready nodes */ + if (dv->sdev_state != SDEV_READY) + continue; + + switch (vtor(dv)) { + case SDEV_VTOR_VALID: + case SDEV_VTOR_SKIP: + continue; + case SDEV_VTOR_INVALID: + sdcmn_err7(("prunedir: destroy invalid " + "node: %s(%p)\n", dv->sdev_name, (void *)dv)); + break; + } + vp = SDEVTOV(dv); + if (vp->v_count > 0) + continue; + SDEV_HOLD(dv); + /* remove the cache node */ + (void) sdev_cache_update(ddv, &dv, dv->sdev_name, + SDEV_CACHE_DELETE); + } + rw_downgrade(&ddv->sdev_contents); +} + +/* + * Lookup for /dev/pts directory + * If the entry does not exist, the devpts_create_rvp() callback + * is invoked to create it. Nodes do not persist across reboot. 
+ */ +/*ARGSUSED3*/ +static int +devpts_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, + struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) +{ + struct sdev_node *sdvp = VTOSDEV(dvp); + struct sdev_node *dv; + int error; + + error = devname_lookup_func(sdvp, nm, vpp, cred, devpts_create_rvp, + SDEV_VATTR); + + if (error == 0) { + switch ((*vpp)->v_type) { + case VCHR: + dv = VTOSDEV(VTOS(*vpp)->s_realvp); + break; + case VDIR: + dv = VTOSDEV(*vpp); + break; + default: + cmn_err(CE_PANIC, "devpts_lookup: Unsupported node " + "type: %p: %d", (void *)(*vpp), (*vpp)->v_type); + break; + } + ASSERT(SDEV_HELD(dv)); + } + + return (error); +} + +/* + * We allow create to find existing nodes + * - if the node doesn't exist - EROFS + * - creating an existing dir read-only succeeds, otherwise EISDIR + * - exclusive creates fail - EEXIST + */ +/*ARGSUSED2*/ +static int +devpts_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, + int mode, struct vnode **vpp, struct cred *cred, int flag) +{ + int error; + struct vnode *vp; + + *vpp = NULL; + + error = devpts_lookup(dvp, nm, &vp, NULL, 0, NULL, cred); + if (error == 0) { + if (excl == EXCL) + error = EEXIST; + else if (vp->v_type == VDIR && (mode & VWRITE)) + error = EISDIR; + else + error = VOP_ACCESS(vp, mode, 0, cred); + + if (error) { + VN_RELE(vp); + } else + *vpp = vp; + } else if (error == ENOENT) { + error = EROFS; + } + + return (error); +} + +/* + * Display all instantiated pts (slave) device nodes. + * A /dev/pts entry will be created only after the first lookup of the slave + * device succeeds. 
+ */ +static int +devpts_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, + int *eofp) +{ + struct sdev_node *sdvp = VTOSDEV(dvp); + if (uiop->uio_offset == 0) { + devpts_prunedir(sdvp); + } + + return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); +} + + +static int +devpts_set_id(struct sdev_node *dv, struct vattr *vap, int protocol) +{ + ASSERT((protocol & AT_UID) || (protocol & AT_GID)); + ptms_set_owner(getminor(SDEVTOV(dv)->v_rdev), + vap->va_uid, vap->va_gid); + return (0); + +} + +static int +devpts_setattr(struct vnode *vp, struct vattr *vap, int flags, + struct cred *cred) +{ + ASSERT((vp->v_type == VCHR) || (vp->v_type == VDIR)); + return (devname_setattr_func(vp, vap, flags, cred, + devpts_set_id, AT_UID|AT_GID)); +} + +/* + * We override lookup and readdir to build entries based on the + * in kernel pty table. Also override setattr/setsecattr to + * avoid persisting permissions. + */ +const fs_operation_def_t devpts_vnodeops_tbl[] = { + VOPNAME_READDIR, devpts_readdir, + VOPNAME_LOOKUP, devpts_lookup, + VOPNAME_CREATE, devpts_create, + VOPNAME_SETATTR, devpts_setattr, + VOPNAME_REMOVE, fs_nosys, + VOPNAME_MKDIR, fs_nosys, + VOPNAME_RMDIR, fs_nosys, + VOPNAME_SYMLINK, fs_nosys, + VOPNAME_SETSECATTR, fs_nosys, + NULL, NULL +}; diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c new file mode 100644 index 0000000000..ddca87ac61 --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_subr.c @@ -0,0 +1,3657 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * utility routines for the /dev fs + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/user.h> +#include <sys/time.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/flock.h> +#include <sys/kmem.h> +#include <sys/uio.h> +#include <sys/errno.h> +#include <sys/stat.h> +#include <sys/cred.h> +#include <sys/dirent.h> +#include <sys/pathname.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/mode.h> +#include <sys/policy.h> +#include <fs/fs_subr.h> +#include <sys/mount.h> +#include <sys/fs/snode.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/sdev_impl.h> +#include <sys/fs/sdev_node.h> +#include <sys/sunndi.h> +#include <sys/sunmdi.h> +#include <sys/conf.h> +#include <sys/proc.h> +#include <sys/user.h> +#include <sys/modctl.h> + +#ifdef DEBUG +int sdev_debug = 0x00000001; +int sdev_debug_cache_flags = 0; +#endif + +/* + * globals + */ +/* prototype memory vattrs */ +vattr_t sdev_vattr_dir = { + AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ + VDIR, /* va_type */ + SDEV_DIRMODE_DEFAULT, /* va_mode */ + SDEV_UID_DEFAULT, /* va_uid */ + SDEV_GID_DEFAULT, /* va_gid */ + 0, /* va_fsid */ + 0, /* va_nodeid */ + 0, /* va_nlink */ + 0, /* va_size */ + 0, /* va_atime */ + 0, /* va_mtime */ + 0, /* va_ctime */ + 0, /* va_rdev */ + 0, /* va_blksize */ + 0, /* va_nblocks */ + 0 /* 
va_vcode */ +}; + +vattr_t sdev_vattr_lnk = { + AT_TYPE|AT_MODE, /* va_mask */ + VLNK, /* va_type */ + SDEV_LNKMODE_DEFAULT, /* va_mode */ + SDEV_UID_DEFAULT, /* va_uid */ + SDEV_GID_DEFAULT, /* va_gid */ + 0, /* va_fsid */ + 0, /* va_nodeid */ + 0, /* va_nlink */ + 0, /* va_size */ + 0, /* va_atime */ + 0, /* va_mtime */ + 0, /* va_ctime */ + 0, /* va_rdev */ + 0, /* va_blksize */ + 0, /* va_nblocks */ + 0 /* va_vcode */ +}; + +vattr_t sdev_vattr_blk = { + AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ + VBLK, /* va_type */ + S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ + SDEV_UID_DEFAULT, /* va_uid */ + SDEV_GID_DEFAULT, /* va_gid */ + 0, /* va_fsid */ + 0, /* va_nodeid */ + 0, /* va_nlink */ + 0, /* va_size */ + 0, /* va_atime */ + 0, /* va_mtime */ + 0, /* va_ctime */ + 0, /* va_rdev */ + 0, /* va_blksize */ + 0, /* va_nblocks */ + 0 /* va_vcode */ +}; + +vattr_t sdev_vattr_chr = { + AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ + VCHR, /* va_type */ + S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ + SDEV_UID_DEFAULT, /* va_uid */ + SDEV_GID_DEFAULT, /* va_gid */ + 0, /* va_fsid */ + 0, /* va_nodeid */ + 0, /* va_nlink */ + 0, /* va_size */ + 0, /* va_atime */ + 0, /* va_mtime */ + 0, /* va_ctime */ + 0, /* va_rdev */ + 0, /* va_blksize */ + 0, /* va_nblocks */ + 0 /* va_vcode */ +}; + +kmem_cache_t *sdev_node_cache; /* sdev_node cache */ +int devtype; /* fstype */ + +struct devname_ops *devname_ns_ops; /* default name service directory ops */ +kmutex_t devname_nsmaps_lock; /* protect devname_nsmaps */ + +/* static */ +static struct devname_nsmap *devname_nsmaps = NULL; + /* contents from /etc/dev/devname_master */ +static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */ + +static struct vnodeops *sdev_get_vop(struct sdev_node *); +static void sdev_set_no_nocache(struct sdev_node *); +static int sdev_get_moduleops(struct sdev_node *); +static void sdev_handle_alloc(struct sdev_node *); +static fs_operation_def_t *sdev_merge_vtab(const 
fs_operation_def_t []); +static void sdev_free_vtab(fs_operation_def_t *); + +static void +sdev_prof_free(struct sdev_node *dv) +{ + ASSERT(!SDEV_IS_GLOBAL(dv)); + if (dv->sdev_prof.dev_name) + nvlist_free(dv->sdev_prof.dev_name); + if (dv->sdev_prof.dev_map) + nvlist_free(dv->sdev_prof.dev_map); + if (dv->sdev_prof.dev_symlink) + nvlist_free(dv->sdev_prof.dev_symlink); + if (dv->sdev_prof.dev_glob_incdir) + nvlist_free(dv->sdev_prof.dev_glob_incdir); + if (dv->sdev_prof.dev_glob_excdir) + nvlist_free(dv->sdev_prof.dev_glob_excdir); + bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); +} + +/* + * sdev_node cache constructor + */ +/*ARGSUSED1*/ +static int +i_sdev_node_ctor(void *buf, void *cfarg, int flag) +{ + struct sdev_node *dv = (struct sdev_node *)buf; + struct vnode *vp; + + ASSERT(flag == KM_SLEEP); + + bzero(buf, sizeof (struct sdev_node)); + rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); + dv->sdev_vnode = vn_alloc(KM_SLEEP); + vp = SDEVTOV(dv); + vp->v_data = (caddr_t)dv; + return (0); +} + +/* sdev_node destructor for kmem cache */ +/*ARGSUSED1*/ +static void +i_sdev_node_dtor(void *buf, void *arg) +{ + struct sdev_node *dv = (struct sdev_node *)buf; + struct vnode *vp = SDEVTOV(dv); + + rw_destroy(&dv->sdev_contents); + vn_free(vp); +} + +/* initialize sdev_node cache */ +void +sdev_node_cache_init() +{ + int flags = 0; + +#ifdef DEBUG + flags = sdev_debug_cache_flags; + if (flags) + sdcmn_err(("cache debug flags 0x%x\n", flags)); +#endif /* DEBUG */ + + ASSERT(sdev_node_cache == NULL); + sdev_node_cache = kmem_cache_create("sdev_node_cache", + sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, + NULL, NULL, NULL, flags); +} + +/* destroy sdev_node cache */ +void +sdev_node_cache_fini() +{ + ASSERT(sdev_node_cache != NULL); + kmem_cache_destroy(sdev_node_cache); + sdev_node_cache = NULL; +} + +void +sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) +{ + ASSERT(dv); + ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); + 
dv->sdev_state = state; +} + +static void +sdev_attrinit(struct sdev_node *dv, vattr_t *vap) +{ + timestruc_t now; + + ASSERT(vap); + + dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); + *dv->sdev_attr = *vap; + + dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode); + + gethrestime(&now); + dv->sdev_attr->va_atime = now; + dv->sdev_attr->va_mtime = now; + dv->sdev_attr->va_ctime = now; +} + +/* alloc and initialize a sdev_node */ +int +sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, + vattr_t *vap) +{ + struct sdev_node *dv = NULL; + struct vnode *vp; + size_t nmlen, len; + devname_handle_t *dhl; + + nmlen = strlen(nm) + 1; + if (nmlen > MAXNAMELEN) { + sdcmn_err9(("sdev_nodeinit: node name %s" + " too long\n", nm)); + *newdv = NULL; + return (ENAMETOOLONG); + } + + dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); + + dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); + bcopy(nm, dv->sdev_name, nmlen); + dv->sdev_namelen = nmlen - 1; /* '\0' not included */ + len = strlen(ddv->sdev_path) + strlen(nm) + 2; + dv->sdev_path = kmem_alloc(len, KM_SLEEP); + (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); + /* overwritten for VLNK nodes */ + dv->sdev_symlink = NULL; + + vp = SDEVTOV(dv); + vn_reinit(vp); + vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; + if (vap) + vp->v_type = vap->va_type; + + /* + * initialized to the parent's vnodeops. 
+ * maybe overwriten for a VDIR + */ + vn_setops(vp, vn_getops(SDEVTOV(ddv))); + vn_exists(vp); + + dv->sdev_dotdot = NULL; + dv->sdev_dot = NULL; + dv->sdev_next = NULL; + dv->sdev_attrvp = NULL; + if (vap) { + sdev_attrinit(dv, vap); + } else { + dv->sdev_attr = NULL; + } + + dv->sdev_ino = sdev_mkino(dv); + dv->sdev_nlink = 0; /* updated on insert */ + dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ + dv->sdev_flags |= SDEV_BUILD; + mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); + if (SDEV_IS_GLOBAL(ddv)) { + dv->sdev_flags |= SDEV_GLOBAL; + dv->sdev_mapinfo = NULL; + dhl = &(dv->sdev_handle); + dhl->dh_data = dv; + dhl->dh_spec = DEVNAME_NS_NONE; + dhl->dh_args = NULL; + sdev_set_no_nocache(dv); + dv->sdev_gdir_gen = 0; + } else { + dv->sdev_flags &= ~SDEV_GLOBAL; + dv->sdev_origin = NULL; /* set later */ + bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); + dv->sdev_ldir_gen = 0; + dv->sdev_devtree_gen = 0; + } + + rw_enter(&dv->sdev_contents, RW_WRITER); + sdev_set_nodestate(dv, SDEV_INIT); + rw_exit(&dv->sdev_contents); + *newdv = dv; + + return (0); +} + +/* + * transition a sdev_node into SDEV_READY state + */ +int +sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, + void *args, struct cred *cred) +{ + int error = 0; + struct vnode *vp = SDEVTOV(dv); + vtype_t type; + + ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap); + + type = vap->va_type; + vp->v_type = type; + vp->v_rdev = vap->va_rdev; + rw_enter(&dv->sdev_contents, RW_WRITER); + if (type == VDIR) { + dv->sdev_nlink = 2; + dv->sdev_flags &= ~SDEV_PERSIST; + dv->sdev_flags &= ~SDEV_DYNAMIC; + vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */ + error = sdev_get_moduleops(dv); /* from plug-in module */ + ASSERT(dv->sdev_dotdot); + ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR); + vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev; + } else if (type == VLNK) { + ASSERT(args); + 
dv->sdev_nlink = 1; + dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP); + } else { + dv->sdev_nlink = 1; + } + + if (!(SDEV_IS_GLOBAL(dv))) { + dv->sdev_origin = (struct sdev_node *)args; + dv->sdev_flags &= ~SDEV_PERSIST; + } + + /* + * shadow node is created here OR + * if failed (indicated by dv->sdev_attrvp == NULL), + * created later in sdev_setattr + */ + if (avp) { + dv->sdev_attrvp = avp; + } else { + if (dv->sdev_attr == NULL) + sdev_attrinit(dv, vap); + else + *dv->sdev_attr = *vap; + + if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) || + ((SDEVTOV(dv)->v_type == VDIR) && + (dv->sdev_attrvp == NULL))) + error = sdev_shadow_node(dv, cred); + } + + /* transition to READY state */ + sdev_set_nodestate(dv, SDEV_READY); + sdev_nc_node_exists(dv); + rw_exit(&dv->sdev_contents); + return (error); +} + +/* + * setting ZOMBIE state + */ +static int +sdev_nodezombied(struct sdev_node *dv) +{ + rw_enter(&dv->sdev_contents, RW_WRITER); + sdev_set_nodestate(dv, SDEV_ZOMBIE); + rw_exit(&dv->sdev_contents); + return (0); +} + +/* + * Build the VROOT sdev_node. 
+ */ +/*ARGSUSED*/ +struct sdev_node * +sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, + struct vnode *avp, struct cred *cred) +{ + struct sdev_node *dv; + struct vnode *vp; + char devdir[] = "/dev"; + + ASSERT(sdev_node_cache != NULL); + ASSERT(avp); + dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); + vp = SDEVTOV(dv); + vn_reinit(vp); + vp->v_flag |= VROOT; + vp->v_vfsp = vfsp; + vp->v_type = VDIR; + vp->v_rdev = devdev; + vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ + vn_exists(vp); + + if (vfsp->vfs_mntpt) + dv->sdev_name = i_ddi_strdup( + (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); + else + /* vfs_mountdev1 set mount point later */ + dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); + dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ + dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); + dv->sdev_ino = SDEV_ROOTINO; + dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ + dv->sdev_dotdot = dv; /* .. == self */ + dv->sdev_attrvp = avp; + dv->sdev_attr = NULL; + mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); + if (strcmp(dv->sdev_name, "/dev") == 0) { + mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL); + dv->sdev_mapinfo = NULL; + dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; + bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); + dv->sdev_gdir_gen = 0; + } else { + dv->sdev_flags = SDEV_BUILD; + dv->sdev_flags &= ~SDEV_PERSIST; + bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); + dv->sdev_ldir_gen = 0; + dv->sdev_devtree_gen = 0; + } + + rw_enter(&dv->sdev_contents, RW_WRITER); + sdev_set_nodestate(dv, SDEV_READY); + rw_exit(&dv->sdev_contents); + sdev_nc_node_exists(dv); + return (dv); +} + +/* + * 1. load the module + * 2. 
modload invokes sdev_module_register, which in turn sets + * the dv->sdev_mapinfo->dir_ops + * + * note: locking order: + * dv->sdev_contents -> map->dir_lock + */ +static int +sdev_get_moduleops(struct sdev_node *dv) +{ + int error = 0; + struct devname_nsmap *map = NULL; + char *module; + char *path; + int load = 1; + + ASSERT(SDEVTOV(dv)->v_type == VDIR); + + if (devname_nsmaps == NULL) + return (0); + + if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded()) + return (0); + + + path = dv->sdev_path; + if ((map = sdev_get_nsmap_by_dir(path, 0))) { + rw_enter(&map->dir_lock, RW_READER); + if (map->dir_invalid) { + if (map->dir_module && map->dir_newmodule && + (strcmp(map->dir_module, + map->dir_newmodule) == 0)) { + load = 0; + } + sdev_replace_nsmap(map, map->dir_newmodule, + map->dir_newmap); + } + + module = map->dir_module; + if (module && load) { + sdcmn_err6(("sdev_get_moduleops: " + "load module %s", module)); + rw_exit(&map->dir_lock); + error = modload("devname", module); + sdcmn_err6(("sdev_get_moduleops: error %d\n", error)); + if (error < 0) { + return (-1); + } + } else if (module == NULL) { + /* + * loading the module ops for name services + */ + if (devname_ns_ops == NULL) { + sdcmn_err6(( + "sdev_get_moduleops: modload default\n")); + error = modload("devname", DEVNAME_NSCONFIG); + sdcmn_err6(( + "sdev_get_moduleops: error %d\n", error)); + if (error < 0) { + return (-1); + } + } + + if (!rw_tryupgrade(&map->dir_lock)) { + rw_exit(&map->dir_lock); + rw_enter(&map->dir_lock, RW_WRITER); + } + ASSERT(devname_ns_ops); + map->dir_ops = devname_ns_ops; + rw_exit(&map->dir_lock); + } + } + + dv->sdev_mapinfo = map; + return (0); +} + +/* directory dependent vop table */ +struct sdev_vop_table { + char *vt_name; /* subdirectory name */ + const fs_operation_def_t *vt_service; /* vnodeops table */ + struct vnodeops *vt_vops; /* constructed vop */ + struct vnodeops **vt_global_vops; /* global container for vop */ + int (*vt_vtor)(struct sdev_node *); /* 
validate sdev_node */ + int vt_flags; +}; + +/* + * A nice improvement would be to provide a plug-in mechanism + * for this table instead of a const table. + */ +static struct sdev_vop_table vtab[] = +{ + { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, + SDEV_DYNAMIC | SDEV_VTOR }, + + { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, + + { NULL, NULL, NULL, NULL, NULL, 0} +}; + + +/* + * sets a directory's vnodeops if the directory is in the vtab; + */ +static struct vnodeops * +sdev_get_vop(struct sdev_node *dv) +{ + int i; + char *path; + + path = dv->sdev_path; + ASSERT(path); + + /* gets the relative path to /dev/ */ + path += 5; + + /* gets the vtab entry if matches */ + for (i = 0; vtab[i].vt_name; i++) { + if (strcmp(vtab[i].vt_name, path) != 0) + continue; + dv->sdev_flags |= vtab[i].vt_flags; + + if (vtab[i].vt_vops) { + if (vtab[i].vt_global_vops) + *(vtab[i].vt_global_vops) = vtab[i].vt_vops; + return (vtab[i].vt_vops); + } + + if (vtab[i].vt_service) { + fs_operation_def_t *templ; + templ = sdev_merge_vtab(vtab[i].vt_service); + if (vn_make_ops(vtab[i].vt_name, + (const fs_operation_def_t *)templ, + &vtab[i].vt_vops) != 0) { + cmn_err(CE_PANIC, "%s: malformed vnode ops\n", + vtab[i].vt_name); + /*NOTREACHED*/ + } + if (vtab[i].vt_global_vops) { + *(vtab[i].vt_global_vops) = vtab[i].vt_vops; + } + sdev_free_vtab(templ); + return (vtab[i].vt_vops); + } + return (sdev_vnodeops); + } + + /* child inherits the persistence of the parent */ + if (SDEV_IS_PERSIST(dv->sdev_dotdot)) + dv->sdev_flags |= SDEV_PERSIST; + + return (sdev_vnodeops); +} + +static void +sdev_set_no_nocache(struct sdev_node *dv) +{ + int i; + char *path; + + ASSERT(dv->sdev_path); + path = dv->sdev_path + strlen("/dev/"); + + for (i = 0; vtab[i].vt_name; i++) { + if (strcmp(vtab[i].vt_name, path) == 0) { + if (vtab[i].vt_flags & SDEV_NO_NCACHE) + dv->sdev_flags |= SDEV_NO_NCACHE; + break; + } + } +} + +void * +sdev_get_vtor(struct sdev_node *dv) +{ + int i; + + 
for (i = 0; vtab[i].vt_name; i++) { + if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0) + continue; + return ((void *)vtab[i].vt_vtor); + } + return (NULL); +} + +/* + * Build the base root inode + */ +ino_t +sdev_mkino(struct sdev_node *dv) +{ + ino_t ino; + + /* + * for now, follow the lead of tmpfs here + * need to someday understand the requirements here + */ + ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); + ino += SDEV_ROOTINO + 1; + + return (ino); +} + +static int +sdev_getlink(struct vnode *linkvp, char **link) +{ + int err; + char *buf; + struct uio uio = {0}; + struct iovec iov = {0}; + + if (linkvp == NULL) + return (ENOENT); + ASSERT(linkvp->v_type == VLNK); + + buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + iov.iov_base = buf; + iov.iov_len = MAXPATHLEN; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_resid = MAXPATHLEN; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_llimit = MAXOFFSET_T; + + err = VOP_READLINK(linkvp, &uio, kcred); + if (err) { + cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); + kmem_free(buf, MAXPATHLEN); + return (ENOENT); + } + + /* mission complete */ + *link = i_ddi_strdup(buf, KM_SLEEP); + kmem_free(buf, MAXPATHLEN); + return (0); +} + +/* + * A convenient wrapper to get the devfs node vnode for a device + * minor functionality: readlink() of a /dev symlink + * Place the link into dv->sdev_symlink + */ +static int +sdev_follow_link(struct sdev_node *dv) +{ + int err; + struct vnode *linkvp; + char *link = NULL; + + linkvp = SDEVTOV(dv); + if (linkvp == NULL) + return (ENOENT); + ASSERT(linkvp->v_type == VLNK); + err = sdev_getlink(linkvp, &link); + if (err) { + (void) sdev_nodezombied(dv); + dv->sdev_symlink = NULL; + return (ENOENT); + } + + ASSERT(link != NULL); + dv->sdev_symlink = link; + return (0); +} + +static int +sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) +{ + vtype_t otype = SDEVTOV(dv)->v_type; + + /* + * existing sdev_node has a different type. 
+ */ + if (otype != nvap->va_type) { + sdcmn_err9(("sdev_node_check: existing node " + " %s type %d does not match new node type %d\n", + dv->sdev_name, otype, nvap->va_type)); + return (EEXIST); + } + + /* + * For a symlink, the target should be the same. + */ + if (otype == VLNK) { + ASSERT(nargs != NULL); + ASSERT(dv->sdev_symlink != NULL); + if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { + sdcmn_err9(("sdev_node_check: existing node " + " %s has different symlink %s as new node " + " %s\n", dv->sdev_name, dv->sdev_symlink, + (char *)nargs)); + return (EEXIST); + } + } + + return (0); +} + +/* + * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() + * + * arguments: + * - ddv (parent) + * - nm (child name) + * - newdv (sdev_node for nm is returned here) + * - vap (vattr for the node to be created, va_type should be set. + * the defaults should be used if unknown) + * - cred + * - args + * . tnm (for VLNK) + * . global sdev_node (for !SDEV_GLOBAL) + * - state: SDEV_INIT, SDEV_READY + * + * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) + * + * NOTE: directory contents writers lock needs to be held before + * calling this routine. 
+ */ +int +sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, + struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, + sdev_node_state_t state) +{ + int error = 0; + sdev_node_state_t node_state; + struct sdev_node *dv = NULL; + + ASSERT(state != SDEV_ZOMBIE); + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + + if (*newdv) { + dv = *newdv; + } else { + /* allocate and initialize a sdev_node */ + if (ddv->sdev_state == SDEV_ZOMBIE) { + sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", + ddv->sdev_path)); + return (ENOENT); + } + + error = sdev_nodeinit(ddv, nm, &dv, vap); + if (error != 0) { + sdcmn_err9(("sdev_mknode: error %d," + " name %s can not be initialized\n", + error, nm)); + return (ENOENT); + } + ASSERT(dv); + + /* insert into the directory cache */ + error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); + if (error) { + sdcmn_err9(("sdev_mknode: node %s can not" + " be added into directory cache\n", nm)); + return (ENOENT); + } + } + + ASSERT(dv); + node_state = dv->sdev_state; + ASSERT(node_state != SDEV_ZOMBIE); + + if (state == SDEV_READY) { + switch (node_state) { + case SDEV_INIT: + error = sdev_nodeready(dv, vap, avp, args, cred); + /* + * masking the errors with ENOENT + */ + if (error) { + sdcmn_err9(("sdev_mknode: node %s can NOT" + " be transitioned into READY state, " + "error %d\n", nm, error)); + error = ENOENT; + } + break; + case SDEV_READY: + /* + * Do some sanity checking to make sure + * the existing sdev_node is what has been + * asked for. 
+ */ + error = sdev_node_check(dv, vap, args); + break; + default: + break; + } + } + + if (!error) { + *newdv = dv; + ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); + } else { + SDEV_SIMPLE_RELE(dv); + *newdv = NULL; + } + + return (error); +} + +/* + * convenient wrapper to change vp's ATIME, CTIME and ATIME + */ +void +sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) +{ + struct vattr attr; + timestruc_t now; + int err; + + ASSERT(vp); + gethrestime(&now); + if (mask & AT_CTIME) + attr.va_ctime = now; + if (mask & AT_MTIME) + attr.va_mtime = now; + if (mask & AT_ATIME) + attr.va_atime = now; + + attr.va_mask = (mask & AT_TIMES); + err = VOP_SETATTR(vp, &attr, 0, cred, NULL); + if (err && (err != EROFS)) { + sdcmn_err(("update timestamps error %d\n", err)); + } +} + +/* + * the backing store vnode is released here + */ +/*ARGSUSED1*/ +void +sdev_nodedestroy(struct sdev_node *dv, uint_t flags) +{ + /* no references */ + ASSERT(dv->sdev_nlink == 0); + + if (dv->sdev_attrvp != NULLVP) { + VN_RELE(dv->sdev_attrvp); + /* + * reset the attrvp so that no more + * references can be made on this already + * vn_rele() vnode + */ + dv->sdev_attrvp = NULLVP; + } + + if (dv->sdev_attr != NULL) { + kmem_free(dv->sdev_attr, sizeof (struct vattr)); + dv->sdev_attr = NULL; + } + + if (dv->sdev_name != NULL) { + kmem_free(dv->sdev_name, dv->sdev_namelen + 1); + dv->sdev_name = NULL; + } + + if (dv->sdev_symlink != NULL) { + kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); + dv->sdev_symlink = NULL; + } + + if (dv->sdev_path) { + kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); + dv->sdev_path = NULL; + } + + if (!SDEV_IS_GLOBAL(dv)) + sdev_prof_free(dv); + + mutex_destroy(&dv->sdev_lookup_lock); + cv_destroy(&dv->sdev_lookup_cv); + + /* return node to initial state as per constructor */ + (void) memset((void *)&dv->sdev_instance_data, 0, + sizeof (dv->sdev_instance_data)); + + vn_invalid(SDEVTOV(dv)); + kmem_cache_free(sdev_node_cache, dv); +} + +/* 
+ * DIRECTORY CACHE lookup + */ +struct sdev_node * +sdev_findbyname(struct sdev_node *ddv, char *nm) +{ + struct sdev_node *dv; + size_t nmlen = strlen(nm); + + ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); + for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) { + if (dv->sdev_namelen != nmlen) { + continue; + } + + /* + * Can't lookup stale nodes + */ + if (dv->sdev_flags & SDEV_STALE) { + sdcmn_err9(( + "sdev_findbyname: skipped stale node: %s\n", + dv->sdev_name)); + continue; + } + + if (strcmp(dv->sdev_name, nm) == 0) { + SDEV_HOLD(dv); + return (dv); + } + } + return (NULL); +} + +/* + * Inserts a new sdev_node in a parent directory + */ +void +sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) +{ + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + ASSERT(SDEVTOV(ddv)->v_type == VDIR); + ASSERT(ddv->sdev_nlink >= 2); + ASSERT(dv->sdev_nlink == 0); + + dv->sdev_dotdot = ddv; + dv->sdev_next = ddv->sdev_dot; + ddv->sdev_dot = dv; + ddv->sdev_nlink++; +} + +/* + * The following check is needed because while sdev_nodes are linked + * in SDEV_INIT state, they have their link counts incremented only + * in SDEV_READY state. + */ +static void +decr_link(struct sdev_node *dv) +{ + if (dv->sdev_state != SDEV_INIT) + dv->sdev_nlink--; + else + ASSERT(dv->sdev_nlink == 0); +} + +/* + * Delete an existing dv from directory cache + * + * In the case of a node is still held by non-zero reference count, + * the node is put into ZOMBIE state. Once the reference count + * reaches "0", the node is unlinked and destroyed, + * in sdev_inactive(). 
+ */ +static int +sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) +{ + struct sdev_node *idv; + struct sdev_node *prev = NULL; + struct vnode *vp; + + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + + vp = SDEVTOV(dv); + mutex_enter(&vp->v_lock); + + /* dv is held still */ + if (vp->v_count > 1) { + rw_enter(&dv->sdev_contents, RW_WRITER); + if (dv->sdev_state == SDEV_READY) { + sdcmn_err9(( + "sdev_delete: node %s busy with count %d\n", + dv->sdev_name, vp->v_count)); + dv->sdev_state = SDEV_ZOMBIE; + } + rw_exit(&dv->sdev_contents); + --vp->v_count; + mutex_exit(&vp->v_lock); + return (EBUSY); + } + ASSERT(vp->v_count == 1); + + /* unlink from the memory cache */ + ddv->sdev_nlink--; /* .. to above */ + if (vp->v_type == VDIR) { + decr_link(dv); /* . to self */ + } + + for (idv = ddv->sdev_dot; idv && idv != dv; + prev = idv, idv = idv->sdev_next) + ; + ASSERT(idv == dv); /* node to be deleted must exist */ + if (prev == NULL) + ddv->sdev_dot = dv->sdev_next; + else + prev->sdev_next = dv->sdev_next; + dv->sdev_next = NULL; + decr_link(dv); /* name, back to zero */ + vp->v_count--; + mutex_exit(&vp->v_lock); + + /* destroy the node */ + sdev_nodedestroy(dv, 0); + return (0); +} + +/* + * check if the source is in the path of the target + * + * source and target are different + */ +/*ARGSUSED2*/ +static int +sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred) +{ + int error = 0; + struct sdev_node *dotdot, *dir; + + rw_enter(&tdv->sdev_contents, RW_READER); + dotdot = tdv->sdev_dotdot; + ASSERT(dotdot); + + /* fs root */ + if (dotdot == tdv) { + rw_exit(&tdv->sdev_contents); + return (0); + } + + for (;;) { + /* + * avoid error cases like + * mv a a/b + * mv a a/b/c + * etc. 
+ */ + if (dotdot == sdv) { + error = EINVAL; + break; + } + + dir = dotdot; + dotdot = dir->sdev_dotdot; + + /* done checking because root is reached */ + if (dir == dotdot) { + break; + } + } + rw_exit(&tdv->sdev_contents); + return (error); +} + +/* + * Renaming a directory to a different parent + * requires modifying the ".." reference. + */ +static void +sdev_fixdotdot(struct sdev_node *dv, struct sdev_node *oparent, + struct sdev_node *nparent) +{ + ASSERT(SDEVTOV(dv)->v_type == VDIR); + ASSERT(nparent); + ASSERT(oparent); + + rw_enter(&nparent->sdev_contents, RW_WRITER); + nparent->sdev_nlink++; + ASSERT(dv->sdev_dotdot == oparent); + dv->sdev_dotdot = nparent; + rw_exit(&nparent->sdev_contents); + + rw_enter(&oparent->sdev_contents, RW_WRITER); + oparent->sdev_nlink--; + rw_exit(&oparent->sdev_contents); +} + +int +sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, + struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm, + struct cred *cred) +{ + int error = 0; + struct vnode *ovp = SDEVTOV(odv); + struct vnode *nvp; + struct vattr vattr; + int doingdir = (ovp->v_type == VDIR); + char *link = NULL; + + /* + * If renaming a directory, and the parents are different (".." must be + * changed) then the source dir must not be in the dir hierarchy above + * the target since it would orphan everything below the source dir. + */ + if (doingdir && (oddv != nddv)) { + error = sdev_checkpath(odv, nddv, cred); + if (error) + return (error); + } + + vattr.va_mask = AT_MODE|AT_UID|AT_GID; + error = VOP_GETATTR(ovp, &vattr, 0, cred); + if (error) + return (error); + + if (*ndvp) { + /* destination existing */ + nvp = SDEVTOV(*ndvp); + ASSERT(nvp); + + /* handling renaming to itself */ + if (odv == *ndvp) + return (0); + + /* special handling directory renaming */ + if (doingdir) { + if (nvp->v_type != VDIR) + return (ENOTDIR); + + /* + * Renaming a directory with the parent different + * requires that ".." be re-written. 
+ */ + if (oddv != nddv) { + sdev_fixdotdot(*ndvp, oddv, nddv); + } + } + } else { + /* creating the destination node with the source attr */ + rw_enter(&nddv->sdev_contents, RW_WRITER); + error = sdev_mknode(nddv, nnm, ndvp, &vattr, NULL, NULL, + cred, SDEV_INIT); + rw_exit(&nddv->sdev_contents); + if (error) + return (error); + + ASSERT(*ndvp); + nvp = SDEVTOV(*ndvp); + } + + /* fix the source for a symlink */ + if (vattr.va_type == VLNK) { + if (odv->sdev_symlink == NULL) { + error = sdev_follow_link(odv); + if (error) + return (ENOENT); + } + ASSERT(odv->sdev_symlink); + link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); + } + + rw_enter(&nddv->sdev_contents, RW_WRITER); + error = sdev_mknode(nddv, nnm, ndvp, &vattr, NULL, (void *)link, + cred, SDEV_READY); + rw_exit(&nddv->sdev_contents); + + if (link) + kmem_free(link, strlen(link) + 1); + + /* update timestamps */ + sdev_update_timestamps(nvp, kcred, AT_CTIME|AT_ATIME); + sdev_update_timestamps(SDEVTOV(nddv), kcred, AT_MTIME|AT_ATIME); + SDEV_RELE(*ndvp); + return (0); +} + +/* + * Merge sdev_node specific information into an attribute structure. 
+ * + * note: sdev_node is not locked here + */ +void +sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) +{ + struct vnode *vp = SDEVTOV(dv); + + vap->va_nlink = dv->sdev_nlink; + vap->va_nodeid = dv->sdev_ino; + vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; + vap->va_type = vp->v_type; + + if (vp->v_type == VDIR) { + vap->va_rdev = 0; + vap->va_fsid = vp->v_rdev; + } else if (vp->v_type == VLNK) { + vap->va_rdev = 0; + vap->va_mode &= ~S_IFMT; + vap->va_mode |= S_IFLNK; + } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { + vap->va_rdev = vp->v_rdev; + vap->va_mode &= ~S_IFMT; + if (vap->va_type == VCHR) + vap->va_mode |= S_IFCHR; + else + vap->va_mode |= S_IFBLK; + } else { + vap->va_rdev = 0; + } +} + +static struct vattr * +sdev_getdefault_attr(enum vtype type) +{ + if (type == VDIR) + return (&sdev_vattr_dir); + else if (type == VCHR) + return (&sdev_vattr_chr); + else if (type == VBLK) + return (&sdev_vattr_blk); + else if (type == VLNK) + return (&sdev_vattr_lnk); + else + return (NULL); +} +int +sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) +{ + int rv = 0; + struct vnode *vp = SDEVTOV(dv); + + switch (vp->v_type) { + case VCHR: + case VBLK: + /* + * If vnode is a device, return special vnode instead + * (though it knows all about -us- via sp->s_realvp) + */ + *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); + VN_RELE(vp); + if (*vpp == NULLVP) + rv = ENOSYS; + break; + default: /* most types are returned as is */ + *vpp = vp; + break; + } + return (rv); +} + +/* + * loopback into sdev_lookup() + */ +static struct vnode * +devname_find_by_devpath(char *devpath, struct vattr *vattr) +{ + int error = 0; + struct vnode *vp; + + error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULLVPP, &vp); + if (error) { + return (NULL); + } + + if (vattr) + (void) VOP_GETATTR(vp, vattr, 0, kcred); + return (vp); +} + +/* + * the junction between devname and devfs + */ +static struct vnode * +devname_configure_by_path(char *physpath, struct 
vattr *vattr) +{ + int error = 0; + struct vnode *vp; + + ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/" - 1)) + == 0); + + error = devfs_lookupname(physpath + sizeof ("/devices/") - 1, + NULLVPP, &vp); + if (error != 0) { + if (error == ENODEV) { + cmn_err(CE_CONT, "%s: not found (line %d)\n", + physpath, __LINE__); + } + + return (NULL); + } + + if (vattr) + (void) VOP_GETATTR(vp, vattr, 0, kcred); + return (vp); +} + +/* + * junction between devname and root file system, e.g. ufs + */ +int +devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) +{ + struct vnode *rdvp = ddv->sdev_attrvp; + int rval = 0; + + ASSERT(rdvp); + + rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred); + return (rval); +} + +static int +sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) +{ + struct sdev_node *dv = NULL; + char *nm; + struct vnode *dirvp; + int error; + vnode_t *vp; + int eof; + struct iovec iov; + struct uio uio; + struct dirent64 *dp; + dirent64_t *dbuf; + size_t dbuflen; + struct vattr vattr; + char *link = NULL; + + if (ddv->sdev_attrvp == NULL) + return (0); + if (!(ddv->sdev_flags & SDEV_BUILD)) + return (0); + + dirvp = ddv->sdev_attrvp; + VN_HOLD(dirvp); + dbuf = kmem_zalloc(dlen, KM_SLEEP); + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_llimit = MAXOFFSET_T; + + eof = 0; + error = 0; + while (!error && !eof) { + uio.uio_resid = dlen; + iov.iov_base = (char *)dbuf; + iov.iov_len = dlen; + (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); + error = VOP_READDIR(dirvp, &uio, kcred, &eof); + VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); + + dbuflen = dlen - uio.uio_resid; + if (error || dbuflen == 0) + break; + + if (!(ddv->sdev_flags & SDEV_BUILD)) { + error = 0; + break; + } + + for (dp = dbuf; ((intptr_t)dp < + (intptr_t)dbuf + dbuflen); + dp = (dirent64_t *)((intptr_t)dp + 
dp->d_reclen)) { + nm = dp->d_name; + + if (strcmp(nm, ".") == 0 || + strcmp(nm, "..") == 0) + continue; + + vp = NULLVP; + dv = sdev_cache_lookup(ddv, nm); + if (dv) { + if (dv->sdev_state != SDEV_ZOMBIE) { + SDEV_SIMPLE_RELE(dv); + } else { + /* + * A ZOMBIE node may not have been + * cleaned up from the backing store, + * bypass this entry in this case, + * and clean it up from the directory + * cache if this is the last call. + */ + (void) sdev_dirdelete(ddv, dv); + } + continue; + } + + /* refill the cache if not already */ + error = devname_backstore_lookup(ddv, nm, &vp); + if (error) + continue; + + vattr.va_mask = AT_MODE|AT_UID|AT_GID; + error = VOP_GETATTR(vp, &vattr, 0, cred); + if (error) + continue; + + if (vattr.va_type == VLNK) { + error = sdev_getlink(vp, &link); + if (error) { + continue; + } + ASSERT(link != NULL); + } + + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, + cred, SDEV_READY); + rw_downgrade(&ddv->sdev_contents); + + if (link != NULL) { + kmem_free(link, strlen(link) + 1); + link = NULL; + } + + if (!error) { + ASSERT(dv); + ASSERT(dv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(dv); + } + vp = NULL; + dv = NULL; + } + } + +done: + VN_RELE(dirvp); + kmem_free(dbuf, dlen); + + return (error); +} + +static int +sdev_filldir_dynamic(struct sdev_node *ddv) +{ + int error; + int i; + struct vattr *vap; + char *nm = NULL; + struct sdev_node *dv = NULL; + + if (!(ddv->sdev_flags & SDEV_BUILD)) { + return (0); + } + + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + + vap = sdev_getdefault_attr(VDIR); + for (i = 0; vtab[i].vt_name != NULL; i++) { + nm = vtab[i].vt_name; + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + error = sdev_mknode(ddv, nm, &dv, vap, NULL, + NULL, kcred, SDEV_READY); + if 
(error) + continue; + ASSERT(dv); + ASSERT(dv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(dv); + dv = NULL; + } + rw_downgrade(&ddv->sdev_contents); + return (0); +} + +/* + * Creating a backing store entry based on sdev_attr. + * This is called either as part of node creation in a persistent directory + * or from setattr/setsecattr to persist access attributes across reboot. + */ +int +sdev_shadow_node(struct sdev_node *dv, struct cred *cred) +{ + int error = 0; + struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); + struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; + struct vattr *vap = dv->sdev_attr; + char *nm = dv->sdev_name; + struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; + + ASSERT(dv && dv->sdev_name && rdvp); + ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); + +lookup: + /* try to find it in the backing store */ + error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred); + if (error == 0) { + if (VOP_REALVP(*rvp, &rrvp) == 0) { + VN_HOLD(rrvp); + VN_RELE(*rvp); + *rvp = rrvp; + } + + kmem_free(dv->sdev_attr, sizeof (vattr_t)); + dv->sdev_attr = NULL; + dv->sdev_attrvp = *rvp; + return (0); + } + + /* let's try to persist the node */ + gethrestime(&vap->va_atime); + vap->va_mtime = vap->va_atime; + vap->va_ctime = vap->va_atime; + vap->va_mask |= AT_TYPE|AT_MODE; + switch (vap->va_type) { + case VDIR: + error = VOP_MKDIR(rdvp, nm, vap, rvp, cred); + sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", + (void *)(*rvp), error)); + break; + case VCHR: + case VBLK: + case VREG: + case VDOOR: + error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, + rvp, cred, 0); + sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", + (void *)(*rvp), error)); + if (!error) + VN_RELE(*rvp); + break; + case VLNK: + ASSERT(dv->sdev_symlink); + error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred); + sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", + error)); + break; + default: + cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " + 
"create\n", nm); + /*NOTREACHED*/ + } + + /* go back to lookup to factor out spec node and set attrvp */ + if (error == 0) + goto lookup; + + return (error); +} + +static int +sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) +{ + int error = 0; + struct sdev_node *dup = NULL; + + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + if ((dup = sdev_findbyname(ddv, nm)) == NULL) { + sdev_direnter(ddv, *dv); + } else { + if (dup->sdev_state == SDEV_ZOMBIE) { + error = sdev_dirdelete(ddv, dup); + /* + * The ZOMBIE node is still hanging + * around with more than one reference counts. + * Fail the new node creation so that + * the directory cache won't have + * duplicate entries for the same named node + */ + if (error == EBUSY) { + SDEV_SIMPLE_RELE(*dv); + sdev_nodedestroy(*dv, 0); + *dv = NULL; + return (error); + } + sdev_direnter(ddv, *dv); + } else { + ASSERT((*dv)->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(*dv); + sdev_nodedestroy(*dv, 0); + *dv = dup; + } + } + + return (0); +} + +static int +sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) +{ + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + return (sdev_dirdelete(ddv, *dv)); +} + +/* + * update the in-core directory cache + */ +int +sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, + sdev_cache_ops_t ops) +{ + int error = 0; + + ASSERT((SDEV_HELD(*dv))); + + ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); + switch (ops) { + case SDEV_CACHE_ADD: + error = sdev_cache_add(ddv, dv, nm); + break; + case SDEV_CACHE_DELETE: + error = sdev_cache_delete(ddv, dv); + break; + default: + break; + } + + return (error); +} + +/* + * retrive the named entry from the directory cache + */ +struct sdev_node * +sdev_cache_lookup(struct sdev_node *ddv, char *nm) +{ + struct sdev_node *dv = NULL; + + ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); + dv = sdev_findbyname(ddv, nm); + + return (dv); +} + +/* + * Implicit reconfig for nodes constructed by a link generator + * Start 
devfsadm if needed, or if devfsadm is in progress, + * prepare to block on devfsadm either completing or + * constructing the desired node. As devfsadmd is global + * in scope, constructing all necessary nodes, we only + * need to initiate it once. + */ +static int +sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) +{ + int error = 0; + + if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { + sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", + ddv->sdev_name, nm, devfsadm_state)); + mutex_enter(&dv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); + mutex_exit(&dv->sdev_lookup_lock); + error = 0; + } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { + sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", + ddv->sdev_name, nm, devfsadm_state)); + + sdev_devfsadmd_thread(ddv, dv, kcred); + mutex_enter(&dv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(dv, + (SDEV_LOOKUP | SDEV_LGWAITING)); + mutex_exit(&dv->sdev_lookup_lock); + error = 0; + } else { + error = -1; + } + + return (error); +} + +static int +sdev_call_modulelookup(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, + int (*fn)(char *, devname_handle_t *, struct cred *), struct cred *cred) +{ + struct vnode *rvp = NULL; + int error = 0; + struct vattr *vap; + devname_spec_t spec; + devname_handle_t *hdl; + void *args = NULL; + struct sdev_node *dv = *dvp; + + ASSERT(dv && ddv); + hdl = &(dv->sdev_handle); + ASSERT(hdl->dh_data == dv); + mutex_enter(&dv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + error = (*fn)(nm, hdl, cred); + if (error) { + return (error); + } + + spec = hdl->dh_spec; + args = hdl->dh_args; + ASSERT(args); + + switch (spec) { + case DEVNAME_NS_PATH: + /* + * symlink of: + * /dev/dir/nm -> /device/... + */ + rvp = devname_configure_by_path((char *)args, NULL); + break; + case DEVNAME_NS_DEV: + /* + * symlink of: + * /dev/dir/nm -> /dev/... 
+ */ + rvp = devname_find_by_devpath((char *)args, NULL); + break; + default: + if (args) + kmem_free((char *)args, strlen(args) + 1); + return (ENOENT); + + } + + if (rvp == NULL) { + if (args) + kmem_free((char *)args, strlen(args) + 1); + return (ENOENT); + } else { + vap = sdev_getdefault_attr(VLNK); + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + /* + * Could sdev_mknode return a different dv_node + * once the lock is dropped? + */ + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + error = sdev_mknode(ddv, nm, &dv, vap, NULL, args, cred, + SDEV_READY); + rw_downgrade(&ddv->sdev_contents); + if (error) { + if (args) + kmem_free((char *)args, strlen(args) + 1); + return (error); + } else { + mutex_enter(&dv->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + error = 0; + } + } + + if (args) + kmem_free((char *)args, strlen(args) + 1); + + *dvp = dv; + return (0); +} + +/* + * Support for specialized device naming construction mechanisms + */ +static int +sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, + int (*callback)(struct sdev_node *, char *, void **, struct cred *, + void *, char *), int flags, struct cred *cred) +{ + int rv = 0; + char *physpath = NULL; + struct vnode *rvp = NULL; + struct vattr vattr; + struct vattr *vap; + struct sdev_node *dv = *dvp; + + mutex_enter(&dv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + + /* for non-devfsadm devices */ + if (flags & SDEV_PATH) { + physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, + NULL); + if (rv) { + kmem_free(physpath, MAXPATHLEN); + return (-1); + } + + ASSERT(physpath); + rvp = devname_configure_by_path(physpath, NULL); + if (rvp == NULL) { + sdcmn_err3(("devname_configure_by_path: " + "failed for /dev/%s/%s\n", + ddv->sdev_name, nm)); + 
kmem_free(physpath, MAXPATHLEN); + rv = -1; + } else { + vap = sdev_getdefault_attr(VLNK); + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + + /* + * Sdev_mknode may return back a different sdev_node + * that was created by another thread that + * raced to the directroy cache before this thread. + * + * With current directory cache mechanism + * (linked list with the sdev_node name as + * the entity key), this is a way to make sure + * only one entry exists for the same name + * in the same directory. The outcome is + * the winner wins. + */ + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + rv = sdev_mknode(ddv, nm, &dv, vap, NULL, + (void *)physpath, cred, SDEV_READY); + rw_downgrade(&ddv->sdev_contents); + kmem_free(physpath, MAXPATHLEN); + if (rv) { + return (rv); + } else { + mutex_enter(&dv->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + return (0); + } + } + } else if (flags & SDEV_VNODE) { + /* + * DBNR has its own way to create the device + * and return a backing store vnode in rvp + */ + ASSERT(callback); + rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL); + if (rv || (rvp == NULL)) { + sdcmn_err3(("devname_lookup_func: SDEV_VNODE " + "callback failed \n")); + return (-1); + } + vap = sdev_getdefault_attr(rvp->v_type); + if (vap == NULL) + return (-1); + + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL, + cred, SDEV_READY); + rw_downgrade(&ddv->sdev_contents); + if (rv) + return (rv); + + mutex_enter(&dv->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + return (0); + } else if (flags & SDEV_VATTR) { + /* + * /dev/pts + * + * DBNR has its own way to create the device + * "0" is returned upon success. 
+ * + * callback is responsible to set the basic attributes, + * e.g. va_type/va_uid/va_gid/ + * dev_t if VCHR or VBLK/ + */ + ASSERT(callback); + rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); + if (rv) { + sdcmn_err3(("devname_lookup_func: SDEV_NONE " + "callback failed \n")); + return (-1); + } + + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, + cred, SDEV_READY); + rw_downgrade(&ddv->sdev_contents); + + if (rv) + return (rv); + + mutex_enter(&dv->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + return (0); + } else { + impossible(("lookup: %s/%s by %s not supported (%d)\n", + SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, + __LINE__)); + rv = -1; + } + + *dvp = dv; + return (rv); +} + +static int +is_devfsadm_thread(char *exec_name) +{ + /* + * note: because devfsadmd -> /usr/sbin/devfsadm + * it is safe to use "devfsadm" to capture the lookups + * from devfsadm and its daemon version. + */ + if (strcmp(exec_name, "devfsadm") == 0) + return (1); + return (0); +} + + +/* + * Lookup Order: + * sdev_node cache; + * backing store (SDEV_PERSIST); + * DBNR: a. dir_ops implemented in the loadable modules; + * b. vnode ops in vtab. 
+ */ +int +devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, + struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, + struct cred *, void *, char *), int flags) +{ + int rv = 0, nmlen; + struct vnode *rvp = NULL; + struct sdev_node *dv = NULL; + int retried = 0; + int error = 0; + struct devname_nsmap *map = NULL; + struct devname_ops *dirops = NULL; + int (*fn)(char *, devname_handle_t *, struct cred *) = NULL; + struct vattr vattr; + char *lookup_thread = curproc->p_user.u_comm; + int failed_flags = 0; + int (*vtor)(struct sdev_node *) = NULL; + int state; + int parent_state; + char *link = NULL; + + if (SDEVTOV(ddv)->v_type != VDIR) + return (ENOTDIR); + + /* + * Empty name or ., return node itself. + */ + nmlen = strlen(nm); + if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { + *vpp = SDEVTOV(ddv); + VN_HOLD(*vpp); + return (0); + } + + /* + * .., return the parent directory + */ + if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { + *vpp = SDEVTOV(ddv->sdev_dotdot); + VN_HOLD(*vpp); + return (0); + } + + rw_enter(&ddv->sdev_contents, RW_READER); + if (ddv->sdev_flags & SDEV_VTOR) { + vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); + ASSERT(vtor); + } + +tryagain: + /* + * (a) directory cache lookup: + */ + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + parent_state = ddv->sdev_state; + dv = sdev_cache_lookup(ddv, nm); + if (dv) { + state = dv->sdev_state; + switch (state) { + case SDEV_INIT: + if (is_devfsadm_thread(lookup_thread)) + break; + + /* ZOMBIED parent won't allow node creation */ + if (parent_state == SDEV_ZOMBIE) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, + retried); + goto nolock_notfound; + } + + mutex_enter(&dv->sdev_lookup_lock); + /* compensate the threads started after devfsadm */ + if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && + !(SDEV_IS_LOOKUP(dv))) + SDEV_BLOCK_OTHERS(dv, + (SDEV_LOOKUP | SDEV_LGWAITING)); + + if (SDEV_IS_LOOKUP(dv)) { + failed_flags |= SLF_REBUILT; + 
rw_exit(&ddv->sdev_contents); + error = sdev_wait4lookup(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); + rw_enter(&ddv->sdev_contents, RW_READER); + + if (error != 0) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, + retried); + goto nolock_notfound; + } + + state = dv->sdev_state; + if (state == SDEV_INIT) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, + retried); + goto nolock_notfound; + } else if (state == SDEV_READY) { + goto found; + } else if (state == SDEV_ZOMBIE) { + rw_exit(&ddv->sdev_contents); + SD_TRACE_FAILED_LOOKUP(ddv, nm, + retried); + SDEV_RELE(dv); + goto lookup_failed; + } + } else { + mutex_exit(&dv->sdev_lookup_lock); + } + break; + case SDEV_READY: + goto found; + case SDEV_ZOMBIE: + rw_exit(&ddv->sdev_contents); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + SDEV_RELE(dv); + goto lookup_failed; + default: + rw_exit(&ddv->sdev_contents); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + sdev_lookup_failed(ddv, nm, failed_flags); + *vpp = NULLVP; + return (ENOENT); + } + } + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + + /* + * ZOMBIED parent does not allow new node creation. 
+ * bail out early + */ + if (parent_state == SDEV_ZOMBIE) { + rw_exit(&ddv->sdev_contents); + *vpp = NULL; + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + return (ENOENT); + } + + /* + * (b0): backing store lookup + * SDEV_PERSIST is default except: + * 1) pts nodes + * 2) non-chmod'ed local nodes + */ + if (SDEV_IS_PERSIST(ddv)) { + error = devname_backstore_lookup(ddv, nm, &rvp); + + if (!error) { + sdcmn_err3(("devname_backstore_lookup: " + "found attrvp %p for %s\n", (void *)rvp, nm)); + + vattr.va_mask = AT_MODE|AT_UID|AT_GID; + error = VOP_GETATTR(rvp, &vattr, 0, cred); + if (error) { + rw_exit(&ddv->sdev_contents); + if (dv) + SDEV_RELE(dv); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + sdev_lookup_failed(ddv, nm, failed_flags); + *vpp = NULLVP; + return (ENOENT); + } + + if (vattr.va_type == VLNK) { + error = sdev_getlink(rvp, &link); + if (error) { + rw_exit(&ddv->sdev_contents); + if (dv) + SDEV_RELE(dv); + SD_TRACE_FAILED_LOOKUP(ddv, nm, + retried); + sdev_lookup_failed(ddv, nm, + failed_flags); + *vpp = NULLVP; + return (ENOENT); + } + ASSERT(link != NULL); + } + + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + error = sdev_mknode(ddv, nm, &dv, &vattr, + rvp, link, cred, SDEV_READY); + rw_downgrade(&ddv->sdev_contents); + + if (link != NULL) { + kmem_free(link, strlen(link) + 1); + link = NULL; + } + + if (error) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + rw_exit(&ddv->sdev_contents); + if (dv) + SDEV_RELE(dv); + goto lookup_failed; + } else { + goto found; + } + } else if (retried) { + rw_exit(&ddv->sdev_contents); + sdcmn_err3(("retry of lookup of %s/%s: failed\n", + ddv->sdev_name, nm)); + if (dv) + SDEV_RELE(dv); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + sdev_lookup_failed(ddv, nm, failed_flags); + *vpp = NULLVP; + return (ENOENT); + } + } + + + /* first thread that is doing the lookup on this node */ + if (!dv) { + if (!rw_tryupgrade(&ddv->sdev_contents)) { + 
rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, + cred, SDEV_INIT); + if (!dv) { + rw_exit(&ddv->sdev_contents); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + sdev_lookup_failed(ddv, nm, failed_flags); + *vpp = NULLVP; + return (ENOENT); + } + rw_downgrade(&ddv->sdev_contents); + } + ASSERT(dv); + ASSERT(SDEV_HELD(dv)); + + if (SDEV_IS_NO_NCACHE(dv)) { + failed_flags |= SLF_NO_NCACHE; + } + + if (SDEV_IS_GLOBAL(ddv)) { + map = sdev_get_map(ddv, 1); + dirops = map ? map->dir_ops : NULL; + fn = dirops ? dirops->devnops_lookup : NULL; + } + + /* + * (b1) invoking devfsadm once per life time for devfsadm nodes + */ + if ((fn == NULL) && !callback) { + + if (sdev_reconfig_boot || !i_ddi_io_initialized() || + SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || + ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { + ASSERT(SDEV_HELD(dv)); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto nolock_notfound; + } + + /* + * filter out known non-existent devices recorded + * during initial reconfiguration boot for which + * reconfig should not be done and lookup may + * be short-circuited now. 
+ */ + if (sdev_lookup_filter(ddv, nm)) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto nolock_notfound; + } + + /* bypassing devfsadm internal nodes */ + if (is_devfsadm_thread(lookup_thread)) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto nolock_notfound; + } + + if (sdev_reconfig_disable) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto nolock_notfound; + } + + error = sdev_call_devfsadmd(ddv, dv, nm); + if (error == 0) { + sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", + ddv->sdev_name, nm, curproc->p_user.u_comm)); + if (sdev_reconfig_verbose) { + cmn_err(CE_CONT, + "?lookup of %s/%s by %s: reconfig\n", + ddv->sdev_name, nm, curproc->p_user.u_comm); + } + retried = 1; + failed_flags |= SLF_REBUILT; + ASSERT(dv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(dv); + goto tryagain; + } else { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto nolock_notfound; + } + } + + /* + * (b2) Directory Based Name Resolution (DBNR): + * ddv - parent + * nm - /dev/(ddv->sdev_name)/nm + * + * note: module vnode ops take precedence than the build-in ones + */ + if (fn) { + error = sdev_call_modulelookup(ddv, &dv, nm, fn, cred); + if (error) { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto notfound; + } else { + goto found; + } + } else if (callback) { + error = sdev_call_dircallback(ddv, &dv, nm, callback, + flags, cred); + if (error == 0) { + goto found; + } else { + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + goto notfound; + } + } + ASSERT(rvp); + +found: + ASSERT(!(dv->sdev_flags & SDEV_STALE)); + ASSERT(dv->sdev_state == SDEV_READY); + if (vtor) { + /* + * Check validity of returned node + */ + switch (vtor(dv)) { + case SDEV_VTOR_VALID: + break; + case SDEV_VTOR_INVALID: + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + sdcmn_err7(("lookup: destroy invalid " + "node: %s(%p)\n", dv->sdev_name, (void *)dv)); + goto nolock_notfound; + case SDEV_VTOR_SKIP: + sdcmn_err7(("lookup: node not applicable - " + "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); + 
rw_exit(&ddv->sdev_contents); + SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); + SDEV_RELE(dv); + goto lookup_failed; + default: + cmn_err(CE_PANIC, + "dev fs: validator failed: %s(%p)\n", + dv->sdev_name, (void *)dv); + break; + /*NOTREACHED*/ + } + } + + if ((SDEVTOV(dv)->v_type == VDIR) && SDEV_IS_GLOBAL(dv)) { + rw_enter(&dv->sdev_contents, RW_READER); + (void) sdev_get_map(dv, 1); + rw_exit(&dv->sdev_contents); + } + rw_exit(&ddv->sdev_contents); + rv = sdev_to_vp(dv, vpp); + sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " + "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, + dv->sdev_state, nm, rv)); + return (rv); + +notfound: + mutex_enter(&dv->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); + mutex_exit(&dv->sdev_lookup_lock); +nolock_notfound: + /* + * Destroy the node that is created for synchronization purposes. + */ + sdcmn_err3(("devname_lookup_func: %s with state %d\n", + nm, dv->sdev_state)); + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + if (dv->sdev_state == SDEV_INIT) { + if (!rw_tryupgrade(&ddv->sdev_contents)) { + rw_exit(&ddv->sdev_contents); + rw_enter(&ddv->sdev_contents, RW_WRITER); + } + + /* + * Node state may have changed during the lock + * changes. Re-check. + */ + if (dv->sdev_state == SDEV_INIT) { + (void) sdev_dirdelete(ddv, dv); + rw_exit(&ddv->sdev_contents); + sdev_lookup_failed(ddv, nm, failed_flags); + *vpp = NULL; + return (ENOENT); + } + } + + rw_exit(&ddv->sdev_contents); + SDEV_RELE(dv); + +lookup_failed: + sdev_lookup_failed(ddv, nm, failed_flags); + *vpp = NULL; + return (ENOENT); +} + +/* + * Given a directory node, mark all nodes beneath as + * STALE, i.e. 
nodes that don't exist as far as new + * consumers are concerned + */ +void +sdev_stale(struct sdev_node *ddv) +{ + struct sdev_node *dv; + struct vnode *vp; + + ASSERT(SDEVTOV(ddv)->v_type == VDIR); + + rw_enter(&ddv->sdev_contents, RW_WRITER); + for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next) { + vp = SDEVTOV(dv); + if (vp->v_type == VDIR) + sdev_stale(dv); + + sdcmn_err9(("sdev_stale: setting stale %s\n", + dv->sdev_name)); + dv->sdev_flags |= SDEV_STALE; + } + ddv->sdev_flags |= SDEV_BUILD; + rw_exit(&ddv->sdev_contents); +} + +/* + * Given a directory node, clean out all the nodes beneath. + * If expr is specified, clean node with names matching expr. + * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, + * so they are excluded from future lookups. + */ +int +sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) +{ + int error = 0; + int busy = 0; + struct vnode *vp; + struct sdev_node *dv, *next = NULL; + int bkstore = 0; + int len = 0; + char *bks_name = NULL; + + ASSERT(SDEVTOV(ddv)->v_type == VDIR); + + /* + * We try our best to destroy all unused sdev_node's + */ + rw_enter(&ddv->sdev_contents, RW_WRITER); + for (dv = ddv->sdev_dot; dv; dv = next) { + next = dv->sdev_next; + vp = SDEVTOV(dv); + + if (expr && gmatch(dv->sdev_name, expr) == 0) + continue; + + if (vp->v_type == VDIR && + sdev_cleandir(dv, NULL, flags) != 0) { + sdcmn_err9(("sdev_cleandir: dir %s busy\n", + dv->sdev_name)); + busy++; + continue; + } + + if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { + sdcmn_err9(("sdev_cleandir: dir %s busy\n", + dv->sdev_name)); + busy++; + continue; + } + + /* + * at this point, either dv is not held or SDEV_ENFORCE + * is specified. In either case, dv needs to be deleted + */ + SDEV_HOLD(dv); + + bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; + if (bkstore && (vp->v_type == VDIR)) + bkstore += 1; + + if (bkstore) { + len = strlen(dv->sdev_name) + 1; + bks_name = kmem_alloc(len, KM_SLEEP); + bcopy(dv->sdev_name, bks_name, len); + } + + error = sdev_dirdelete(ddv, dv); + + if (error == EBUSY) { + sdcmn_err9(("sdev_cleandir: dir busy\n")); + busy++; + } + + /* take care the backing store clean up */ + if (bkstore && (error == 0)) { + ASSERT(bks_name); + ASSERT(ddv->sdev_attrvp); + + if (bkstore == 1) { + error = VOP_REMOVE(ddv->sdev_attrvp, + bks_name, kcred); + } else if (bkstore == 2) { + error = VOP_RMDIR(ddv->sdev_attrvp, + bks_name, ddv->sdev_attrvp, kcred); + } + + /* do not propagate the backing store errors */ + if (error) { + sdcmn_err9(("sdev_cleandir: backing store" + "not cleaned\n")); + error = 0; + } + + bkstore = 0; + kmem_free(bks_name, len); + bks_name = NULL; + len = 0; + } + } + + ddv->sdev_flags |= SDEV_BUILD; + rw_exit(&ddv->sdev_contents); + + if (busy) { + error = EBUSY; + } + + return (error); +} + +/* + * a convenient wrapper for readdir() funcs + */ +size_t +add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) +{ + size_t reclen = DIRENT64_RECLEN(strlen(nm)); + if (reclen > size) + return (0); + + de->d_ino = (ino64_t)ino; + de->d_off = (off64_t)off + 1; + de->d_reclen = (ushort_t)reclen; + (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); + return (reclen); +} + +/* + * sdev_mount service routines + */ +int +sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) +{ + int error; + + if (uap->datalen != sizeof (*args)) + return (EINVAL); + + if (error = copyin(uap->dataptr, args, sizeof (*args))) { + cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" + "get user data. 
error %d\n", error); + return (EFAULT); + } + + return (0); +} + +#ifdef nextdp +#undef nextdp +#endif +#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) + +/* + * readdir helper func + */ +int +devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, + int flags) +{ + struct sdev_node *ddv = VTOSDEV(vp); + struct sdev_node *dv; + dirent64_t *dp; + ulong_t outcount = 0; + size_t namelen; + ulong_t alloc_count; + void *outbuf; + struct iovec *iovp; + int error = 0; + size_t reclen; + offset_t diroff; + offset_t soff; + int this_reclen; + struct devname_nsmap *map = NULL; + struct devname_ops *dirops = NULL; + int (*fn)(devname_handle_t *, struct cred *) = NULL; + int (*vtor)(struct sdev_node *) = NULL; + struct vattr attr; + timestruc_t now; + + ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); + ASSERT(RW_READ_HELD(&ddv->sdev_contents)); + + if (uiop->uio_loffset >= MAXOFF_T) { + if (eofp) + *eofp = 1; + return (0); + } + + if (uiop->uio_iovcnt != 1) + return (EINVAL); + + if (vp->v_type != VDIR) + return (ENOTDIR); + + if (ddv->sdev_flags & SDEV_VTOR) { + vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); + ASSERT(vtor); + } + + if (eofp != NULL) + *eofp = 0; + + soff = uiop->uio_offset; + iovp = uiop->uio_iov; + alloc_count = iovp->iov_len; + dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); + outcount = 0; + + if (ddv->sdev_state == SDEV_ZOMBIE) + goto get_cache; + + if (!SDEV_IS_GLOBAL(ddv)) { + /* make sure directory content is up to date */ + prof_filldir(ddv); + } else { + map = sdev_get_map(ddv, 0); + dirops = map ? map->dir_ops : NULL; + fn = dirops ? dirops->devnops_readdir : NULL; + + if (map && map->dir_map) { + /* + * load the name mapping rule database + * through invoking devfsadm and symlink + * all the entries in the map + */ + devname_rdr_result_t rdr_result; + int do_thread = 0; + + rw_enter(&map->dir_lock, RW_READER); + do_thread = map->dir_maploaded ? 
0 : 1; + rw_exit(&map->dir_lock); + + if (do_thread) { + mutex_enter(&ddv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(ddv, SDEV_READDIR); + mutex_exit(&ddv->sdev_lookup_lock); + + sdev_dispatch_to_nsrdr_thread(ddv, + map->dir_map, &rdr_result); + } + } else if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && + !sdev_reconfig_boot && (flags & SDEV_BROWSE) && + !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && + ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && + !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && + !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && + !sdev_reconfig_disable) { + /* + * invoking "devfsadm" to do system device reconfig + */ + mutex_enter(&ddv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(ddv, + (SDEV_READDIR|SDEV_LGWAITING)); + mutex_exit(&ddv->sdev_lookup_lock); + + sdcmn_err8(("readdir of %s by %s: reconfig\n", + ddv->sdev_path, curproc->p_user.u_comm)); + if (sdev_reconfig_verbose) { + cmn_err(CE_CONT, + "?readdir of %s by %s: reconfig\n", + ddv->sdev_path, curproc->p_user.u_comm); + } + + sdev_devfsadmd_thread(ddv, NULL, kcred); + } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { + /* + * compensate the "ls" started later than "devfsadm" + */ + mutex_enter(&ddv->sdev_lookup_lock); + SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); + mutex_exit(&ddv->sdev_lookup_lock); + } + + /* + * release the contents lock so that + * the cache maybe updated by devfsadmd + */ + rw_exit(&ddv->sdev_contents); + mutex_enter(&ddv->sdev_lookup_lock); + if (SDEV_IS_READDIR(ddv)) + (void) sdev_wait4lookup(ddv, SDEV_READDIR); + mutex_exit(&ddv->sdev_lookup_lock); + rw_enter(&ddv->sdev_contents, RW_READER); + + sdcmn_err4(("readdir of directory %s by %s\n", + ddv->sdev_name, curproc->p_user.u_comm)); + while (ddv->sdev_flags & SDEV_BUILD) { + if (SDEV_IS_PERSIST(ddv)) { + error = sdev_filldir_from_store(ddv, + alloc_count, cred); + } + + /* + * pre-creating the directories + * defined in vtab + */ + if (SDEVTOV(ddv)->v_flag & VROOT) { + error = 
sdev_filldir_dynamic(ddv); + } + + if (!error) + ddv->sdev_flags &= ~SDEV_BUILD; + } + } + +get_cache: + /* handle "." and ".." */ + diroff = 0; + if (soff == 0) { + /* first time */ + this_reclen = DIRENT64_RECLEN(1); + if (alloc_count < this_reclen) { + error = EINVAL; + goto done; + } + + dp->d_ino = (ino64_t)ddv->sdev_ino; + dp->d_off = (off64_t)1; + dp->d_reclen = (ushort_t)this_reclen; + + (void) strncpy(dp->d_name, ".", + DIRENT64_NAMELEN(this_reclen)); + outcount += dp->d_reclen; + dp = nextdp(dp); + } + + diroff++; + if (soff <= 1) { + this_reclen = DIRENT64_RECLEN(2); + if (alloc_count < outcount + this_reclen) { + error = EINVAL; + goto done; + } + + dp->d_reclen = (ushort_t)this_reclen; + dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; + dp->d_off = (off64_t)2; + + (void) strncpy(dp->d_name, "..", + DIRENT64_NAMELEN(this_reclen)); + outcount += dp->d_reclen; + + dp = nextdp(dp); + } + + + /* gets the cache */ + diroff++; + for (dv = ddv->sdev_dot; dv; dv = dv->sdev_next, diroff++) { + sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", + diroff, soff, dv->sdev_name)); + + /* bypassing pre-matured nodes */ + if (diroff < soff || (dv->sdev_state != SDEV_READY)) { + sdcmn_err3(("sdev_readdir: pre-mature node " + "%s\n", dv->sdev_name)); + continue; + } + + /* don't list stale nodes */ + if (dv->sdev_flags & SDEV_STALE) { + sdcmn_err4(("sdev_readdir: STALE node " + "%s\n", dv->sdev_name)); + continue; + } + + /* + * Check validity of node + */ + if (vtor) { + switch (vtor(dv)) { + case SDEV_VTOR_VALID: + break; + case SDEV_VTOR_INVALID: + case SDEV_VTOR_SKIP: + continue; + default: + cmn_err(CE_PANIC, + "dev fs: validator failed: %s(%p)\n", + dv->sdev_name, (void *)dv); + break; + /*NOTREACHED*/ + } + } + + /* + * call back into the module for the validity/bookkeeping + * of this entry + */ + if (fn) { + error = (*fn)(&(dv->sdev_handle), cred); + if (error) { + sdcmn_err4(("sdev_readdir: module did not " + "validate %s\n", dv->sdev_name)); + 
continue; + } + } + + namelen = strlen(dv->sdev_name); + reclen = DIRENT64_RECLEN(namelen); + if (outcount + reclen > alloc_count) { + goto full; + } + dp->d_reclen = (ushort_t)reclen; + dp->d_ino = (ino64_t)dv->sdev_ino; + dp->d_off = (off64_t)diroff + 1; + (void) strncpy(dp->d_name, dv->sdev_name, + DIRENT64_NAMELEN(reclen)); + outcount += reclen; + dp = nextdp(dp); + } + +full: + sdcmn_err4(("sdev_readdir: moving %lu bytes: " + "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, + (void *)dv)); + + if (outcount) + error = uiomove(outbuf, outcount, UIO_READ, uiop); + + if (!error) { + uiop->uio_offset = diroff; + if (eofp) + *eofp = dv ? 0 : 1; + } + + + if (ddv->sdev_attrvp) { + gethrestime(&now); + attr.va_ctime = now; + attr.va_atime = now; + attr.va_mask = AT_CTIME|AT_ATIME; + + (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); + } +done: + kmem_free(outbuf, alloc_count); + return (error); +} + + +static int +sdev_modctl_lookup(const char *path, vnode_t **r_vp) +{ + vnode_t *vp; + vnode_t *cvp; + struct sdev_node *svp; + char *nm; + struct pathname pn; + int error; + int persisted = 0; + + if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) + return (error); + nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); + + vp = rootdir; + VN_HOLD(vp); + + while (pn_pathleft(&pn)) { + ASSERT(vp->v_type == VDIR); + (void) pn_getcomponent(&pn, nm); + error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred); + VN_RELE(vp); + + if (error) + break; + + /* traverse mount points encountered on our journey */ + if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { + VN_RELE(cvp); + break; + } + + /* + * Direct the operation to the persisting filesystem + * underlying /dev. Bail if we encounter a + * non-persistent dev entity here. 
+ */ + if (cvp->v_vfsp->vfs_fstype == devtype) { + + if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { + error = ENOENT; + VN_RELE(cvp); + break; + } + + if (VTOSDEV(cvp) == NULL) { + error = ENOENT; + VN_RELE(cvp); + break; + } + svp = VTOSDEV(cvp); + if ((vp = svp->sdev_attrvp) == NULL) { + error = ENOENT; + VN_RELE(cvp); + break; + } + persisted = 1; + VN_HOLD(vp); + VN_RELE(cvp); + cvp = vp; + } + + vp = cvp; + pn_skipslash(&pn); + } + + kmem_free(nm, MAXNAMELEN); + pn_free(&pn); + + if (error) + return (error); + + /* + * Only return persisted nodes in the filesystem underlying /dev. + */ + if (!persisted) { + VN_RELE(vp); + return (ENOENT); + } + + *r_vp = vp; + return (0); +} + +int +sdev_modctl_readdir(const char *dir, char ***dirlistp, + int *npathsp, int *npathsp_alloc) +{ + char **pathlist = NULL; + char **newlist = NULL; + int npaths = 0; + int npaths_alloc = 0; + dirent64_t *dbuf = NULL; + int n; + char *s; + int error; + vnode_t *vp; + int eof; + struct iovec iov; + struct uio uio; + struct dirent64 *dp; + size_t dlen; + size_t dbuflen; + int ndirents = 64; + char *nm; + + error = sdev_modctl_lookup(dir, &vp); + sdcmn_err11(("modctl readdir: %s by %s: %s\n", + dir, curproc->p_user.u_comm, + (error == 0) ? 
"ok" : "failed")); + if (error) + return (error); + + dlen = ndirents * (sizeof (*dbuf)); + dbuf = kmem_alloc(dlen, KM_SLEEP); + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_llimit = MAXOFFSET_T; + + eof = 0; + error = 0; + while (!error && !eof) { + uio.uio_resid = dlen; + iov.iov_base = (char *)dbuf; + iov.iov_len = dlen; + + (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); + error = VOP_READDIR(vp, &uio, kcred, &eof); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + + dbuflen = dlen - uio.uio_resid; + + if (error || dbuflen == 0) + break; + + for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); + dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { + + nm = dp->d_name; + + if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) + continue; + + if (npaths == npaths_alloc) { + npaths_alloc += 64; + newlist = (char **) + kmem_zalloc((npaths_alloc + 1) * + sizeof (char *), KM_SLEEP); + if (pathlist) { + bcopy(pathlist, newlist, + npaths * sizeof (char *)); + kmem_free(pathlist, + (npaths + 1) * sizeof (char *)); + } + pathlist = newlist; + } + n = strlen(nm) + 1; + s = kmem_alloc(n, KM_SLEEP); + bcopy(nm, s, n); + pathlist[npaths++] = s; + sdcmn_err11((" %s/%s\n", dir, s)); + } + } + +exit: + VN_RELE(vp); + + if (dbuf) + kmem_free(dbuf, dlen); + + if (error) + return (error); + + *dirlistp = pathlist; + *npathsp = npaths; + *npathsp_alloc = npaths_alloc; + + return (0); +} + +void +sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) +{ + int i, n; + + for (i = 0; i < npaths; i++) { + n = strlen(pathlist[i]) + 1; + kmem_free(pathlist[i], n); + } + + kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); +} + +int +sdev_modctl_devexists(const char *path) +{ + vnode_t *vp; + int error; + + error = sdev_modctl_lookup(path, &vp); + sdcmn_err11(("modctl dev exists: %s by %s: %s\n", + path, curproc->p_user.u_comm, + (error == 0) ? 
"ok" : "failed")); + if (error == 0) + VN_RELE(vp); + + return (error); +} + +void +sdev_update_newnsmap(struct devname_nsmap *map, char *module, char *mapname) +{ + rw_enter(&map->dir_lock, RW_WRITER); + if (module) { + ASSERT(map->dir_newmodule == NULL); + map->dir_newmodule = i_ddi_strdup(module, KM_SLEEP); + } + if (mapname) { + ASSERT(map->dir_newmap == NULL); + map->dir_newmap = i_ddi_strdup(mapname, KM_SLEEP); + } + + map->dir_invalid = 1; + rw_exit(&map->dir_lock); +} + +void +sdev_replace_nsmap(struct devname_nsmap *map, char *module, char *mapname) +{ + char *old_module = NULL; + char *old_map = NULL; + + ASSERT(RW_LOCK_HELD(&map->dir_lock)); + if (!rw_tryupgrade(&map->dir_lock)) { + rw_exit(&map->dir_lock); + rw_enter(&map->dir_lock, RW_WRITER); + } + + old_module = map->dir_module; + if (module) { + if (old_module && strcmp(old_module, module) != 0) { + kmem_free(old_module, strlen(old_module) + 1); + } + map->dir_module = module; + map->dir_newmodule = NULL; + } + + old_map = map->dir_map; + if (mapname) { + if (old_map && strcmp(old_map, mapname) != 0) { + kmem_free(old_map, strlen(old_map) + 1); + } + + map->dir_map = mapname; + map->dir_newmap = NULL; + } + map->dir_maploaded = 0; + map->dir_invalid = 0; + rw_downgrade(&map->dir_lock); +} + +/* + * dir_name should have at least one attribute, + * dir_module + * or dir_map + * or both + * caller holds the devname_nsmaps_lock + */ +void +sdev_insert_nsmap(char *dir_name, char *dir_module, char *dir_map) +{ + struct devname_nsmap *map; + int len = 0; + + ASSERT(dir_name); + ASSERT(dir_module || dir_map); + ASSERT(MUTEX_HELD(&devname_nsmaps_lock)); + + if (map = sdev_get_nsmap_by_dir(dir_name, 1)) { + sdev_update_newnsmap(map, dir_module, dir_map); + return; + } + + map = (struct devname_nsmap *)kmem_zalloc(sizeof (*map), KM_SLEEP); + map->dir_name = i_ddi_strdup(dir_name, KM_SLEEP); + if (dir_module) { + map->dir_module = i_ddi_strdup(dir_module, KM_SLEEP); + } + + if (dir_map) { + if (dir_map[0] != 
'/') { + len = strlen(ETC_DEV_DIR) + strlen(dir_map) + 2; + map->dir_map = kmem_zalloc(len, KM_SLEEP); + (void) snprintf(map->dir_map, len, "%s/%s", ETC_DEV_DIR, + dir_map); + } else { + map->dir_map = i_ddi_strdup(dir_map, KM_SLEEP); + } + } + + map->dir_ops = NULL; + map->dir_maploaded = 0; + map->dir_invalid = 0; + rw_init(&map->dir_lock, NULL, RW_DEFAULT, NULL); + + map->next = devname_nsmaps; + map->prev = NULL; + if (devname_nsmaps) { + devname_nsmaps->prev = map; + } + devname_nsmaps = map; +} + +struct devname_nsmap * +sdev_get_nsmap_by_dir(char *dir_path, int locked) +{ + struct devname_nsmap *map = NULL; + + if (!locked) + mutex_enter(&devname_nsmaps_lock); + for (map = devname_nsmaps; map; map = map->next) { + sdcmn_err6(("sdev_get_nsmap_by_dir: dir %s\n", map->dir_name)); + if (strcmp(map->dir_name, dir_path) == 0) { + if (!locked) + mutex_exit(&devname_nsmaps_lock); + return (map); + } + } + if (!locked) + mutex_exit(&devname_nsmaps_lock); + return (NULL); +} + +struct devname_nsmap * +sdev_get_nsmap_by_module(char *mod_name) +{ + struct devname_nsmap *map = NULL; + + mutex_enter(&devname_nsmaps_lock); + for (map = devname_nsmaps; map; map = map->next) { + sdcmn_err7(("sdev_get_nsmap_by_module: module %s\n", + map->dir_module)); + if (map->dir_module && strcmp(map->dir_module, mod_name) == 0) { + mutex_exit(&devname_nsmaps_lock); + return (map); + } + } + mutex_exit(&devname_nsmaps_lock); + return (NULL); +} + +void +sdev_invalidate_nsmaps() +{ + struct devname_nsmap *map = NULL; + + ASSERT(MUTEX_HELD(&devname_nsmaps_lock)); + + if (devname_nsmaps == NULL) + return; + + for (map = devname_nsmaps; map; map = map->next) { + rw_enter(&map->dir_lock, RW_WRITER); + map->dir_invalid = 1; + rw_exit(&map->dir_lock); + } + devname_nsmaps_invalidated = 1; +} + + +int +sdev_nsmaps_loaded() +{ + int ret = 0; + + mutex_enter(&devname_nsmaps_lock); + if (devname_nsmaps_loaded) + ret = 1; + + mutex_exit(&devname_nsmaps_lock); + return (ret); +} + +int 
+sdev_nsmaps_reloaded() +{ + int ret = 0; + + mutex_enter(&devname_nsmaps_lock); + if (devname_nsmaps_invalidated) + ret = 1; + + mutex_exit(&devname_nsmaps_lock); + return (ret); +} + +static void +sdev_free_nsmap(struct devname_nsmap *map) +{ + ASSERT(map); + if (map->dir_name) + kmem_free(map->dir_name, strlen(map->dir_name) + 1); + if (map->dir_module) + kmem_free(map->dir_module, strlen(map->dir_module) + 1); + if (map->dir_map) + kmem_free(map->dir_map, strlen(map->dir_map) + 1); + rw_destroy(&map->dir_lock); + kmem_free(map, sizeof (*map)); +} + +void +sdev_validate_nsmaps() +{ + struct devname_nsmap *map = NULL; + struct devname_nsmap *oldmap = NULL; + + ASSERT(MUTEX_HELD(&devname_nsmaps_lock)); + map = devname_nsmaps; + while (map) { + rw_enter(&map->dir_lock, RW_READER); + if ((map->dir_invalid == 1) && (map->dir_newmodule == NULL) && + (map->dir_newmap == NULL)) { + oldmap = map; + rw_exit(&map->dir_lock); + if (map->prev) + map->prev->next = oldmap->next; + if (map == devname_nsmaps) + devname_nsmaps = oldmap->next; + + map = oldmap->next; + if (map) + map->prev = oldmap->prev; + sdev_free_nsmap(oldmap); + oldmap = NULL; + } else { + rw_exit(&map->dir_lock); + map = map->next; + } + } + devname_nsmaps_invalidated = 0; +} + +static int +sdev_map_is_invalid(struct devname_nsmap *map) +{ + int ret = 0; + + ASSERT(map); + rw_enter(&map->dir_lock, RW_READER); + if (map->dir_invalid) + ret = 1; + rw_exit(&map->dir_lock); + return (ret); +} + +static int +sdev_check_map(struct devname_nsmap *map) +{ + struct devname_nsmap *mapp; + + mutex_enter(&devname_nsmaps_lock); + if (devname_nsmaps == NULL) { + mutex_exit(&devname_nsmaps_lock); + return (1); + } + + for (mapp = devname_nsmaps; mapp; mapp = mapp->next) { + if (mapp == map) { + mutex_exit(&devname_nsmaps_lock); + return (0); + } + } + + mutex_exit(&devname_nsmaps_lock); + return (1); + +} + +struct devname_nsmap * +sdev_get_map(struct sdev_node *dv, int validate) +{ + struct devname_nsmap *map; + int 
error; + + ASSERT(RW_READ_HELD(&dv->sdev_contents)); + map = dv->sdev_mapinfo; + if (map && sdev_check_map(map)) { + if (!rw_tryupgrade(&dv->sdev_contents)) { + rw_exit(&dv->sdev_contents); + rw_enter(&dv->sdev_contents, RW_WRITER); + } + dv->sdev_mapinfo = NULL; + rw_downgrade(&dv->sdev_contents); + return (NULL); + } + + if (validate && (!map || (map && sdev_map_is_invalid(map)))) { + if (!rw_tryupgrade(&dv->sdev_contents)) { + rw_exit(&dv->sdev_contents); + rw_enter(&dv->sdev_contents, RW_WRITER); + } + error = sdev_get_moduleops(dv); + if (!error) + map = dv->sdev_mapinfo; + rw_downgrade(&dv->sdev_contents); + } + return (map); +} + +void +sdev_handle_alloc(struct sdev_node *dv) +{ + rw_enter(&dv->sdev_contents, RW_WRITER); + dv->sdev_handle.dh_data = dv; + rw_exit(&dv->sdev_contents); +} + + +extern int sdev_vnodeops_tbl_size; + +/* + * construct a new template with overrides from vtab + */ +static fs_operation_def_t * +sdev_merge_vtab(const fs_operation_def_t tab[]) +{ + fs_operation_def_t *new; + const fs_operation_def_t *tab_entry; + + /* make a copy of standard vnode ops table */ + new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); + bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); + + /* replace the overrides from tab */ + for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { + fs_operation_def_t *std_entry = new; + while (std_entry->name) { + if (strcmp(tab_entry->name, std_entry->name) == 0) { + std_entry->func = tab_entry->func; + break; + } + std_entry++; + } + if (std_entry->name == NULL) + cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", + tab_entry->name); + } + + return (new); +} + +/* free memory allocated by sdev_merge_vtab */ +static void +sdev_free_vtab(fs_operation_def_t *new) +{ + kmem_free(new, sdev_vnodeops_tbl_size); +} + +void +devname_get_vnode(devname_handle_t *hdl, vnode_t **vpp) +{ + struct sdev_node *dv = hdl->dh_data; + + ASSERT(dv); + + rw_enter(&dv->sdev_contents, RW_READER); + *vpp = SDEVTOV(dv); + 
rw_exit(&dv->sdev_contents); +} + +int +devname_get_path(devname_handle_t *hdl, char **path) +{ + struct sdev_node *dv = hdl->dh_data; + + ASSERT(dv); + + rw_enter(&dv->sdev_contents, RW_READER); + *path = dv->sdev_path; + rw_exit(&dv->sdev_contents); + return (0); +} + +int +devname_get_name(devname_handle_t *hdl, char **entry) +{ + struct sdev_node *dv = hdl->dh_data; + + ASSERT(dv); + rw_enter(&dv->sdev_contents, RW_READER); + *entry = dv->sdev_name; + rw_exit(&dv->sdev_contents); + return (0); +} + +void +devname_get_dir_vnode(devname_handle_t *hdl, vnode_t **vpp) +{ + struct sdev_node *dv = hdl->dh_data->sdev_dotdot; + + ASSERT(dv); + + rw_enter(&dv->sdev_contents, RW_READER); + *vpp = SDEVTOV(dv); + rw_exit(&dv->sdev_contents); +} + +int +devname_get_dir_path(devname_handle_t *hdl, char **path) +{ + struct sdev_node *dv = hdl->dh_data->sdev_dotdot; + + ASSERT(dv); + rw_enter(&dv->sdev_contents, RW_READER); + *path = dv->sdev_path; + rw_exit(&dv->sdev_contents); + return (0); +} + +int +devname_get_dir_name(devname_handle_t *hdl, char **entry) +{ + struct sdev_node *dv = hdl->dh_data->sdev_dotdot; + + ASSERT(dv); + rw_enter(&dv->sdev_contents, RW_READER); + *entry = dv->sdev_name; + rw_exit(&dv->sdev_contents); + return (0); +} + +int +devname_get_dir_nsmap(devname_handle_t *hdl, struct devname_nsmap **map) +{ + struct sdev_node *dv = hdl->dh_data->sdev_dotdot; + + ASSERT(dv); + rw_enter(&dv->sdev_contents, RW_READER); + *map = dv->sdev_mapinfo; + rw_exit(&dv->sdev_contents); + return (0); +} + +int +devname_get_dir_handle(devname_handle_t *hdl, devname_handle_t **dir_hdl) +{ + struct sdev_node *dv = hdl->dh_data->sdev_dotdot; + + ASSERT(dv); + rw_enter(&dv->sdev_contents, RW_READER); + *dir_hdl = &(dv->sdev_handle); + rw_exit(&dv->sdev_contents); + return (0); +} + +void +devname_set_nodetype(devname_handle_t *hdl, void *args, int spec) +{ + struct sdev_node *dv = hdl->dh_data; + + ASSERT(dv); + rw_enter(&dv->sdev_contents, RW_WRITER); + hdl->dh_spec = 
(devname_spec_t)spec; + hdl->dh_args = (void *)i_ddi_strdup((char *)args, KM_SLEEP); + rw_exit(&dv->sdev_contents); +} + +/* + * a generic setattr() function + * + * note: flags only supports AT_UID and AT_GID. + * Future enhancements can be done for other types, e.g. AT_MODE + */ +int +devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, + struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, + int), int protocol) +{ + struct sdev_node *dv = VTOSDEV(vp); + struct sdev_node *parent = dv->sdev_dotdot; + struct vattr *get; + uint_t mask = vap->va_mask; + int error; + + /* some sanity checks */ + if (vap->va_mask & AT_NOSET) + return (EINVAL); + + if (vap->va_mask & AT_SIZE) { + if (vp->v_type == VDIR) { + return (EISDIR); + } + } + + /* no need to set attribute, but do not fail either */ + ASSERT(parent); + rw_enter(&parent->sdev_contents, RW_READER); + if (dv->sdev_state == SDEV_ZOMBIE) { + rw_exit(&parent->sdev_contents); + return (0); + } + + /* If backing store exists, just set it. */ + if (dv->sdev_attrvp) { + rw_exit(&parent->sdev_contents); + return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); + } + + /* + * Otherwise, for nodes with the persistence attribute, create it. 
+ */ + ASSERT(dv->sdev_attr); + if (SDEV_IS_PERSIST(dv) || + ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { + sdev_vattr_merge(dv, vap); + rw_enter(&dv->sdev_contents, RW_WRITER); + error = sdev_shadow_node(dv, cred); + rw_exit(&dv->sdev_contents); + rw_exit(&parent->sdev_contents); + + if (error) + return (error); + return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); + } + + + /* + * sdev_attr was allocated in sdev_mknode + */ + rw_enter(&dv->sdev_contents, RW_WRITER); + error = secpolicy_vnode_setattr(cred, vp, vap, dv->sdev_attr, + flags, sdev_unlocked_access, dv); + if (error) { + rw_exit(&dv->sdev_contents); + rw_exit(&parent->sdev_contents); + return (error); + } + + get = dv->sdev_attr; + if (mask & AT_MODE) { + get->va_mode &= S_IFMT; + get->va_mode |= vap->va_mode & ~S_IFMT; + } + + if ((mask & AT_UID) || (mask & AT_GID)) { + if (mask & AT_UID) + get->va_uid = vap->va_uid; + if (mask & AT_GID) + get->va_gid = vap->va_gid; + /* + * a callback must be provided if the protocol is set + */ + if ((protocol & AT_UID) || (protocol & AT_GID)) { + ASSERT(callback); + error = callback(dv, get, protocol); + if (error) { + rw_exit(&dv->sdev_contents); + rw_exit(&parent->sdev_contents); + return (error); + } + } + } + + if (mask & AT_ATIME) + get->va_atime = vap->va_atime; + if (mask & AT_MTIME) + get->va_mtime = vap->va_mtime; + if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { + gethrestime(&get->va_ctime); + } + + sdev_vattr_merge(dv, get); + rw_exit(&dv->sdev_contents); + rw_exit(&parent->sdev_contents); + return (0); +} diff --git a/usr/src/uts/common/fs/dev/sdev_vfsops.c b/usr/src/uts/common/fs/dev/sdev_vfsops.c new file mode 100644 index 0000000000..6ecea19f3f --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_vfsops.c @@ -0,0 +1,524 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This is the /dev (hence, the sdev_ prefix) filesystem. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/kmem.h> +#include <sys/time.h> +#include <sys/pathname.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <sys/stat.h> +#include <sys/errno.h> +#include <sys/cmn_err.h> +#include <sys/cred.h> +#include <sys/statvfs.h> +#include <sys/policy.h> +#include <sys/mount.h> +#include <sys/debug.h> +#include <sys/modctl.h> +#include <sys/mkdev.h> +#include <fs/fs_subr.h> +#include <sys/fs/sdev_impl.h> +#include <sys/fs/sdev_node.h> +#include <sys/fs/snode.h> +#include <sys/fs/dv_node.h> +#include <sys/sunndi.h> +#include <sys/mntent.h> + +/* + * /dev vfs operations. 
+ */ + +/* + * globals + */ +struct sdev_data *sdev_origins; /* mount info for origins under /dev */ + +/* + * static + */ +static kmutex_t sdev_lock; /* protects global data */ +static major_t devmajor; /* the fictitious major we live on */ +static major_t devminor; /* the fictitious minor of this instance */ +static struct sdev_data *sdev_mntinfo = NULL; /* linked list of instances */ +static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */ +static int sdev_mntinfo_cnt; /* mntinfo reference count */ + +static int sdev_mount(struct vfs *, struct vnode *, struct mounta *, + struct cred *); +static int sdev_unmount(struct vfs *, int, struct cred *); +static int sdev_root(struct vfs *, struct vnode **); +static int sdev_statvfs(struct vfs *, struct statvfs64 *); +static void sdev_insert_mntinfo(struct sdev_data *); +static int devinit(int, char *); + +static vfsdef_t sdev_vfssw = { + VFSDEF_VERSION, + "dev", /* type name string */ + devinit, /* init routine */ + VSW_CANREMOUNT, /* flags */ + NULL /* mount options table prototype */ +}; + + +/* + * Module linkage information + */ +static struct modlfs modlfs = { + &mod_fsops, "/dev filesystem %I%", &sdev_vfssw +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlfs, NULL +}; + +int +_init(void) +{ + int e; + + mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL); + sdev_node_cache_init(); + sdev_devfsadm_lockinit(); + if ((e = mod_install(&modlinkage)) != 0) { + sdev_devfsadm_lockdestroy(); + sdev_node_cache_fini(); + mutex_destroy(&sdev_lock); + return (e); + } + return (0); +} + +/* + * dev module remained loaded for the global /dev instance + */ +int +_fini(void) +{ + return (EBUSY); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +/*ARGSUSED*/ +static int +devinit(int fstype, char *name) +{ + static const fs_operation_def_t dev_vfsops_tbl[] = { + VFSNAME_MOUNT, sdev_mount, /* mount file system */ + VFSNAME_UNMOUNT, sdev_unmount, 
/* unmount file system */ + VFSNAME_ROOT, sdev_root, /* get root vnode */ + VFSNAME_STATVFS, sdev_statvfs, /* get file system statistics */ + NULL, NULL + }; + + int error; + extern major_t getudev(void); + + devtype = fstype; + + error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL); + if (error != 0) { + cmn_err(CE_WARN, "devinit: bad vfs ops tbl"); + return (error); + } + + error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops); + if (error != 0) { + (void) vfs_freevfsops_by_type(fstype); + cmn_err(CE_WARN, "devinit: bad vnode ops tbl"); + return (error); + } + + if ((devmajor = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name); + return (1); + } + + /* initialize negative cache */ + sdev_ncache_init(); + + return (0); +} + +/* + * Both mount point and backing store directory name are + * passed in from userland + */ +static int +sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, + struct cred *cr) +{ + struct sdev_data *sdev_data; + struct vnode *avp; + struct sdev_node *dv; + struct sdev_mountargs *args = NULL; + int error = 0; + dev_t devdev; + + /* + * security check + */ + if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) || + (secpolicy_sys_devices(cr) != 0)) + return (EPERM); + + /* + * Sanity check the mount point + */ + if (mvp->v_type != VDIR) + return (ENOTDIR); + + /* + * Sanity Check for overlay mount. 
+ */ + mutex_enter(&mvp->v_lock); + if ((uap->flags & MS_OVERLAY) == 0 && + (uap->flags & MS_REMOUNT) == 0 && + (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { + mutex_exit(&mvp->v_lock); + return (EBUSY); + } + mutex_exit(&mvp->v_lock); + + args = kmem_zalloc(sizeof (*args), KM_SLEEP); + + if ((uap->flags & MS_DATA) && + (uap->datalen != 0 && uap->dataptr != NULL)) { + /* copy in the arguments */ + if (error = sdev_copyin_mountargs(uap, args)) + goto cleanup; + } + + /* + * Sanity check the backing store + */ + if (args->sdev_attrdir) { + /* user supplied an attribute store */ + if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir, + UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) { + cmn_err(CE_NOTE, "/dev fs: lookup on attribute " + "directory %s failed", + (char *)(uintptr_t)args->sdev_attrdir); + goto cleanup; + } + + if (avp->v_type != VDIR) { + VN_RELE(avp); + error = ENOTDIR; + goto cleanup; + } + } else { + /* use mountp as the attribute store */ + avp = mvp; + VN_HOLD(avp); + } + + mutex_enter(&sdev_lock); + + /* + * handling installation + */ + if (uap->flags & MS_REMOUNT) { + sdev_data = (struct sdev_data *)vfsp->vfs_data; + ASSERT(sdev_data); + + dv = sdev_data->sdev_root; + ASSERT(dv == dv->sdev_dotdot); + + /* + * mark all existing sdev_nodes (except root node) stale + */ + sdev_stale(dv); + + /* Reset previous mountargs */ + if (sdev_data->sdev_mountargs) { + kmem_free(sdev_data->sdev_mountargs, + sizeof (struct sdev_mountargs)); + } + sdev_data->sdev_mountargs = args; + args = NULL; /* so it won't be freed below */ + + sdev_stale_attrvp = dv->sdev_attrvp; + dv->sdev_attrvp = avp; + vfsp->vfs_mtime = ddi_get_time(); + + mutex_exit(&sdev_lock); + goto cleanup; /* we're done */ + } + + /* + * Create and initialize the vfs-private data. + */ + devdev = makedevice(devmajor, devminor); + while (vfs_devismounted(devdev)) { + devminor = (devminor + 1) & MAXMIN32; + + /* + * All the minor numbers are used up. 
+ */ + if (devminor == 0) { + mutex_exit(&sdev_lock); + VN_RELE(avp); + error = ENODEV; + goto cleanup; + } + + devdev = makedevice(devmajor, devminor); + } + + dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr); + sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP); + vfsp->vfs_dev = devdev; + vfsp->vfs_data = (caddr_t)sdev_data; + vfsp->vfs_fstype = devtype; + vfsp->vfs_bsize = DEV_BSIZE; + vfsp->vfs_mtime = ddi_get_time(); + vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype); + + ASSERT(dv == dv->sdev_dotdot); + + sdev_data->sdev_vfsp = vfsp; + sdev_data->sdev_root = dv; + sdev_data->sdev_mountargs = args; + + /* get acl flavor from attribute dir */ + if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor, + kcred) != 0 || sdev_data->sdev_acl_flavor == 0) + sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED; + + args = NULL; /* so it won't be freed below */ + sdev_insert_mntinfo(sdev_data); + mutex_exit(&sdev_lock); + + if (!SDEV_IS_GLOBAL(dv)) { + ASSERT(sdev_origins); + dv->sdev_flags &= ~SDEV_GLOBAL; + dv->sdev_origin = sdev_origins->sdev_root; + } else { + sdev_ncache_setup(); + } + + sdev_update_timestamps(dv->sdev_attrvp, + cr, AT_CTIME|AT_MTIME|AT_ATIME); + +cleanup: + if (args) + kmem_free(args, sizeof (*args)); + return (error); +} + +/* + * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone. 
+ *
+ * Fails with EPERM when the caller lacks privilege, ENOTSUP for a forced
+ * unmount (MS_FORCE), and EBUSY when the root vnode is still referenced
+ * or the instance is the global one.  Otherwise the root node is
+ * destroyed, the instance is unlinked from the sdev_mntinfo list and its
+ * private data is freed.
+ */
+static int
+sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
+{
+	struct sdev_node *dv;
+	int error;
+	struct sdev_data *sdev_data, *prev, *next;
+
+	/*
+	 * enforce the security policies
+	 */
+	if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
+	    (secpolicy_sys_devices(cr) != 0))
+		return (EPERM);
+
+	/* forced unmount is not implemented */
+	if (flag & MS_FORCE)
+		return (ENOTSUP);
+
+	mutex_enter(&sdev_lock);
+	dv = VFSTOSDEVFS(vfsp)->sdev_root;
+	ASSERT(dv == dv->sdev_dotdot);
+	/* more than the mount's own hold on the root means it is in use */
+	if (SDEVTOV(dv)->v_count > 1) {
+		mutex_exit(&sdev_lock);
+		return (EBUSY);
+	}
+
+	/*
+	 * global instance remains mounted
+	 */
+	if (SDEV_IS_GLOBAL(dv)) {
+		mutex_exit(&sdev_lock);
+		return (EBUSY);
+	}
+	mutex_exit(&sdev_lock);
+
+	/*
+	 * verify the v_count
+	 * NOTE(review): sdev_cleandir() presumably tears down the nodes
+	 * below the root and fails if any is still busy -- confirm.
+	 */
+	if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
+		return (error);
+	}
+	ASSERT(SDEVTOV(dv)->v_count == 1);
+
+	/* release hold on root node and destroy it */
+	SDEV_RELE(dv);
+	dv->sdev_nlink -= 2;
+	sdev_nodedestroy(dv, 0);
+
+	sdev_data = (struct sdev_data *)vfsp->vfs_data;
+	vfsp->vfs_data = (caddr_t)0;
+
+	/*
+	 * Unlink this instance from the doubly linked sdev_mntinfo list.
+	 * XXX separate it into sdev_delete_mntinfo() if useful
+	 */
+	mutex_enter(&sdev_lock);
+	prev = sdev_data->sdev_prev;
+	next = sdev_data->sdev_next;
+	if (prev)
+		prev->sdev_next = next;
+	else
+		sdev_mntinfo = next;
+	if (next)
+		next->sdev_prev = prev;
+	mutex_exit(&sdev_lock);
+
+	if (sdev_data->sdev_mountargs) {
+		kmem_free(sdev_data->sdev_mountargs,
+		    sizeof (struct sdev_mountargs));
+	}
+	kmem_free(sdev_data, sizeof (struct sdev_data));
+	return (0);
+}
+
+/*
+ * return root vnode for given vfs
+ *
+ * The vnode is returned held; the caller is responsible for releasing it.
+ */
+static int
+sdev_root(struct vfs *vfsp, struct vnode **vpp)
+{
+	*vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
+	VN_HOLD(*vpp);
+	return (0);
+}
+
+/*
+ * return 'generic superblock' information to userland.
+ *
+ * There is very little of substance that /dev can report here.
+ */
+static int
+sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
+{
+	dev32_t fsid32;
+
+	/* start from a clean slate so unreported fields read as zero */
+	bzero(sbp, sizeof (*sbp));
+
+	sbp->f_bsize = vfsp->vfs_bsize;
+	sbp->f_frsize = sbp->f_bsize;
+
+	/* block accounting has no meaning for this filesystem */
+	sbp->f_blocks = 0;
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+
+	/* report allocated nodes; free/available files are not relevant */
+	sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
+	sbp->f_ffree = 0;
+	sbp->f_favail = 0;
+
+	(void) cmpldev(&fsid32, vfsp->vfs_dev);
+	sbp->f_fsid = fsid32;
+	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	sbp->f_namemax = MAXNAMELEN - 1;
+	(void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
+	(void) strcpy(sbp->f_fstr, "dev");
+
+	return (0);
+}
+
+/*
+ * Register the operations vector of a name service module.  The
+ * DEVNAME_NSCONFIG module is recorded globally; any other module must
+ * already have a directory map, otherwise EFAULT is returned.
+ */
+int
+sdev_module_register(char *mod_name, struct devname_ops *dev_ops)
+{
+	struct devname_nsmap *nsmap;
+
+	if (strcmp(mod_name, DEVNAME_NSCONFIG) != 0) {
+		nsmap = sdev_get_nsmap_by_module(mod_name);
+		if (nsmap == NULL)
+			return (EFAULT);
+
+		rw_enter(&nsmap->dir_lock, RW_WRITER);
+		nsmap->dir_ops = dev_ops;
+		rw_exit(&nsmap->dir_lock);
+	} else {
+		devname_ns_ops = dev_ops;
+	}
+
+	return (0);
+}
+
+/*
+ * Push a mount instance onto the head of the sdev_mntinfo list.  The
+ * very first instance inserted is also recorded as sdev_origins.
+ * The caller must hold sdev_lock.
+ */
+static void
+sdev_insert_mntinfo(struct sdev_data *data)
+{
+	struct sdev_data *head;
+
+	ASSERT(mutex_owned(&sdev_lock));
+	head = sdev_mntinfo;
+	data->sdev_prev = NULL;
+	data->sdev_next = head;
+	if (head == NULL)
+		sdev_origins = data;
+	else
+		head->sdev_prev = data;
+	sdev_mntinfo = data;
+}
+
+/*
+ * Find the mount instance whose root node is named mntpt.  On a match
+ * the root vnode's v_count is bumped and the instance is returned;
+ * NULL is returned when nothing matches.
+ */
+struct sdev_data *
+sdev_find_mntinfo(char *mntpt)
+{
+	struct sdev_data *cur;
+
+	mutex_enter(&sdev_lock);
+	for (cur = sdev_mntinfo; cur != NULL; cur = cur->sdev_next) {
+		if (strcmp(mntpt, cur->sdev_root->sdev_name) != 0)
+			continue;
+		SDEVTOV(cur->sdev_root)->v_count++;
+		break;
+	}
+	mutex_exit(&sdev_lock);
+	return (cur);
+}
+
+/*
+ * Drop the root vnode count taken by sdev_find_mntinfo().
+ */
+void
+sdev_mntinfo_rele(struct sdev_data *mntinfo)
+{
+	struct vnode *rootvp;
+
+	mutex_enter(&sdev_lock);
+	rootvp = SDEVTOV(mntinfo->sdev_root);
+	rootvp->v_count--;
+	mutex_exit(&sdev_lock);
+}
+diff --git
a/usr/src/uts/common/fs/dev/sdev_vnops.c b/usr/src/uts/common/fs/dev/sdev_vnops.c new file mode 100644 index 0000000000..da579439ee --- /dev/null +++ b/usr/src/uts/common/fs/dev/sdev_vnops.c @@ -0,0 +1,1329 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * vnode ops for the /dev filesystem + * + * - VDIR, VCHR, CBLK, and VLNK are considered must supported files + * - VREG and VDOOR are used for some internal implementations in + * the global zone, e.g. 
devname and devfsadm communication
+ * - other file types are unusual in this namespace and
+ *	not supported for now
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/t_lock.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/user.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/flock.h>
+#include <sys/kmem.h>
+#include <sys/uio.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/cred.h>
+#include <sys/cred_impl.h>
+#include <sys/dirent.h>
+#include <sys/pathname.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/policy.h>
+#include <vm/hat.h>
+#include <vm/seg_vn.h>
+#include <vm/seg_map.h>
+#include <vm/seg.h>
+#include <vm/as.h>
+#include <vm/page.h>
+#include <sys/proc.h>
+#include <sys/mode.h>
+#include <sys/sunndi.h>
+#include <sys/ptms.h>
+#include <fs/fs_subr.h>
+#include <sys/fs/dv_node.h>
+#include <sys/fs/sdev_impl.h>
+#include <sys/fs/sdev_node.h>
+
+/*
+ * Open a /dev node.  Directories always succeed.  Anything else must be
+ * a regular file in the global instance, in which case the open is passed
+ * on to the backing store vnode; ENOENT is returned if there is none and
+ * ENOTSUP for non-global instances or other file types.
+ */
+/*ARGSUSED*/
+static int
+sdev_open(struct vnode **vpp, int flag, struct cred *cred)
+{
+	struct sdev_node *dv = VTOSDEV(*vpp);
+	struct sdev_node *ddv = dv->sdev_dotdot;
+	int error = 0;
+
+	if ((*vpp)->v_type == VDIR)
+		return (0);
+
+	if (!SDEV_IS_GLOBAL(dv))
+		return (ENOTSUP);
+
+	ASSERT((*vpp)->v_type == VREG);
+	if ((*vpp)->v_type != VREG)
+		return (ENOTSUP);
+
+	/* the parent's contents lock is held across the backing store open */
+	ASSERT(ddv);
+	rw_enter(&ddv->sdev_contents, RW_READER);
+	if (dv->sdev_attrvp == NULL) {
+		rw_exit(&ddv->sdev_contents);
+		return (ENOENT);
+	}
+	error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred);
+	rw_exit(&ddv->sdev_contents);
+	return (error);
+}
+
+/*
+ * Close a /dev node.  For directories the calling process's locks and
+ * shares on the vnode are cleaned up; for regular files in the global
+ * instance the close is passed on to the backing store vnode.
+ */
+/*ARGSUSED1*/
+static int
+sdev_close(struct vnode *vp, int flag, int count,
+    offset_t offset, struct cred *cred)
+{
+	struct sdev_node *dv = VTOSDEV(vp);
+
+	if (vp->v_type == VDIR) {
+		cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
+		cleanshares(vp, ttoproc(curthread)->p_pid);
+		return (0);
+	}
+
+	if (!SDEV_IS_GLOBAL(dv))
+		return (ENOTSUP);
+
+	ASSERT(vp->v_type == VREG);
+	if (vp->v_type != VREG)
+		return (ENOTSUP);
+
+	ASSERT(dv->sdev_attrvp);
+	return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred));
+}
+
+/*
+ * Read from a regular file in the global /dev instance by reading the
+ * backing store vnode under its read lock.  Returns EISDIR for
+ * directories and EINVAL for non-global instances or other file types.
+ */
+/*ARGSUSED*/
+static int
+sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred,
+	struct caller_context *ct)
+{
+	struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp);
+	int	error;
+
+	if (!SDEV_IS_GLOBAL(dv))
+		return (EINVAL);
+
+	if (vp->v_type == VDIR)
+		return (EISDIR);
+
+	/* only supporting regular files in /dev */
+	ASSERT(vp->v_type == VREG);
+	if (vp->v_type != VREG)
+		return (EINVAL);
+
+	ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents));
+	ASSERT(dv->sdev_attrvp);
+	(void) VOP_RWLOCK(dv->sdev_attrvp, 0, NULL);
+	error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct);
+	VOP_RWUNLOCK(dv->sdev_attrvp, 0, NULL);
+	return (error);
+}
+
+/*
+ * Write to a regular file in the global /dev instance by writing the
+ * backing store vnode under its write lock; on success the modification
+ * time of the backing store is updated.  Error returns mirror sdev_read().
+ */
+/*ARGSUSED*/
+static int
+sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred,
+	struct caller_context *ct)
+{
+	struct sdev_node *dv = VTOSDEV(vp);
+	int	error = 0;
+
+	if (!SDEV_IS_GLOBAL(dv))
+		return (EINVAL);
+
+	if (vp->v_type == VDIR)
+		return (EISDIR);
+
+	/* only supporting regular files in /dev */
+	ASSERT(vp->v_type == VREG);
+	if (vp->v_type != VREG)
+		return (EINVAL);
+
+	ASSERT(dv->sdev_attrvp);
+
+	(void) VOP_RWLOCK(dv->sdev_attrvp, 1, NULL);
+	error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct);
+	VOP_RWUNLOCK(dv->sdev_attrvp, 1, NULL);
+	if (error == 0) {
+		sdev_update_timestamps(dv->sdev_attrvp, kcred,
+		    AT_MTIME);
+	}
+	return (error);
+}
+
+/*
+ * ioctl on a regular file in the global /dev instance is passed on to
+ * the backing store vnode.  Directories and non-global instances get
+ * ENOTTY, other file types EINVAL.
+ */
+/*ARGSUSED*/
+static int
+sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag,
+    struct cred *cred, int *rvalp)
+{
+	struct sdev_node *dv = VTOSDEV(vp);
+
+	if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR))
+		return (ENOTTY);
+
+	ASSERT(vp->v_type == VREG);
+	if (vp->v_type != VREG)
+		return (EINVAL);
+
+	ASSERT(dv->sdev_attrvp);
+	return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp));
+}
+
+static int
+sdev_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr)
+{
+	int			error = 0;
+	struct sdev_node	*dv = VTOSDEV(vp);
+	struct sdev_node	*parent = dv->sdev_dotdot;
+	struct devname_nsmap *map = NULL;
+	struct devname_ops	*dirops = NULL;
+	int (*fn)(devname_handle_t *, struct vattr *, struct cred *);
+
+	ASSERT(parent);
+
+	rw_enter(&parent->sdev_contents, RW_READER);
+	ASSERT(dv->sdev_attr || dv->sdev_attrvp);
+	/* module ops are only consulted for live nodes in the global /dev */
+	if (SDEV_IS_GLOBAL(dv) && (dv->sdev_state != SDEV_ZOMBIE)) {
+		map = sdev_get_map(parent, 0);
+		dirops = map ? map->dir_ops : NULL;
+	}
+
+	/*
+	 * search order:
+	 *	- for persistent nodes (SDEV_PERSIST): backstore
+	 *	- for non-persistent nodes: module ops if global, then memory
+	 */
+	if (dv->sdev_attrvp) {
+		rw_exit(&parent->sdev_contents);
+		error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr);
+		sdev_vattr_merge(dv, vap);
+	} else if (dirops && (fn = dirops->devnops_getattr)) {
+		sdev_vattr_merge(dv, vap);
+		rw_exit(&parent->sdev_contents);
+		error = (*fn)(&(dv->sdev_handle), vap, cr);
+	} else {
+		/* fall back to the in-memory copy of the attributes */
+		ASSERT(dv->sdev_attr);
+		*vap = *dv->sdev_attr;
+		sdev_vattr_merge(dv, vap);
+		rw_exit(&parent->sdev_contents);
+	}
+
+	return (error);
+}
+
+/*
+ * Set attributes through the generic helper, with no callback and no
+ * protocol bits.
+ */
+static int
+sdev_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred)
+{
+	return (devname_setattr_func(vp, vap, flags, cred, NULL, 0));
+}
+
+/*
+ * Get the ACL of a node.  With a backing store vnode the request is
+ * passed on to it.  Without one, an ACL is fabricated with fs_fab_acl()
+ * when the requested mask matches the instance's ACL flavor; otherwise
+ * ENOSYS is returned.
+ */
+static int
+sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
+    struct cred *cr)
+{
+	int	error;
+	struct sdev_node *dv = VTOSDEV(vp);
+	struct vnode *avp = dv->sdev_attrvp;
+
+	if (avp == NULL) {
+		/* return fs_fab_acl() if flavor matches, else do nothing */
+		if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED &&
+		    (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) ||
+		    (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED &&
+		    (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE))))
+			return (fs_fab_acl(vp, vsap, flags, cr));
+
+		return (ENOSYS);
+	}
+
+	(void) VOP_RWLOCK(avp, 1, NULL);
+	error = VOP_GETSECATTR(avp, vsap, flags, cr);
+	VOP_RWUNLOCK(avp, 1, NULL);
+	return (error);
+}
+
+/*
+ * Set the ACL of a node.  Zombie nodes succeed without doing anything.
+ * A node without a backing store gets one created first (unless it is a
+ * non-persistent node in the global instance, which is not supported);
+ * the request is then passed on to the backing store vnode.
+ */
+static int
+sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
+    struct cred *cr)
+{
+	int	error;
+	struct sdev_node *dv = VTOSDEV(vp);
+	struct vnode *avp = dv->sdev_attrvp;
+
+	if (dv->sdev_state == SDEV_ZOMBIE)
+		return (0);
+
+	if (avp == NULL) {
+		if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv))
+			return (fs_nosys());
+
+		ASSERT(dv->sdev_attr);
+		/*
+		 * if coming in directly, the acl system call will
+		 * have held the read-write lock via VOP_RWLOCK()
+		 * If coming in via specfs, specfs will have
+		 * held the rw lock on the realvp i.e. us.
+		 */
+		ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
+		sdev_vattr_merge(dv, dv->sdev_attr);
+		error = sdev_shadow_node(dv, cr);
+		if (error) {
+			return (fs_nosys());
+		}
+
+		ASSERT(dv->sdev_attrvp);
+		/* clean out the memory copy if any */
+		if (dv->sdev_attr) {
+			kmem_free(dv->sdev_attr, sizeof (struct vattr));
+			dv->sdev_attr = NULL;
+		}
+		avp = dv->sdev_attrvp;
+	}
+	ASSERT(avp);
+
+	(void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL);
+	error = VOP_SETSECATTR(avp, vsap, flags, cr);
+	VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL);
+	return (error);
+}
+
+/*
+ * Check the requested access mode against the in-memory attributes of
+ * a node.  The mode is shifted to select the owner, group or other
+ * permission bits depending on the credential; whatever is not granted
+ * by those bits is left to secpolicy_vnode_access() to decide.
+ * vdv is the sdev_node; the caller does the locking.
+ */
+int
+sdev_unlocked_access(void *vdv, int mode, struct cred *cr)
+{
+	struct sdev_node	*dv = vdv;
+	int			shift = 0;
+	uid_t			owner = dv->sdev_attr->va_uid;
+
+	/* not the owner: use group bits; not in the group: other bits */
+	if (crgetuid(cr) != owner) {
+		shift += 3;
+		if (groupmember(dv->sdev_attr->va_gid, cr) == 0)
+			shift += 3;
+	}
+
+	/* strip the bits that the file mode already grants */
+	mode &= ~(dv->sdev_attr->va_mode << shift);
+	if (mode == 0)
+		return (0);
+
+	return (secpolicy_vnode_access(cr, SDEVTOV(dv), owner, mode));
+}
+
+/*
+ * Access check: defer to the backing store vnode if there is one,
+ * otherwise check the in-memory attributes under the node's contents
+ * lock, mapping any failure to EACCES.
+ */
+static int
+sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr)
+{
+	struct sdev_node	*dv = VTOSDEV(vp);
+	int ret = 0;
+
+	ASSERT(dv->sdev_attr || dv->sdev_attrvp);
+
+	if (dv->sdev_attrvp) {
+		ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr);
+	} else if (dv->sdev_attr) {
+		rw_enter(&dv->sdev_contents, RW_READER);
+		ret = sdev_unlocked_access(dv, mode, cr);
+		if (ret)
+			ret = EACCES;
+
rw_exit(&dv->sdev_contents); + } + + return (ret); +} + +/* + * Lookup + */ +/*ARGSUSED3*/ +static int +sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, + struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) +{ + struct sdev_node *parent; + + parent = VTOSDEV(dvp); + ASSERT(parent); + + if (!SDEV_IS_GLOBAL(parent)) + return (prof_lookup(dvp, nm, vpp, cred)); + return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0)); +} + +/*ARGSUSED2*/ +static int +sdev_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, + int mode, struct vnode **vpp, struct cred *cred, int flag) +{ + struct vnode *vp = NULL; + struct vnode *avp; + struct sdev_node *parent; + struct sdev_node *self = NULL; + int error = 0; + vtype_t type = vap->va_type; + + ASSERT(vap->va_type != VNON && + vap->va_type != VBAD); + + if ((type == VFIFO) || (type == VSOCK) || + (type == VPROC) || (type == VPORT)) + return (ENOTSUP); + + parent = VTOSDEV(dvp); + ASSERT(parent); + + rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); + if (parent->sdev_state == SDEV_ZOMBIE) { + rw_exit(&parent->sdev_dotdot->sdev_contents); + return (ENOENT); + } + + /* non-global do not allow pure node creation */ + if (!SDEV_IS_GLOBAL(parent)) { + rw_exit(&parent->sdev_dotdot->sdev_contents); + return (prof_lookup(dvp, nm, vpp, cred)); + } + rw_exit(&parent->sdev_dotdot->sdev_contents); + +again: + /* check existing name */ + error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred); + + /* name found */ + if (error == 0) { + ASSERT(vp); + if (excl == EXCL) { + error = EEXIST; + } else if ((vp->v_type == VDIR) && (mode & VWRITE)) { + /* allowing create/read-only an existing directory */ + error = EISDIR; + } else { + error = VOP_ACCESS(vp, mode, flag, cred); + } + + if (error) { + VN_RELE(vp); + return (error); + } + + /* truncation first */ + if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && + (vap->va_size == 0)) { + ASSERT(parent->sdev_attrvp); + 
ASSERT(VTOSDEV(vp)->sdev_attrvp); + error = VOP_CREATE(parent->sdev_attrvp, + nm, vap, excl, mode, &avp, cred, flag); + + if (error) { + VN_RELE(vp); + return (error); + } + } + + sdev_update_timestamps(vp, kcred, + AT_CTIME|AT_MTIME|AT_ATIME); + *vpp = vp; + return (0); + } + + /* bail out early */ + if (error != ENOENT) + return (error); + + /* + * For memory-based (ROFS) directory: + * - either disallow node creation; + * - or implement VOP_CREATE of its own + */ + rw_enter(&parent->sdev_contents, RW_WRITER); + if (!SDEV_IS_PERSIST(parent)) { + rw_exit(&parent->sdev_contents); + return (ENOTSUP); + } + ASSERT(parent->sdev_attrvp); + error = sdev_mknode(parent, nm, &self, vap, NULL, NULL, + cred, SDEV_READY); + if (error) { + rw_exit(&parent->sdev_contents); + if (self) + SDEV_RELE(self); + return (error); + } + rw_exit(&parent->sdev_contents); + + ASSERT(self); + /* take care the timestamps for the node and its parent */ + sdev_update_timestamps(SDEVTOV(self), kcred, + AT_CTIME|AT_MTIME|AT_ATIME); + sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); + if (SDEV_IS_GLOBAL(parent)) + atomic_inc_ulong(&parent->sdev_gdir_gen); + + /* wake up other threads blocked on looking up this node */ + mutex_enter(&self->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); + mutex_exit(&self->sdev_lookup_lock); + error = sdev_to_vp(self, vpp); + return (error); +} + +static int +sdev_remove(struct vnode *dvp, char *nm, struct cred *cred) +{ + int error; + struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); + struct vnode *vp = NULL; + struct sdev_node *dv = NULL; + struct devname_nsmap *map = NULL; + struct devname_ops *dirops = NULL; + int (*fn)(devname_handle_t *); + int len; + int bkstore = 0; + + /* bail out early */ + len = strlen(nm); + if (nm[0] == '.') { + if (len == 1) { + return (EINVAL); + } else if (len == 2 && nm[1] == '.') { + return (EEXIST); + } + } + + ASSERT(parent); + rw_enter(&parent->sdev_contents, RW_READER); + if 
(!SDEV_IS_GLOBAL(parent)) { + rw_exit(&parent->sdev_contents); + return (ENOTSUP); + } + + /* check existence first */ + dv = sdev_cache_lookup(parent, nm); + if (dv == NULL) { + rw_exit(&parent->sdev_contents); + return (ENOENT); + } + + vp = SDEVTOV(dv); + if ((dv->sdev_state == SDEV_INIT) || + (dv->sdev_state == SDEV_ZOMBIE)) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (ENOENT); + } + + /* the module may record/reject removing a device node */ + map = sdev_get_map(parent, 0); + dirops = map ? map->dir_ops : NULL; + if (dirops && ((fn = dirops->devnops_remove) != NULL)) { + error = (*fn)(&(dv->sdev_handle)); + if (error) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (error); + } + } + + /* + * sdev_dirdelete does the real job of: + * - make sure no open ref count + * - destroying the sdev_node + * - releasing the hold on attrvp + */ + bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; + if (!rw_tryupgrade(&parent->sdev_contents)) { + rw_exit(&parent->sdev_contents); + rw_enter(&parent->sdev_contents, RW_WRITER); + } + error = sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); + rw_exit(&parent->sdev_contents); + + sdcmn_err2(("sdev_remove: cache_update error %d\n", error)); + if (error && (error != EBUSY)) { + /* report errors other than EBUSY */ + VN_RELE(vp); + } else { + sdcmn_err2(("sdev_remove: cleaning node %s from cache " + " with error %d\n", nm, error)); + + /* + * best efforts clean up the backing store + */ + if (bkstore) { + ASSERT(parent->sdev_attrvp); + error = VOP_REMOVE(parent->sdev_attrvp, nm, cred); + /* + * do not report BUSY error + * because the backing store ref count is released + * when the last ref count on the sdev_node is + * released. 
+			 */
+			if (error == EBUSY) {
+				sdcmn_err2(("sdev_remove: device %s is still on"
+				    "disk %s\n", nm, parent->sdev_path));
+				error = 0;
+			}
+		}
+
+		if (error == EBUSY)
+			error = 0;
+	}
+
+	return (error);
+}
+
+/*
+ * Rename onm in directory odvp to nnm in directory ndvp.
+ *
+ * Some restrictions for this file system:
+ *  - both oldnm and newnm are in the scope of /dev file system,
+ *    to simplify the namespace management model.
+ *  - renaming is only supported for nodes of the global instance.
+ *  - if the destination exists it must be of the same type as the
+ *    source; it is removed before the source is linked to the new name.
+ *
+ * Returns EINVAL for "." and ".." or a type mismatch, ENOENT for a zombie
+ * parent, ENOTSUP for non-global instances, EXDEV if the directories are
+ * on different file systems, ENAMETOOLONG if the destination path handed
+ * to the plug-in module does not fit, or the error of whichever lookup,
+ * access check, plug-in callback or node operation failed.
+ */
+static int
+sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,
+    struct cred *cred)
+{
+	struct sdev_node	*fromparent = NULL;
+	struct vattr		vattr;
+	struct sdev_node	*toparent;
+	struct sdev_node	*fromdv = NULL;	/* source node */
+	struct vnode		*ovp = NULL;	/* source vnode */
+	struct sdev_node	*todv = NULL;	/* destination node */
+	struct vnode		*nvp = NULL;	/* destination vnode */
+	int			samedir = 0;	/* set if odvp == ndvp */
+	struct vnode		*realvp;
+	char			nnm_path[MAXPATHLEN];
+	struct devname_nsmap	*omap = NULL;
+	struct devname_ops	*odirops = NULL;
+	int (*fn)(devname_handle_t *, char *);
+	int (*rmfn)(devname_handle_t *);
+	int			error = 0;
+	dev_t			fsid;
+	int			bkstore = 0;
+
+	/* prevent modifying "." and ".." */
+	if ((onm[0] == '.' &&
+	    (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0')))) {
+		return (EINVAL);
+	}
+
+	fromparent = VTOSDEV(odvp);
+	toparent = VTOSDEV(ndvp);
+
+	/* ZOMBIE parent doesn't allow new node creation */
+	rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER);
+	if (fromparent->sdev_state == SDEV_ZOMBIE) {
+		rw_exit(&fromparent->sdev_dotdot->sdev_contents);
+		return (ENOENT);
+	}
+
+	/* renaming only supported for global device nodes */
+	if (!SDEV_IS_GLOBAL(fromparent)) {
+		rw_exit(&fromparent->sdev_dotdot->sdev_contents);
+		return (ENOTSUP);
+	}
+	rw_exit(&fromparent->sdev_dotdot->sdev_contents);
+
+	rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER);
+	if (toparent->sdev_state == SDEV_ZOMBIE) {
+		rw_exit(&toparent->sdev_dotdot->sdev_contents);
+		return (ENOENT);
+	}
+	rw_exit(&toparent->sdev_dotdot->sdev_contents);
+
+	/* check existence of the source node */
+	error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred);
+	if (error) {
+		sdcmn_err2(("sdev_rename: lookup of the source node %s "
+		    "failed\n", onm));
+		return (error);
+	}
+
+	if (VOP_REALVP(ovp, &realvp) == 0) {
+		VN_HOLD(realvp);
+		VN_RELE(ovp);
+		ovp = realvp;
+	}
+
+	/*
+	 * check existence of destination
+	 * nvp stays NULL when the destination does not exist (ENOENT).
+	 */
+	error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred);
+	if (error && (error != ENOENT)) {
+		VN_RELE(ovp);
+		return (error);
+	}
+
+	if (nvp && (VOP_REALVP(nvp, &realvp) == 0)) {
+		VN_HOLD(realvp);
+		VN_RELE(nvp);
+		nvp = realvp;
+	}
+
+	/*
+	 * For now, if both exist, they must be the same type.
+	 * Changing the type of a node probably needs some special
+	 * handling.
+	 */
+	if (ovp && nvp) {
+		if (ovp->v_type != nvp->v_type) {
+			error = EINVAL;
+			goto out_rele;
+		}
+	}
+
+	/* make sure the source and the destination are in /dev */
+	if (odvp != ndvp) {
+		vattr.va_mask = AT_FSID;
+		if (error = VOP_GETATTR(odvp, &vattr, 0, cred))
+			goto out_rele;
+		fsid = vattr.va_fsid;
+		vattr.va_mask = AT_FSID;
+		if (error = VOP_GETATTR(ndvp, &vattr, 0, cred))
+			goto out_rele;
+		if (fsid != vattr.va_fsid) {
+			error = EXDEV;
+			goto out_rele;
+		}
+	}
+
+	/* make sure the old entry can be deleted */
+	error = VOP_ACCESS(odvp, VWRITE, 0, cred);
+	if (error)
+		goto out_rele;
+
+	/* make sure the destination allows creation */
+	samedir = (fromparent == toparent);
+	if (!samedir) {
+		error = VOP_ACCESS(ndvp, VEXEC|VWRITE, 0, cred);
+		if (error)
+			goto out_rele;
+	}
+
+	fromdv = VTOSDEV(ovp);
+	ASSERT(fromdv);
+
+	/* check with the plug-in modules for the source directory */
+	rw_enter(&fromparent->sdev_contents, RW_READER);
+	omap = sdev_get_map(fromparent, 0);
+	rw_exit(&fromparent->sdev_contents);
+	odirops = omap ? omap->dir_ops : NULL;
+	if (odirops && ((fn = odirops->devnops_rename) != NULL)) {
+		if (samedir) {
+			error = (*fn)(&(fromdv->sdev_handle), nnm);
+		} else {
+			/*
+			 * Across directories the module is given the new
+			 * name qualified by the destination directory.
+			 * Bound the copy by the buffer size and refuse
+			 * names that would not fit.
+			 */
+			if (snprintf(nnm_path, sizeof (nnm_path), "%s/%s",
+			    toparent->sdev_name, nnm) >=
+			    sizeof (nnm_path)) {
+				error = ENAMETOOLONG;
+				goto out_rele;
+			}
+			error = (*fn)(&(fromdv->sdev_handle), nnm_path);
+		}
+
+		if (error)
+			goto out_rele;
+	}
+
+	/*
+	 * Remove the destination if exist
+	 * On failure, we should attempt to restore the current state
+	 * before returning error.
+	 */
+	if (nvp) {
+		switch (nvp->v_type) {
+		case VDIR:
+			error = VOP_RMDIR(ndvp, nnm, ndvp, cred);
+			break;
+		default:
+			error = VOP_REMOVE(ndvp, nnm, cred);
+			break;
+		}
+
+		if (error) {
+			sdcmn_err2(("sdev_rename: removing existing destination"
+			    " %s failed, error %d\n", nnm, error));
+			goto out_rele;
+		}
+	}
+
+	/*
+	 * link source to new target in the memory
+	 *
+	 * NOTE(review): if a destination was removed above, nvp still
+	 * carries the hold from the first lookup when it is overwritten
+	 * here -- confirm whether that hold needs to be released first.
+	 */
+	error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred);
+	if (error && (error != ENOENT)) {
+		VN_RELE(ovp);
+		return (error);
+	} else if (error == ENOENT) {
+		/* make a new node from the old node */
+		error = sdev_rnmnode(fromparent, fromdv, toparent, &todv,
+		    nnm, cred);
+	} else {
+		ASSERT(nvp);
+		if (VOP_REALVP(nvp, &realvp) == 0) {
+			VN_HOLD(realvp);
+			VN_RELE(nvp);
+			nvp = realvp;
+		}
+
+		/* destination file exists */
+		todv = VTOSDEV(nvp);
+		ASSERT(todv);
+		error = sdev_rnmnode(fromparent, fromdv, toparent, &todv,
+		    nnm, cred);
+		if (error) {
+			sdcmn_err2(("sdev_rename: renaming %s to %s failed "
+			    " with existing destination error %d\n",
+			    onm, nnm, error));
+			VN_RELE(nvp);
+			VN_RELE(ovp);
+			return (error);
+		}
+	}
+
+	/* unlink from source */
+	if (error == 0) {
+		/*
+		 * check with the plug-in module whether source can be
+		 * re-used or not
+		 */
+		if (odirops && ((rmfn = odirops->devnops_remove) != NULL)) {
+			error = (*rmfn)(&(fromdv->sdev_handle));
+		}
+
+		if (error == 0) {
+			bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0;
+			rw_enter(&fromparent->sdev_contents, RW_WRITER);
+			error = sdev_cache_update(fromparent, &fromdv, onm,
+			    SDEV_CACHE_DELETE);
+			rw_exit(&fromparent->sdev_contents);
+
+			/* best efforts clean up the backing store */
+			if (bkstore) {
+				ASSERT(fromparent->sdev_attrvp);
+				error = VOP_REMOVE(fromparent->sdev_attrvp,
+				    onm, kcred);
+				if (error) {
+					sdcmn_err2(("sdev_rename: device %s is "
+					    "still on disk %s\n", onm,
+					    fromparent->sdev_path));
+					error = 0;
+				}
+			}
+
+			if (error == EBUSY) {
+				error = 0;
+			}
+		}
+	}
+
+	/* book keeping the ovp v_count */
+	if (error) {
+		sdcmn_err2(("sdev_rename: renaming %s to %s failed "
+		    " with error %d\n", onm, nnm, error));
+		VN_RELE(ovp);
+	}
+
+	return (error);
+
+out_rele:
+	/*
+	 * Failure before anything was modified: drop the holds taken by
+	 * the source and (if it exists) destination lookups.
+	 */
+	if (nvp != NULL)
+		VN_RELE(nvp);
+	VN_RELE(ovp);
+	return (error);
+}
+
+/*
+ * dev-fs version of "ln -s path dev-name"
+ *	tnm - path, e.g. /devices/... or /dev/...
+ *	lnm - dev_name
+ */
+static int
+sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva,
+    char *tnm, struct cred *cred)
+{
+	int error;
+	struct vnode *vp = NULL;
+	struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
+	struct sdev_node *self = (struct sdev_node *)NULL;
+
+	ASSERT(parent);
+	rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
+	if (parent->sdev_state == SDEV_ZOMBIE) {
+		rw_exit(&parent->sdev_dotdot->sdev_contents);
+		sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n",
+		    parent->sdev_name));
+		return (ENOENT);
+	}
+
+	if (!SDEV_IS_GLOBAL(parent)) {
+		rw_exit(&parent->sdev_dotdot->sdev_contents);
+		return (ENOTSUP);
+	}
+	rw_exit(&parent->sdev_dotdot->sdev_contents);
+
+	/* find existing name */
+	error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred);
+	if (error == 0) {
+		ASSERT(vp);
+		VN_RELE(vp);
+		sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm));
+		return (EEXIST);
+	}
+
+	if (error != ENOENT) {
+		return (error);
+	}
+
+	/* put it into memory cache */
+	rw_enter(&parent->sdev_contents, RW_WRITER);
+	error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm,
+	    cred, SDEV_READY);
+	if (error) {
+
rw_exit(&parent->sdev_contents); + sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm)); + if (self) + SDEV_RELE(self); + + return (error); + } + ASSERT(self && (self->sdev_state == SDEV_READY)); + rw_exit(&parent->sdev_contents); + + /* take care the timestamps for the node and its parent */ + sdev_update_timestamps(SDEVTOV(self), kcred, + AT_CTIME|AT_MTIME|AT_ATIME); + sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); + if (SDEV_IS_GLOBAL(parent)) + atomic_inc_ulong(&parent->sdev_gdir_gen); + + /* wake up other threads blocked on looking up this node */ + mutex_enter(&self->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); + mutex_exit(&self->sdev_lookup_lock); + SDEV_RELE(self); /* don't return with vnode held */ + return (0); +} + +static int +sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, + struct cred *cred) +{ + int error; + struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); + struct sdev_node *self = NULL; + struct vnode *vp = NULL; + + ASSERT(parent && parent->sdev_dotdot); + rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); + if (parent->sdev_state == SDEV_ZOMBIE) { + rw_exit(&parent->sdev_dotdot->sdev_contents); + return (ENOENT); + } + + /* non-global do not allow pure directory creation */ + if (!SDEV_IS_GLOBAL(parent)) { + rw_exit(&parent->sdev_dotdot->sdev_contents); + return (prof_lookup(dvp, nm, vpp, cred)); + } + rw_exit(&parent->sdev_dotdot->sdev_contents); + + /* find existing name */ + error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred); + if (error == 0) { + VN_RELE(vp); + return (EEXIST); + } + + if (error != ENOENT) + return (error); + + /* put it into memory */ + rw_enter(&parent->sdev_contents, RW_WRITER); + error = sdev_mknode(parent, nm, &self, + va, NULL, NULL, cred, SDEV_READY); + if (error) { + rw_exit(&parent->sdev_contents); + if (self) + SDEV_RELE(self); + return (error); + } + ASSERT(self && (self->sdev_state == SDEV_READY)); + 
rw_exit(&parent->sdev_contents); + + /* take care the timestamps for the node and its parent */ + sdev_update_timestamps(SDEVTOV(self), kcred, + AT_CTIME|AT_MTIME|AT_ATIME); + sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); + if (SDEV_IS_GLOBAL(parent)) + atomic_inc_ulong(&parent->sdev_gdir_gen); + + /* wake up other threads blocked on looking up this node */ + mutex_enter(&self->sdev_lookup_lock); + SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); + mutex_exit(&self->sdev_lookup_lock); + *vpp = SDEVTOV(self); + return (0); +} + +/* + * allowing removing an empty directory under /dev + */ +/*ARGSUSED*/ +static int +sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred) +{ + int error = 0; + struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); + struct sdev_node *self = NULL; + struct vnode *vp = NULL; + + /* bail out early */ + if (strcmp(nm, ".") == 0) + return (EINVAL); + if (strcmp(nm, "..") == 0) + return (EEXIST); /* should be ENOTEMPTY */ + + /* no destruction of non-global node */ + ASSERT(parent && parent->sdev_dotdot); + rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); + if (!SDEV_IS_GLOBAL(parent)) { + rw_exit(&parent->sdev_dotdot->sdev_contents); + return (ENOTSUP); + } + rw_exit(&parent->sdev_dotdot->sdev_contents); + + /* check existing name */ + rw_enter(&parent->sdev_contents, RW_WRITER); + self = sdev_cache_lookup(parent, nm); + if (self == NULL) { + rw_exit(&parent->sdev_contents); + return (ENOENT); + } + + vp = SDEVTOV(self); + if ((self->sdev_state == SDEV_INIT) || + (self->sdev_state == SDEV_ZOMBIE)) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (ENOENT); + } + + /* some sanity checks */ + if (vp == dvp || vp == cdir) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (EINVAL); + } + + if (vp->v_type != VDIR) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (ENOTDIR); + } + + if (vn_vfswlock(vp)) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (EBUSY); + } 
+ + if (vn_mountedvfs(vp) != NULL) { + rw_exit(&parent->sdev_contents); + vn_vfsunlock(vp); + VN_RELE(vp); + return (EBUSY); + } + + self = VTOSDEV(vp); + /* bail out on a non-empty directory */ + rw_enter(&self->sdev_contents, RW_READER); + if (self->sdev_nlink > 2) { + rw_exit(&self->sdev_contents); + rw_exit(&parent->sdev_contents); + vn_vfsunlock(vp); + VN_RELE(vp); + return (ENOTEMPTY); + } + rw_exit(&self->sdev_contents); + + /* unlink it from the directory cache */ + error = sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); + rw_exit(&parent->sdev_contents); + vn_vfsunlock(vp); + + if (error && (error != EBUSY)) { + VN_RELE(vp); + } else { + sdcmn_err2(("sdev_rmdir: cleaning node %s from directory " + " cache with error %d\n", nm, error)); + + /* best effort to clean up the backing store */ + if (SDEV_IS_PERSIST(parent)) { + ASSERT(parent->sdev_attrvp); + error = VOP_RMDIR(parent->sdev_attrvp, nm, + parent->sdev_attrvp, kcred); + sdcmn_err2(("sdev_rmdir: cleaning device %s is on" + " disk error %d\n", parent->sdev_path, error)); + } + + if (error == EBUSY) + error = 0; + } + + return (error); +} + +/* + * read the contents of a symbolic link + */ +static int +sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred) +{ + struct sdev_node *dv; + int error = 0; + + ASSERT(vp->v_type == VLNK); + + dv = VTOSDEV(vp); + + if (dv->sdev_attrvp) { + /* non-NULL attrvp implys a persisted node at READY state */ + return (VOP_READLINK(dv->sdev_attrvp, uiop, cred)); + } else if (dv->sdev_symlink != NULL) { + /* memory nodes, e.g. 
local nodes */ + rw_enter(&dv->sdev_contents, RW_READER); + sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink)); + error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink), + UIO_READ, uiop); + rw_exit(&dv->sdev_contents); + return (error); + } + + return (ENOENT); +} + +static int +sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp) +{ + return (devname_readdir_func(dvp, uiop, cred, eofp, SDEV_BROWSE)); +} + +/*ARGSUSED1*/ +static void +sdev_inactive(struct vnode *vp, struct cred *cred) +{ + int clean; + struct sdev_node *dv = VTOSDEV(vp); + struct sdev_node *ddv = dv->sdev_dotdot; + struct sdev_node *idv; + struct sdev_node *prev = NULL; + int state; + struct devname_nsmap *map = NULL; + struct devname_ops *dirops = NULL; + void (*fn)(devname_handle_t *, struct cred *) = NULL; + + rw_enter(&ddv->sdev_contents, RW_WRITER); + state = dv->sdev_state; + + mutex_enter(&vp->v_lock); + ASSERT(vp->v_count >= 1); + + clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); + + /* + * last ref count on the ZOMBIE node is released. + * clean up the sdev_node, and + * release the hold on the backing store node so that + * the ZOMBIE backing stores also cleaned out. + */ + if (clean) { + ASSERT(ddv); + if (SDEV_IS_GLOBAL(dv)) { + map = ddv->sdev_mapinfo; + dirops = map ? 
map->dir_ops : NULL; + if (dirops && (fn = dirops->devnops_inactive)) + (*fn)(&(dv->sdev_handle), cred); + } + + ddv->sdev_nlink--; + if (vp->v_type == VDIR) { + dv->sdev_nlink--; + } + for (idv = ddv->sdev_dot; idv && idv != dv; + prev = idv, idv = idv->sdev_next); + ASSERT(idv == dv); + if (prev == NULL) + ddv->sdev_dot = dv->sdev_next; + else + prev->sdev_next = dv->sdev_next; + dv->sdev_next = NULL; + dv->sdev_nlink--; + --vp->v_count; + mutex_exit(&vp->v_lock); + sdev_nodedestroy(dv, 0); + } else { + --vp->v_count; + mutex_exit(&vp->v_lock); + } + rw_exit(&ddv->sdev_contents); +} + +static int +sdev_fid(struct vnode *vp, struct fid *fidp) +{ + struct sdev_node *dv = VTOSDEV(vp); + struct sdev_fid *sdev_fid; + + if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) { + fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t); + return (ENOSPC); + } + + sdev_fid = (struct sdev_fid *)fidp; + bzero(sdev_fid, sizeof (struct sdev_fid)); + sdev_fid->sdevfid_len = + (int)sizeof (struct sdev_fid) - sizeof (ushort_t); + sdev_fid->sdevfid_ino = dv->sdev_ino; + + return (0); +} + +/* + * This pair of routines bracket all VOP_READ, VOP_WRITE + * and VOP_READDIR requests. The contents lock stops things + * moving around while we're looking at them. + */ +static void +sdev_rwlock(struct vnode *vp, int write_flag) +{ + rw_enter(&VTOSDEV(vp)->sdev_contents, write_flag ? 
RW_WRITER : + RW_READER); +} + +/*ARGSUSED1*/ +static void +sdev_rwunlock(struct vnode *vp, int write_flag) +{ + rw_exit(&VTOSDEV(vp)->sdev_contents); +} + +/*ARGSUSED1*/ +static int +sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) +{ + struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp; + + ASSERT(vp->v_type != VCHR && + vp->v_type != VBLK && vp->v_type != VLNK); + + if (vp->v_type == VDIR) + return (fs_seek(vp, ooff, noffp)); + + ASSERT(attrvp); + return (VOP_SEEK(attrvp, ooff, noffp)); +} + +/*ARGSUSED1*/ +static int +sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, + offset_t offset, struct flk_callback *flk_cbp, struct cred *cr) +{ + int error; + struct sdev_node *dv = VTOSDEV(vp); + + ASSERT(dv); + ASSERT(dv->sdev_attrvp); + error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset, + flk_cbp, cr); + + return (error); +} + +static int +sdev_setfl(struct vnode *vp, int oflags, int nflags, cred_t *cr) +{ + struct sdev_node *dv = VTOSDEV(vp); + ASSERT(dv); + ASSERT(dv->sdev_attrvp); + + return (VOP_SETFL(dv->sdev_attrvp, oflags, nflags, cr)); +} + +static int +sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) +{ + switch (cmd) { + case _PC_ACL_ENABLED: + *valp = SDEV_ACL_FLAVOR(vp); + return (0); + } + + return (fs_pathconf(vp, cmd, valp, cr)); +} + +vnodeops_t *sdev_vnodeops; + +const fs_operation_def_t sdev_vnodeops_tbl[] = { + VOPNAME_OPEN, sdev_open, + VOPNAME_CLOSE, sdev_close, + VOPNAME_READ, sdev_read, + VOPNAME_WRITE, sdev_write, + VOPNAME_IOCTL, sdev_ioctl, + VOPNAME_GETATTR, sdev_getattr, + VOPNAME_SETATTR, sdev_setattr, + VOPNAME_ACCESS, sdev_access, + VOPNAME_LOOKUP, sdev_lookup, + VOPNAME_CREATE, sdev_create, + VOPNAME_RENAME, sdev_rename, + VOPNAME_REMOVE, sdev_remove, + VOPNAME_MKDIR, sdev_mkdir, + VOPNAME_RMDIR, sdev_rmdir, + VOPNAME_READDIR, sdev_readdir, + VOPNAME_SYMLINK, sdev_symlink, + VOPNAME_READLINK, sdev_readlink, /* readlink */ + VOPNAME_FSYNC, (fs_generic_func_p) fs_sync, + VOPNAME_INACTIVE, 
(fs_generic_func_p)sdev_inactive, + VOPNAME_FID, sdev_fid, + VOPNAME_RWLOCK, (fs_generic_func_p)sdev_rwlock, + VOPNAME_RWUNLOCK, (fs_generic_func_p)sdev_rwunlock, + VOPNAME_SEEK, sdev_seek, + VOPNAME_FRLOCK, sdev_frlock, + VOPNAME_PATHCONF, sdev_pathconf, + VOPNAME_SETFL, sdev_setfl, + VOPNAME_SETSECATTR, sdev_setsecattr, /* setsecattr */ + VOPNAME_GETSECATTR, sdev_getsecattr, /* getsecattr */ + NULL, NULL +}; + +int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl); |