diff options
| author | Robert Mustacchi <rm@joyent.com> | 2013-05-08 01:46:41 +0000 |
|---|---|---|
| committer | Robert Mustacchi <rm@joyent.com> | 2013-08-15 10:09:03 -0700 |
| commit | 9e5aa9d8a21f8efa8ba9c9e4a0aa6edc66d07eb2 (patch) | |
| tree | 731a7085007b8f4cd0e7f5fe028e15aeb3b960f0 /usr/src | |
| parent | dd9c3b29f8e9f6b99b80e1fd8fc03241abd67311 (diff) | |
| download | illumos-joyent-9e5aa9d8a21f8efa8ba9c9e4a0aa6edc66d07eb2.tar.gz | |
3978 renaming dynamic sdev nodes is problematic
3979 sdev dynamic directories can be created multiple times
3980 sdev vfs refcount assertion violation
3981 sdev backing store nodes not always present
3982 sdev could use some theory statements
3983 sdev_shadow_node slept through a vn_rele
Reviewed by: Keith M Wesolowski <wesolows@foobazco.org>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Gordon Ross <gwr@nexenta.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_ipnetops.c | 9 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_netops.c | 13 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_profile.c | 11 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_ptsops.c | 2 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_subr.c | 345 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_vnops.c | 351 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_vtops.c | 13 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_zvolops.c | 15 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/fs/sdev_impl.h | 50 |
9 files changed, 518 insertions, 291 deletions
diff --git a/usr/src/uts/common/fs/dev/sdev_ipnetops.c b/usr/src/uts/common/fs/dev/sdev_ipnetops.c index dbca322afb..8d348a31d9 100644 --- a/usr/src/uts/common/fs/dev/sdev_ipnetops.c +++ b/usr/src/uts/common/fs/dev/sdev_ipnetops.c @@ -161,6 +161,14 @@ devipnet_filldir(struct sdev_node *ddv) if (rw_tryupgrade(&ddv->sdev_contents) == NULL) { rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); + /* + * We've been made a zombie while we weren't looking. We'll bail + * if that's the case. + */ + if (ddv->sdev_state == SDEV_ZOMBIE) { + rw_exit(&ddv->sdev_contents); + return; + } } for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { @@ -186,6 +194,7 @@ devipnet_filldir(struct sdev_node *ddv) /* remove the cache node */ (void) sdev_cache_update(ddv, &dv, dv->sdev_name, SDEV_CACHE_DELETE); + SDEV_RELE(dv); } ipnet_walk_if(devipnet_filldir_entry, ddv, getzoneid()); diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c index a4c4ab168d..4eaf38f484 100644 --- a/usr/src/uts/common/fs/dev/sdev_netops.c +++ b/usr/src/uts/common/fs/dev/sdev_netops.c @@ -58,7 +58,6 @@ devnet_validate(struct sdev_node *dv) datalink_id_t linkid; zoneid_t zoneid; - ASSERT(!(dv->sdev_flags & SDEV_STALE)); ASSERT(dv->sdev_state == SDEV_READY); if (dls_mgmt_get_linkid(dv->sdev_name, &linkid) != 0) @@ -149,13 +148,9 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, * directory cache lookup: */ if ((dv = sdev_cache_lookup(ddv, nm)) != NULL) { - if (dv->sdev_state == SDEV_READY) { - if (!(dv->sdev_flags & SDEV_ATTR_INVALID)) - goto found; - } else { - ASSERT(dv->sdev_state == SDEV_ZOMBIE); - goto failed; - } + ASSERT(dv->sdev_state == SDEV_READY); + if (!(dv->sdev_flags & SDEV_ATTR_INVALID)) + goto found; } /* @@ -170,7 +165,6 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, error = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, cred, SDEV_READY); if (error != 0) { - ASSERT(dv == NULL); dls_devnet_close(ddh); goto failed; } @@ -286,6 +280,7 @@ devnet_filldir(struct sdev_node *ddv) /* remove the cache node */ (void) sdev_cache_update(ddv, &dv, dv->sdev_name, SDEV_CACHE_DELETE); + SDEV_RELE(dv); } if (((ddv->sdev_flags & SDEV_BUILD) == 0) && !dls_devnet_rebuild()) diff --git a/usr/src/uts/common/fs/dev/sdev_profile.c b/usr/src/uts/common/fs/dev/sdev_profile.c index 01d4aab1ae..65890b1a2f 100644 --- a/usr/src/uts/common/fs/dev/sdev_profile.c +++ b/usr/src/uts/common/fs/dev/sdev_profile.c @@ -161,7 +161,7 @@ prof_mknode(struct sdev_node *dir, char *name, struct sdev_node **newdv, return (rv); } - rv = sdev_cache_update(dir, &dv, name, SDEV_CACHE_ADD); + sdev_cache_update(dir, &dv, name, SDEV_CACHE_ADD); *newdv = dv; /* put it in ready state */ @@ -173,6 +173,9 @@ prof_mknode(struct sdev_node *dir, char *name, struct sdev_node **newdv, sdcmn_err10(("sdev_origin for %s set to 0x%p\n", name, arg)); apply_glob_pattern(dir, *newdv); + } else { + sdev_cache_update(dir, &dv, name, SDEV_CACHE_DELETE); + SDEV_RELE(dv); } return (rv); } @@ -198,8 +201,8 @@ prof_make_dir(char *name, struct sdev_node **gdirp, struct sdev_node **dirp) if (newdv = sdev_cache_lookup(dir, name)) { *dirp = newdv; *gdirp = newdv->sdev_origin; - SDEV_RELE(dir); rw_exit(&dir->sdev_contents); + SDEV_RELE(dir); return (0); } rw_exit(&dir->sdev_contents); @@ -676,6 +679,10 @@ check_build: == gdir->sdev_gdir_gen)) return; /* already up to date */ + /* We may have become a zombie (across a try) */ + if (ddv->sdev_state == SDEV_ZOMBIE) + return; + if (firsttime && rw_tryupgrade(&ddv->sdev_contents) == 0) { rw_exit(&ddv->sdev_contents); firsttime = 0; diff --git a/usr/src/uts/common/fs/dev/sdev_ptsops.c b/usr/src/uts/common/fs/dev/sdev_ptsops.c index a228d05086..af98df755d 100644 --- a/usr/src/uts/common/fs/dev/sdev_ptsops.c +++ b/usr/src/uts/common/fs/dev/sdev_ptsops.c @@ -107,7 +107,6 @@ devpts_validate(struct sdev_node *dv) timestruc_t now; char *nm = dv->sdev_name; - ASSERT(!(dv->sdev_flags & SDEV_STALE)); ASSERT(dv->sdev_state == SDEV_READY); /* validate only READY nodes */ @@ -258,6 +257,7 @@ devpts_prunedir(struct sdev_node *ddv) /* remove the cache node */ (void) sdev_cache_update(ddv, &dv, dv->sdev_name, SDEV_CACHE_DELETE); + SDEV_RELE(dv); } rw_downgrade(&ddv->sdev_contents); } diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c index f29918e351..b4b27e6285 100644 --- a/usr/src/uts/common/fs/dev/sdev_subr.c +++ b/usr/src/uts/common/fs/dev/sdev_subr.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ /* @@ -370,7 +370,8 @@ sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, } /* - * transition a sdev_node into SDEV_READY state + * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the + * caller to transition the node to the SDEV_ZOMBIE state. */ int sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, @@ -433,26 +434,12 @@ sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, /* transition to READY state */ sdev_set_nodestate(dv, SDEV_READY); sdev_nc_node_exists(dv); - } else { - sdev_set_nodestate(dv, SDEV_ZOMBIE); } rw_exit(&dv->sdev_contents); return (error); } /* - * setting ZOMBIE state - */ -static int -sdev_nodezombied(struct sdev_node *dv) -{ - rw_enter(&dv->sdev_contents, RW_WRITER); - sdev_set_nodestate(dv, SDEV_ZOMBIE); - rw_exit(&dv->sdev_contents); - return (0); -} - -/* * Build the VROOT sdev_node. */ /*ARGSUSED*/ @@ -568,23 +555,31 @@ static struct sdev_vop_table vtab[] = { NULL, NULL, NULL, NULL, NULL, 0} }; +/* + * We need to match off of the sdev_path, not the sdev_name. We are only allowed + * to exist directly under /dev. + */ struct sdev_vop_table * sdev_match(struct sdev_node *dv) { int vlen; int i; + const char *path; + + if (strlen(dv->sdev_path) <= 5) + return (NULL); + + if (strncmp(dv->sdev_path, "/dev/", 5) != 0) + return (NULL); + path = dv->sdev_path + 5; for (i = 0; vtab[i].vt_name; i++) { - if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0) + if (strcmp(vtab[i].vt_name, path) == 0) return (&vtab[i]); if (vtab[i].vt_flags & SDEV_SUBDIR) { - char *ptr; - - ASSERT(strlen(dv->sdev_path) > 5); - ptr = dv->sdev_path + 5; vlen = strlen(vtab[i].vt_name); - if ((strncmp(vtab[i].vt_name, ptr, - vlen - 1) == 0) && ptr[vlen] == '/') + if ((strncmp(vtab[i].vt_name, path, + vlen - 1) == 0) && path[vlen] == '/') return (&vtab[i]); } @@ -610,10 +605,14 @@ sdev_get_vop(struct sdev_node *dv) /* gets the vtab entry it matches */ if ((vtp = sdev_match(dv)) != NULL) { dv->sdev_flags |= vtp->vt_flags; + if (SDEV_IS_PERSIST(dv->sdev_dotdot) && + (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv))) + dv->sdev_flags |= SDEV_PERSIST; if (vtp->vt_vops) { if (vtp->vt_global_vops) *(vtp->vt_global_vops) = vtp->vt_vops; + return (vtp->vt_vops); } @@ -631,8 +630,10 @@ sdev_get_vop(struct sdev_node *dv) *(vtp->vt_global_vops) = vtp->vt_vops; } sdev_free_vtab(templ); + return (vtp->vt_vops); } + return (sdev_vnodeops); } @@ -743,7 +744,6 @@ sdev_follow_link(struct sdev_node *dv) ASSERT(linkvp->v_type == VLNK); err = sdev_getlink(linkvp, &link); if (err) { - (void) sdev_nodezombied(dv); dv->sdev_symlink = NULL; return (ENOENT); } @@ -839,12 +839,7 @@ sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, ASSERT(dv); /* insert into the directory cache */ - error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); - if (error) { - sdcmn_err9(("sdev_mknode: node %s can not" - " be added into directory cache\n", nm)); - return (ENOENT); - } + sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); } ASSERT(dv); @@ -878,7 +873,13 @@ sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, *newdv = dv; ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); } else { - SDEV_SIMPLE_RELE(dv); + sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); + /* + * We created this node, it wasn't passed into us. Therefore it + * is up to us to delete it. + */ + if (*newdv == NULL) + SDEV_SIMPLE_RELE(dv); *newdv = NULL; } @@ -986,6 +987,7 @@ sdev_findbyname(struct sdev_node *ddv, char *nm) if (dv) { ASSERT(dv->sdev_dotdot == ddv); ASSERT(strcmp(dv->sdev_name, nm) == 0); + ASSERT(dv->sdev_state != SDEV_ZOMBIE); SDEV_HOLD(dv); return (dv); } @@ -1004,6 +1006,7 @@ sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) ASSERT(SDEVTOV(ddv)->v_type == VDIR); ASSERT(ddv->sdev_nlink >= 2); ASSERT(dv->sdev_nlink == 0); + ASSERT(dv->sdev_state != SDEV_ZOMBIE); dv->sdev_dotdot = ddv; VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); @@ -1019,60 +1022,55 @@ sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) static void decr_link(struct sdev_node *dv) { - if (dv->sdev_state != SDEV_INIT) + VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); + if (dv->sdev_state != SDEV_INIT) { + VERIFY(dv->sdev_nlink >= 1); dv->sdev_nlink--; - else - ASSERT(dv->sdev_nlink == 0); + } else { + VERIFY(dv->sdev_nlink == 0); + } } /* * Delete an existing dv from directory cache * - * In the case of a node is still held by non-zero reference count, - * the node is put into ZOMBIE state. Once the reference count - * reaches "0", the node is unlinked and destroyed, - * in sdev_inactive(). + * In the case of a node is still held by non-zero reference count, the node is + * put into ZOMBIE state. The node is always unlinked from its parent, but it is + * not destroyed via sdev_inactive until its reference count reaches "0". */ -static int +static void sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) { struct vnode *vp; + sdev_node_state_t os; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); vp = SDEVTOV(dv); mutex_enter(&vp->v_lock); + rw_enter(&dv->sdev_contents, RW_WRITER); + os = dv->sdev_state; + ASSERT(os != SDEV_ZOMBIE); + dv->sdev_state = SDEV_ZOMBIE; - /* dv is held still */ - if (vp->v_count > 1) { - rw_enter(&dv->sdev_contents, RW_WRITER); - if (dv->sdev_state == SDEV_READY) { - sdcmn_err9(( - "sdev_dirdelete: node %s busy with count %d\n", - dv->sdev_name, vp->v_count)); - dv->sdev_state = SDEV_ZOMBIE; - } - rw_exit(&dv->sdev_contents); - --vp->v_count; - mutex_exit(&vp->v_lock); - return (EBUSY); - } - ASSERT(vp->v_count == 1); - - /* unlink from the memory cache */ - ddv->sdev_nlink--; /* .. to above */ - if (vp->v_type == VDIR) { - decr_link(dv); /* . to self */ - } - + /* + * unlink ourselves from the parent directory now to take care of the .. + * link. However, if we're a directory, we don't remove our reference to + * ourself eg. '.' until we are torn down in the inactive callback. + */ + decr_link(ddv); avl_remove(&ddv->sdev_entries, dv); - decr_link(dv); /* name, back to zero */ - vp->v_count--; + /* + * sdev_inactive expects nodes to have a link to themselves when we're + * tearing them down. If we're transitioning from the initial state to + * zombie and not via ready, then we're not going to have this link that + * comes from the node being ready. As a result, we need to increment + * our link count by one to account for this. + */ + if (os == SDEV_INIT && dv->sdev_nlink == 0) + dv->sdev_nlink++; + rw_exit(&dv->sdev_contents); mutex_exit(&vp->v_lock); - - /* destroy the node */ - sdev_nodedestroy(dv, 0); - return (0); } /* @@ -1169,6 +1167,35 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, goto err_out; } + /* fix the source for a symlink */ + if (vattr.va_type == VLNK) { + if (odv->sdev_symlink == NULL) { + error = sdev_follow_link(odv); + if (error) { + /* + * The underlying symlink doesn't exist. This + * node probably shouldn't even exist. While + * it's a bit jarring to consumers, we're going + * to remove the node from /dev. + */ + if (SDEV_IS_PERSIST((*ndvp))) + bkstore = 1; + sdev_dirdelete(oddv, odv); + if (bkstore) { + ASSERT(nddv->sdev_attrvp); + error = VOP_REMOVE(nddv->sdev_attrvp, + nnm, cred, NULL, 0); + if (error) + goto err_out; + } + error = ENOENT; + goto err_out; + } + } + ASSERT(odv->sdev_symlink); + link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); + } + /* destination existing */ if (*ndvp) { nvp = SDEVTOV(*ndvp); @@ -1205,7 +1232,12 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, } vn_vfsunlock(nvp); - (void) sdev_dirdelete(nddv, *ndvp); + /* + * We did not place the hold on *ndvp, so even though + * we're deleting the node, we should not get rid of our + * reference. + */ + sdev_dirdelete(nddv, *ndvp); *ndvp = NULL; ASSERT(nddv->sdev_attrvp); error = VOP_RMDIR(nddv->sdev_attrvp, nnm, @@ -1223,11 +1255,11 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, } /* - * get rid of the node from the directory cache - * note, in case EBUSY is returned, the ZOMBIE - * node is taken care in sdev_mknode. + * Get rid of the node from the directory cache note. + * Don't forget that it's not up to us to remove the vn + * ref on the sdev node, as we did not place it. */ - (void) sdev_dirdelete(nddv, *ndvp); + sdev_dirdelete(nddv, *ndvp); *ndvp = NULL; if (bkstore) { ASSERT(nddv->sdev_attrvp); @@ -1239,19 +1271,6 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, } } - /* fix the source for a symlink */ - if (vattr.va_type == VLNK) { - if (odv->sdev_symlink == NULL) { - error = sdev_follow_link(odv); - if (error) { - error = ENOENT; - goto err_out; - } - } - ASSERT(odv->sdev_symlink); - link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); - } - /* * make a fresh node from the source attrs */ @@ -1259,8 +1278,10 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, error = sdev_mknode(nddv, nnm, ndvp, &vattr, NULL, (void *)link, cred, SDEV_READY); - if (link) + if (link != NULL) { kmem_free(link, strlen(link) + 1); + link = NULL; + } if (error) goto err_out; @@ -1271,9 +1292,11 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, if (doingdir) { for (idv = SDEV_FIRST_ENTRY(odv); idv; idv = SDEV_NEXT_ENTRY(odv, idv)) { + SDEV_HOLD(idv); error = sdev_rnmnode(odv, idv, (struct sdev_node *)(*ndvp), &ndv, idv->sdev_name, cred); + SDEV_RELE(idv); if (error) goto err_out; ndv = NULL; @@ -1307,6 +1330,11 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, return (error); err_out: + if (link != NULL) { + kmem_free(link, strlen(link) + 1); + link = NULL; + } + rw_exit(&nddv->sdev_contents); if (!samedir) rw_exit(&oddv->sdev_contents); @@ -1465,18 +1493,8 @@ sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) vp = NULLVP; dv = sdev_cache_lookup(ddv, nm); if (dv) { - if (dv->sdev_state != SDEV_ZOMBIE) { - SDEV_SIMPLE_RELE(dv); - } else { - /* - * A ZOMBIE node may not have been - * cleaned up from the backing store, - * bypass this entry in this case, - * and clean it up from the directory - * cache if this is the last call. - */ - (void) sdev_dirdelete(ddv, dv); - } + VERIFY(dv->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(dv); continue; } @@ -1547,11 +1565,16 @@ sdev_filldir_dynamic(struct sdev_node *ddv) vap->va_ctime = vap->va_atime; for (i = 0; vtab[i].vt_name != NULL; i++) { /* - * This early, we may be in a read-only /dev - * environment: leave the creation of any nodes we'd - * attempt to persist to devfsadm. + * This early, we may be in a read-only /dev environment: leave + * the creation of any nodes we'd attempt to persist to + * devfsadm. Because /dev itself is normally persistent, any + * node which is not marked dynamic will end up being marked + * persistent. However, some nodes are both dynamic and + * persistent, mostly lofi and rlofi, so we need to be careful + * in our check. */ - if (vtab[i].vt_flags & SDEV_PERSIST) + if ((vtab[i].vt_flags & SDEV_PERSIST) || + !(vtab[i].vt_flags & SDEV_DYNAMIC)) continue; nm = vtab[i].vt_name; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); @@ -1614,6 +1637,8 @@ lookup: error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", (void *)(*rvp), error)); + if (!error) + VN_RELE(*rvp); break; case VCHR: case VBLK: @@ -1647,74 +1672,49 @@ lookup: return (error); } -static int +static void sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) { - int error = 0; struct sdev_node *dup = NULL; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); if ((dup = sdev_findbyname(ddv, nm)) == NULL) { sdev_direnter(ddv, *dv); } else { - if (dup->sdev_state == SDEV_ZOMBIE) { - error = sdev_dirdelete(ddv, dup); - /* - * The ZOMBIE node is still hanging - * around with more than one reference counts. - * Fail the new node creation so that - * the directory cache won't have - * duplicate entries for the same named node - */ - if (error == EBUSY) { - SDEV_SIMPLE_RELE(*dv); - sdev_nodedestroy(*dv, 0); - *dv = NULL; - return (error); - } - sdev_direnter(ddv, *dv); - } else { - ASSERT((*dv)->sdev_state != SDEV_ZOMBIE); - SDEV_SIMPLE_RELE(*dv); - sdev_nodedestroy(*dv, 0); - *dv = dup; - } + VERIFY(dup->sdev_state != SDEV_ZOMBIE); + SDEV_SIMPLE_RELE(*dv); + sdev_nodedestroy(*dv, 0); + *dv = dup; } - - return (0); } -static int +static void sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) { ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); - return (sdev_dirdelete(ddv, *dv)); + sdev_dirdelete(ddv, *dv); } /* * update the in-core directory cache */ -int +void sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, sdev_cache_ops_t ops) { - int error = 0; - ASSERT((SDEV_HELD(*dv))); ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); switch (ops) { case SDEV_CACHE_ADD: - error = sdev_cache_add(ddv, dv, nm); + sdev_cache_add(ddv, dv, nm); break; case SDEV_CACHE_DELETE: - error = sdev_cache_delete(ddv, dv); + sdev_cache_delete(ddv, dv); break; default: break; } - - return (error); } /* @@ -2157,7 +2157,6 @@ lookup_create_node: } found: - ASSERT(!(dv->sdev_flags & SDEV_STALE)); ASSERT(dv->sdev_state == SDEV_READY); if (vtor) { /* @@ -2176,13 +2175,11 @@ found: rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); } - error = sdev_cache_update(ddv, &dv, nm, - SDEV_CACHE_DELETE); + sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); rw_downgrade(&ddv->sdev_contents); - if (error == 0) { - dv = NULL; - goto lookup_create_node; - } + SDEV_RELE(dv); + dv = NULL; + goto lookup_create_node; /* FALLTHRU */ case SDEV_VTOR_INVALID: SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); @@ -2229,9 +2226,10 @@ nolock_notfound: * changes. Re-check. */ if (dv->sdev_state == SDEV_INIT) { - (void) sdev_dirdelete(ddv, dv); + sdev_dirdelete(ddv, dv); rw_exit(&ddv->sdev_contents); sdev_lookup_failed(ddv, nm, failed_flags); + SDEV_RELE(dv); *vpp = NULL; return (ENOENT); } @@ -2263,15 +2261,14 @@ sdev_stale(struct sdev_node *ddv) ASSERT(SDEVTOV(ddv)->v_type == VDIR); rw_enter(&ddv->sdev_contents, RW_WRITER); - for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) { + while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { vp = SDEVTOV(dv); + SDEV_HOLD(dv); if (vp->v_type == VDIR) sdev_stale(dv); - sdcmn_err9(("sdev_stale: setting stale %s\n", - dv->sdev_path)); - dv->sdev_flags |= SDEV_STALE; - avl_remove(&ddv->sdev_entries, dv); + sdev_dirdelete(ddv, dv); + SDEV_RELE(dv); } ddv->sdev_flags |= SDEV_BUILD; rw_exit(&ddv->sdev_contents); @@ -2289,7 +2286,7 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) int error = 0; int busy = 0; struct vnode *vp; - struct sdev_node *dv, *next = NULL; + struct sdev_node *dv; int bkstore = 0; int len = 0; char *bks_name = NULL; @@ -2300,8 +2297,7 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) * We try our best to destroy all unused sdev_node's */ rw_enter(&ddv->sdev_contents, RW_WRITER); - for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { - next = SDEV_NEXT_ENTRY(ddv, dv); + while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { vp = SDEVTOV(dv); if (expr && gmatch(dv->sdev_name, expr) == 0) @@ -2338,15 +2334,10 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) bcopy(dv->sdev_name, bks_name, len); } - error = sdev_dirdelete(ddv, dv); - - if (error == EBUSY) { - sdcmn_err9(("sdev_cleandir: dir busy\n")); - busy++; - } + sdev_dirdelete(ddv, dv); /* take care the backing store clean up */ - if (bkstore && (error == 0)) { + if (bkstore) { ASSERT(bks_name); ASSERT(ddv->sdev_attrvp); @@ -2370,6 +2361,9 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) bks_name = NULL; len = 0; } + + ddv->sdev_flags |= SDEV_BUILD; + SDEV_RELE(dv); } ddv->sdev_flags |= SDEV_BUILD; @@ -3114,42 +3108,41 @@ devname_inactive_func(struct vnode *vp, struct cred *cred, { int clean; struct sdev_node *dv = VTOSDEV(vp); - struct sdev_node *ddv = dv->sdev_dotdot; int state; - rw_enter(&ddv->sdev_contents, RW_WRITER); - state = dv->sdev_state; - mutex_enter(&vp->v_lock); ASSERT(vp->v_count >= 1); + if (vp->v_count == 1 && callback != NULL) callback(vp); + rw_enter(&dv->sdev_contents, RW_WRITER); + state = dv->sdev_state; + clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); /* - * last ref count on the ZOMBIE node is released. - * clean up the sdev_node, and - * release the hold on the backing store node so that - * the ZOMBIE backing stores also cleaned out. + * sdev is a rather bad public citizen. It violates the general + * agreement that in memory nodes should always have a valid reference + * count on their vnode. But that's not the case here. This means that + * we do actually have to distinguish between getting inactive callbacks + * for zombies and otherwise. This should probably be fixed. */ if (clean) { - ASSERT(ddv); - - ddv->sdev_nlink--; + /* Remove the . entry to ourselves */ if (vp->v_type == VDIR) { - dv->sdev_nlink--; + decr_link(dv); } - if ((dv->sdev_flags & SDEV_STALE) == 0) - avl_remove(&ddv->sdev_entries, dv); - dv->sdev_nlink--; + VERIFY(dv->sdev_nlink == 1); + decr_link(dv); --vp->v_count; + rw_exit(&dv->sdev_contents); mutex_exit(&vp->v_lock); sdev_nodedestroy(dv, 0); } else { --vp->v_count; + rw_exit(&dv->sdev_contents); mutex_exit(&vp->v_lock); } - rw_exit(&ddv->sdev_contents); } diff --git a/usr/src/uts/common/fs/dev/sdev_vnops.c b/usr/src/uts/common/fs/dev/sdev_vnops.c index 89c5decbf0..3b6fe5e79a 100644 --- a/usr/src/uts/common/fs/dev/sdev_vnops.c +++ b/usr/src/uts/common/fs/dev/sdev_vnops.c @@ -21,6 +21,9 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ /* * vnode ops for the /dev filesystem @@ -32,6 +35,248 @@ * not supported for now */ +/* + * sdev has a few basic goals: + * o Provide /dev for the global zone as well as various non-global zones. + * o Provide the basic functionality that devfsadm might need (mknod, + * symlinks, etc.) + * o Allow persistent permissions on files in /dev. + * o Allow for dynamic directories and nodes for use by various services (pts, + * zvol, net, etc.) + * + * The sdev file system is primarily made up of sdev_node_t's which is sdev's + * counterpart to the vnode_t. There are two different classes of sdev_node_t's + * that we generally care about, dynamic and otherwise. + * + * Persisting Information + * ---------------------- + * + * When sdev is mounted, it keeps track of the underlying file system it is + * mounted over. In certain situations, sdev will go and create entries in that + * underlying file system. These underlying 'back end' nodes are used as proxies + * for various changes in permissions. While specific sets of nodes, such as + * dynamic ones, are exempt, this process stores permission changes against + * these back end nodes. The point of all of this is to allow for these settings + * to persist across host and zone reboots. As an example, consider the entry + * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon + * changing the permissions on c0t0d0 you'd have the following logical + * relationships: + * + * +------------------+ sdev_vnode +--------------+ + * | sdev_node_t |<---------------->| vnode_t | + * | /dev/dsk/c0t0d0 |<---------------->| for sdev | + * +------------------+ +--------------+ + * | + * | sdev_attrvp + * | + * | +---------------------+ + * +--->| vnode_t for UFS|ZFS | + * | /dev/dsk/c0t0d0 | + * +---------------------+ + * + * sdev is generally in memory. Therefore when a lookup happens and there is no + * entry already inside of a directory cache, it will next check the backing + * store. If the backing store exists, we will reconstitute the sdev_node based + * on the information that we persisted. When we create the backing store node, + * we use the struct vattr information that we already have in sdev_node_t. + * Because of this, we already know if the entry was previously a symlink, + * directory, or some other kind of type. Note that not all types of nodes are + * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are + * eligible to be persisted. + * + * When the sdev_node is created and the lookup is done, we grab a hold on the + * underlying vnode as part of the call to VOP_LOOKUP. That reference is held + * until the sdev_node becomes inactive. Once its reference count reaches one + * and the VOP_INACTIVE callback fires leading to the destruction of the node, + * the reference on the underlying vnode will be released. + * + * The backing store node will be deleted only when the node itself is deleted + * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. + * + * Not everything can be persisted, see The Rules section for more details. + * + * Dynamic Nodes + * ------------- + * + * Dynamic nodes allow for specific interactions with various kernel subsystems + * when looking up directory entries. This allows the lookup and readdir + * functions to check against the kernel subsystem's for validity. eg. does a + * zvol or nic still exist. + * + * More specifically, when we create various directories we check if the + * directory name matches that of one of the names in the vtab[] (sdev_subr.c). + * If it does, we swap out the vnode operations into a new set which combine the + * normal sdev vnode operations with the dynamic set here. + * + * In addition, various dynamic nodes implement a verification entry point. This + * verification entry is used as a part of lookup and readdir. The goal for + * these dynamic nodes is to allow them to check with the underlying subsystems + * to ensure that these devices are still present, or if they have gone away, to + * remove them from the results. This is indicated by using the SDEV_VTOR flag + * in vtab[]. + * + * Dynamic nodes have additional restrictions placed upon them. They may only + * appear at the top level directory of the file system. In addition, users + * cannot create dirents below any leve of a dynamic node aside from its special + * vnops. + * + * Profiles + * -------- + * + * Profiles exist for the purpose of non-global zones. They work with the zone + * brands and zoneadmd to set up a filter of allowed devices that can appear in + * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a + * modctl system call. Specifically it allows one to add patterns of device + * paths to include and exclude. It allows for a collection of symlinks to be + * added and it allows for remapping names. + * + * When operating in a non-global zone, several of the sdev vnops are redirected + * to the profile versions. These impose additional restrictions such as + * enforcing that a non-global zone's /dev is read only. + * + * sdev_node_t States + * ------------------ + * + * A given sdev_node_t has a field called the sdev_state which describes where + * in the sdev life cycle it is. There are three primary states: SDEV_INIT, + * SDEV_READY, and SDEV_ZOMBIE. + * + * SDEV_INIT: When a new /dev file is first looked up, a sdev_node + * is allocated, initialized and added to the directory's + * sdev_node cache. A node at this state will also + * have the SDEV_LOOKUP flag set. + * + * Other threads that are trying to look up a node at + * this state will be blocked until the SDEV_LOOKUP flag + * is cleared. + * + * When the SDEV_LOOKUP flag is cleared, the node may + * transition into the SDEV_READY state for a successful + * lookup or the node is removed from the directory cache + * and destroyed if the named node can not be found. + * An ENOENT error is returned for the second case. + * + * SDEV_READY: A /dev file has been successfully looked up and + * associated with a vnode. The /dev file is available + * for the supported /dev file system operations. + * + * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued + * to an SDEV_READY node. The node is transitioned into + * the SDEV_ZOMBIE state if the vnode reference count + * is still held. A SDEV_ZOMBIE node does not support + * any of the /dev file system operations. A SDEV_ZOMBIE + * node is immediately removed from the directory cache + * and destroyed once the reference count reaches zero. + * + * Historically nodes that were marked SDEV_ZOMBIE were not removed from the + * underlying directory caches. This has been the source of numerous bugs and + * thus to better mimic what happens on a real file system, it is no longer the + * case. + * + * The following state machine describes the life cycle of a given node and its + * associated states: + * + * node is . . . . . + * allocated via . +-------------+ . . . . . . . vnode_t refcount + * sdev_nodeinit() . | Unallocated | . reaches zero and + * +--------*-----| Memory |<--------*---+ sdev_inactive is + * | +-------------+ | called. + * | +------------^ | called. + * v | | + * +-----------+ * . . sdev_nodeready() +-------------+ + * | SDEV_INIT | | or related setup | SDEV_ZOMBIE | + * +-----------+ | failure +-------------+ + * | | ^ + * | | +------------+ | + * +-*----------->| SDEV_READY |--------*-----+ + * . +------------+ . The node is no longer + * . . node successfully . . . . . valid or we've been + * inserted into the asked to remove it. + * directory cache This happens via + * and sdev_nodready() sdev_dirdelete(). + * call successful. + * + * Adding and Removing Dirents, Zombie Nodes + * ----------------------------------------- + * + * As part of doing a lookup, readdir, or an explicit creation operation like + * mkdir or create, nodes may be created. Every directory has an avl tree which + * contains its children, the sdev_entries tree. This is only used if the type + * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and + * it is managed through sdev_cache_update(). + * + * Every sdev_node_t has a field sdev_state, which describes the current state + * of the node. A node is generally speaking in the SDEV_READY state. When it is + * there, it can be looked up, accessed, and operations performed on it. When a + * node is going to be removed from the directory cache it is marked as a + * zombie. Once a node becomes a zombie, no other file system operations will + * succeed and it will continue to exist as a node until the vnode count on the + * node reaches zero. At that point, the node will be freed. However, once a + * node has been marked as a zombie, it will be removed immediately from the + * directory cache such that no one else may find it again. This means that + * someone else can insert a new entry into that directory with the same name + * and without a problem. + * + * To remove a node, see the section on that in The Rules. + * + * The Rules + * --------- + * These are the rules to live by when working in sdev. These are not + * exhaustive. + * + * - Set 1: Working with Backing Nodes + * o If there is a SDEV_READY sdev_node_t, it knows about its backing node. + * o If we find a backing node when looking up an sdev_node_t for the first + * time, we use its attributes to build our sdev_node_t. + * o If there is a found backing node, or we create a backing node, that's + * when we grab the hold on its vnode. + * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from + * the underlying file system. It must not be searchable or findable. + * o We release our hold on the backing node vnode when we destroy the + * sdev_node_t. + * + * - Set 2: Locking rules for sdev (not exhaustive) + * o The majority of nodes contain an sdev_contents rw lock. You must hold it + * for read or write if manipulating its contents appropriately. + * o You must lock your parent before yourself. + * o If you need your vnode's v_lock and the sdev_contents rw lock, you must + * grab the v_lock before the sdev_contents rw_lock. + * o If you release a lock on the node as a part of upgrading it, you must + * verify that the node has not become a zombie as a part of this process. + * + * - Set 3: Zombie Status and What it Means + * o If you encounter a node that is a ZOMBIE, that means that it has been + * unlinked from the backing store. + * o If you release your contents lock and acquire it again (say as part of + * trying to grab a write lock) you must check that the node has not become + * a zombie. + * o You should VERIFY that a looked up node is not a zombie. This follows + * from the following logic. To mark something as a zombie means that it is + * removed from the parents directory cache. To do that, you must have a + * write lock on the parent's sdev_contents. To lookup through that + * directory you must have a read lock. This then becomes a simple ordering + * problem. If you've been granted the lock then the other operation cannot + * be in progress or must have already succeeded. + * + * - Set 4: Removing Directory Entries (aka making nodes Zombies) + * o Write lock must be held on the directory + * o Write lock must be held on the node + * o Remove the sdev_node_t from its parent cache + * o Remove the corresponding backing store node, if it exists, eg. use + * VOP_REMOVE or VOP_RMDIR. + * o You must NOT make any change in the vnode reference count! Nodes should + * only be cleaned up through VOP_INACTIVE callbacks. + * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of + * the backing store vnode that was grabbed during lookup. + * + * - Set 5: What Nodes may be Persisted + * o The root, /dev is always persisted + * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted + * unless it is also marked SDEV_PERSIST + * o Anything whose parent directory is marked SDEV_PERSIST will pass that + * along to the child as long as it does not contradict the above rules + */ + #include <sys/types.h> #include <sys/param.h> #include <sys/t_lock.h> @@ -559,6 +804,12 @@ sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, if (!rw_tryupgrade(&parent->sdev_contents)) { rw_exit(&parent->sdev_contents); rw_enter(&parent->sdev_contents, RW_WRITER); + /* Make sure we didn't become a zombie */ + if (parent->sdev_state == SDEV_ZOMBIE) { + rw_exit(&parent->sdev_contents); + VN_RELE(vp); + return (ENOENT); + } } /* we do not support unlinking a non-empty directory */ @@ -574,39 +825,28 @@ sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, * - destroying the sdev_node * - releasing the hold on attrvp */ - error = sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); + sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); + VN_RELE(vp); rw_exit(&parent->sdev_contents); - sdcmn_err2(("sdev_remove: cache_update error %d\n", error)); - if (error && (error != EBUSY)) { - /* report errors other than EBUSY */ - VN_RELE(vp); - } else { - sdcmn_err2(("sdev_remove: cleaning node %s from cache " - " with error %d\n", nm, error)); - + /* + * best efforts clean up the backing store + */ + if (bkstore) { + ASSERT(parent->sdev_attrvp); + error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, + ct, flags); /* - * best efforts clean up the backing store + * do not report BUSY error + * because the backing store ref count is released + * when the last ref count on the sdev_node is + * released. */ - if (bkstore) { - ASSERT(parent->sdev_attrvp); - error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, - ct, flags); - /* - * do not report BUSY error - * because the backing store ref count is released - * when the last ref count on the sdev_node is - * released. - */ - if (error == EBUSY) { - sdcmn_err2(("sdev_remove: device %s is still on" - "disk %s\n", nm, parent->sdev_path)); - error = 0; - } - } - - if (error == EBUSY) + if (error == EBUSY) { + sdcmn_err2(("sdev_remove: device %s is still on" + "disk %s\n", nm, parent->sdev_path)); error = 0; + } } return (error); @@ -716,6 +956,8 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) { mutex_exit(&sdev_lock); VN_RELE(ovp); + if (nvp != NULL) + VN_RELE(nvp); return (error); } fsid = vattr.va_fsid; @@ -723,11 +965,15 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) { mutex_exit(&sdev_lock); VN_RELE(ovp); + if (nvp != NULL) + VN_RELE(nvp); return (error); } if (fsid != vattr.va_fsid) { mutex_exit(&sdev_lock); VN_RELE(ovp); + if (nvp != NULL) + VN_RELE(nvp); return (EXDEV); } } @@ -737,6 +983,8 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, if (error) { mutex_exit(&sdev_lock); VN_RELE(ovp); + if (nvp != NULL) + VN_RELE(nvp); return (error); } @@ -747,6 +995,8 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, if (error) { mutex_exit(&sdev_lock); VN_RELE(ovp); + if (nvp != NULL) + VN_RELE(nvp); return (error); } } @@ -755,21 +1005,31 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, ASSERT(fromdv); /* destination file exists */ - if (nvp) { + if (nvp != NULL) { todv = VTOSDEV(nvp); ASSERT(todv); } + if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || + (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { + mutex_exit(&sdev_lock); + if (nvp != NULL) + VN_RELE(nvp); + VN_RELE(ovp); + return (EACCES); + } + /* - * link source to new target in the memory + * link source to new target in the memory. Regardless of failure, we + * must rele our hold on nvp. */ error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); + if (nvp != NULL) + VN_RELE(nvp); if (error) { sdcmn_err2(("sdev_rename: renaming %s to %s failed " " with error %d\n", onm, nnm, error)); mutex_exit(&sdev_lock); - if (nvp) - VN_RELE(nvp); VN_RELE(ovp); return (error); } @@ -782,6 +1042,7 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, if (fromdv == NULL) { rw_exit(&fromparent->sdev_contents); mutex_exit(&sdev_lock); + VN_RELE(ovp); sdcmn_err2(("sdev_rename: the source is deleted already\n")); return (0); } @@ -790,6 +1051,7 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, rw_exit(&fromparent->sdev_contents); mutex_exit(&sdev_lock); VN_RELE(SDEVTOV(fromdv)); + VN_RELE(ovp); sdcmn_err2(("sdev_rename: the source is being deleted\n")); return (0); } @@ -809,8 +1071,9 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, rw_enter(&fromparent->sdev_contents, RW_WRITER); bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0; - error = sdev_cache_update(fromparent, &fromdv, onm, + sdev_cache_update(fromparent, &fromdv, onm, SDEV_CACHE_DELETE); + VN_RELE(SDEVTOV(fromdv)); /* best effforts clean up the backing store */ if (bkstore) { @@ -1078,27 +1341,23 @@ sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred, rw_exit(&self->sdev_contents); /* unlink it from the directory cache */ - error = sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); + sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); rw_exit(&parent->sdev_contents); vn_vfsunlock(vp); + VN_RELE(vp); - if (error && (error != EBUSY)) { - VN_RELE(vp); - } else { - sdcmn_err2(("sdev_rmdir: cleaning node %s from directory " - " cache with error %d\n", nm, error)); + /* best effort to clean up the backing store */ + if (SDEV_IS_PERSIST(parent)) { + ASSERT(parent->sdev_attrvp); + error = VOP_RMDIR(parent->sdev_attrvp, nm, + parent->sdev_attrvp, kcred, ct, flags); - /* best effort to clean up the backing store */ - if (SDEV_IS_PERSIST(parent)) { - ASSERT(parent->sdev_attrvp); - error = VOP_RMDIR(parent->sdev_attrvp, nm, - parent->sdev_attrvp, kcred, ct, flags); + if (error) sdcmn_err2(("sdev_rmdir: cleaning device %s is on" " disk error %d\n", parent->sdev_path, error)); - } - if (error == EBUSY) error = 0; + } return (error); diff --git a/usr/src/uts/common/fs/dev/sdev_vtops.c b/usr/src/uts/common/fs/dev/sdev_vtops.c index 39b82c4285..e29262d257 100644 --- a/usr/src/uts/common/fs/dev/sdev_vtops.c +++ b/usr/src/uts/common/fs/dev/sdev_vtops.c @@ -101,7 +101,6 @@ devvt_validate(struct sdev_node *dv) char *nm = dv->sdev_name; int rval; - ASSERT(!(dv->sdev_flags & SDEV_STALE)); ASSERT(dv->sdev_state == SDEV_READY); ASSERT(RW_LOCK_HELD(&(dv->sdev_dotdot)->sdev_contents)); @@ -273,6 +272,11 @@ devvt_create_snode(struct sdev_node *ddv, char *nm, struct cred *cred, int type) kmem_free(link, MAXPATHLEN); } + if (error != 0) { + SDEV_RELE(sdv); + return; + } + mutex_enter(&sdv->sdev_lookup_lock); SDEV_UNBLOCK_OTHERS(sdv, SDEV_LOOKUP); mutex_exit(&sdv->sdev_lookup_lock); @@ -337,6 +341,7 @@ devvt_prunedir(struct sdev_node *ddv) SDEV_HOLD(dv); (void) sdev_cache_update(ddv, &dv, dv->sdev_name, SDEV_CACHE_DELETE); + SDEV_RELE(dv); break; case SDEV_VTOR_STALE: devvt_rebuild_stale_link(ddv, dv); @@ -382,9 +387,6 @@ devvt_cleandir(struct vnode *dvp, struct cred *cred) for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) { next = SDEV_NEXT_ENTRY(sdvp, dv); - /* skip stale nodes */ - if (dv->sdev_flags & SDEV_STALE) - continue; /* validate only ready nodes */ if (dv->sdev_state != SDEV_READY) continue; @@ -403,9 +405,6 @@ devvt_cleandir(struct vnode *dvp, struct cred *cred) for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) { next = SDEV_NEXT_ENTRY(sdvp, dv); - /* skip stale nodes */ - if (dv->sdev_flags & SDEV_STALE) - continue; /* validate only ready nodes */ if (dv->sdev_state != SDEV_READY) continue; diff --git a/usr/src/uts/common/fs/dev/sdev_zvolops.c b/usr/src/uts/common/fs/dev/sdev_zvolops.c index 192b794c67..6543504e97 100644 --- a/usr/src/uts/common/fs/dev/sdev_zvolops.c +++ b/usr/src/uts/common/fs/dev/sdev_zvolops.c @@ -464,12 +464,6 @@ devzvol_prunedir(struct sdev_node *ddv) dv = SDEV_FIRST_ENTRY(ddv); while (dv) { sdcmn_err13(("sdev_name '%s'", dv->sdev_name)); - /* skip stale nodes */ - if (dv->sdev_flags & SDEV_STALE) { - sdcmn_err13((" stale")); - dv = SDEV_NEXT_ENTRY(ddv, dv); - continue; - } switch (devzvol_validate(dv)) { case SDEV_VTOR_VALID: @@ -489,11 +483,10 @@ devzvol_prunedir(struct sdev_node *ddv) } SDEV_HOLD(dv); /* remove the cache node */ - if (sdev_cache_update(ddv, &dv, dv->sdev_name, - SDEV_CACHE_DELETE) == 0) - dv = SDEV_FIRST_ENTRY(ddv); - else - dv = SDEV_NEXT_ENTRY(ddv, dv); + sdev_cache_update(ddv, &dv, dv->sdev_name, + SDEV_CACHE_DELETE); + SDEV_RELE(dv); + dv = SDEV_FIRST_ENTRY(ddv); } rw_downgrade(&ddv->sdev_contents); } diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h index 0a841e9d22..19a147d88a 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -181,35 +181,8 @@ typedef struct sdev_node { #define SDEV_NEXT_ENTRY(ddv, dv) AVL_NEXT(&(ddv)->sdev_entries, (dv)) /* - * sdev_state - * - * A sdev_node may go through 3 states: - * SDEV_INIT: When a new /dev file is first looked up, a sdev_node - * is allocated, initialized and added to the directory's - * sdev_node cache. A node at this state will also - * have the SDEV_LOOKUP flag set. - * - * Other threads that are trying to look up a node at - * this state will be blocked until the SDEV_LOOKUP flag - * is cleared. - * - * When the SDEV_LOOKUP flag is cleared, the node may - * transition into the SDEV_READY state for a successful - * lookup or the node is removed from the directory cache - * and destroyed if the named node can not be found. - * An ENOENT error is returned for the second case. - * - * SDEV_READY: A /dev file has been successfully looked up and - * associated with a vnode. The /dev file is available - * for the supported /dev filesystem operations. - * - * SDEV_ZOMBIE: Deletion of a /dev file has been explicitely issued - * to an SDEV_READY node. The node is transitioned into - * the SDEV_ZOMBIE state if the vnode reference count - * is still held. A SDEV_ZOMBIE node does not support - * any of the /dev filesystem operations. A SDEV_ZOMBIE - * node is removed from the directory cache and destroyed - * once the reference count reaches "zero". + * See the big theory statement in sdev_vnops.c for an explanation of these + * states. */ typedef enum { SDEV_ZOMBIE = -1, @@ -219,17 +192,16 @@ typedef enum { /* sdev_flags */ #define SDEV_BUILD 0x0001 /* directory cache out-of-date */ -#define SDEV_STALE 0x0002 /* stale sdev nodes */ -#define SDEV_GLOBAL 0x0004 /* global /dev nodes */ -#define SDEV_PERSIST 0x0008 /* backing store persisted node */ -#define SDEV_NO_NCACHE 0x0010 /* do not include in neg. cache */ -#define SDEV_DYNAMIC 0x0020 /* special-purpose vnode ops */ +#define SDEV_GLOBAL 0x0002 /* global /dev nodes */ +#define SDEV_PERSIST 0x0004 /* backing store persisted node */ +#define SDEV_NO_NCACHE 0x0008 /* do not include in neg. cache */ +#define SDEV_DYNAMIC 0x0010 /* special-purpose vnode ops */ /* (ex: pts) */ -#define SDEV_VTOR 0x0040 /* validate sdev_nodes during search */ -#define SDEV_ATTR_INVALID 0x0080 /* invalid node attributes, */ +#define SDEV_VTOR 0x0020 /* validate sdev_nodes during search */ +#define SDEV_ATTR_INVALID 0x0040 /* invalid node attributes, */ /* need update */ -#define SDEV_SUBDIR 0x0100 /* match all subdirs under here */ -#define SDEV_ZONED 0x0200 /* zoned subdir */ +#define SDEV_SUBDIR 0x0080 /* match all subdirs under here */ +#define SDEV_ZONED 0x0100 /* zoned subdir */ /* sdev_lookup_flags */ #define SDEV_LOOKUP 0x0001 /* node creation in progress */ @@ -471,7 +443,7 @@ typedef enum { } sdev_cache_ops_t; extern struct sdev_node *sdev_cache_lookup(struct sdev_node *, char *); -extern int sdev_cache_update(struct sdev_node *, struct sdev_node **, char *, +extern void sdev_cache_update(struct sdev_node *, struct sdev_node **, char *, sdev_cache_ops_t); extern void sdev_node_cache_init(void); extern void sdev_node_cache_fini(void); |
