diff options
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_ipnetops.c | 9 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_netops.c | 13 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_profile.c | 11 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_ptsops.c | 2 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_subr.c | 345 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_vnops.c | 351 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_vtops.c | 13 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/dev/sdev_zvolops.c | 15 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/fs/sdev_impl.h | 50 | 
9 files changed, 518 insertions, 291 deletions
| diff --git a/usr/src/uts/common/fs/dev/sdev_ipnetops.c b/usr/src/uts/common/fs/dev/sdev_ipnetops.c index dbca322afb..8d348a31d9 100644 --- a/usr/src/uts/common/fs/dev/sdev_ipnetops.c +++ b/usr/src/uts/common/fs/dev/sdev_ipnetops.c @@ -161,6 +161,14 @@ devipnet_filldir(struct sdev_node *ddv)  	if (rw_tryupgrade(&ddv->sdev_contents) == NULL) {  		rw_exit(&ddv->sdev_contents);  		rw_enter(&ddv->sdev_contents, RW_WRITER); +		/* +		 * We've been made a zombie while we weren't looking. We'll bail +		 * if that's the case. +		 */ +		if (ddv->sdev_state == SDEV_ZOMBIE) { +			rw_exit(&ddv->sdev_contents); +			return; +		}  	}  	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { @@ -186,6 +194,7 @@ devipnet_filldir(struct sdev_node *ddv)  		/* remove the cache node */  		(void) sdev_cache_update(ddv, &dv, dv->sdev_name,  		    SDEV_CACHE_DELETE); +		SDEV_RELE(dv);  	}  	ipnet_walk_if(devipnet_filldir_entry, ddv, getzoneid()); diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c index a4c4ab168d..4eaf38f484 100644 --- a/usr/src/uts/common/fs/dev/sdev_netops.c +++ b/usr/src/uts/common/fs/dev/sdev_netops.c @@ -58,7 +58,6 @@ devnet_validate(struct sdev_node *dv)  	datalink_id_t linkid;  	zoneid_t zoneid; -	ASSERT(!(dv->sdev_flags & SDEV_STALE));  	ASSERT(dv->sdev_state == SDEV_READY);  	if (dls_mgmt_get_linkid(dv->sdev_name, &linkid) != 0) @@ -149,13 +148,9 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,  	 * directory cache lookup:  	 */  	if ((dv = sdev_cache_lookup(ddv, nm)) != NULL) { -		if (dv->sdev_state == SDEV_READY) { -			if (!(dv->sdev_flags & SDEV_ATTR_INVALID)) -				goto found; -		} else { -			ASSERT(dv->sdev_state == SDEV_ZOMBIE); -			goto failed; -		} +		ASSERT(dv->sdev_state == SDEV_READY); +		if (!(dv->sdev_flags & SDEV_ATTR_INVALID)) +			goto found;  	}  	/* @@ -170,7 +165,6 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,  	error = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, cred, SDEV_READY);  	if (error != 0) { -		ASSERT(dv == NULL);  		dls_devnet_close(ddh);  		goto failed;  	} @@ -286,6 +280,7 @@ devnet_filldir(struct sdev_node *ddv)  		/* remove the cache node */  		(void) sdev_cache_update(ddv, &dv, dv->sdev_name,  		    SDEV_CACHE_DELETE); +		SDEV_RELE(dv);  	}  	if (((ddv->sdev_flags & SDEV_BUILD) == 0) && !dls_devnet_rebuild()) diff --git a/usr/src/uts/common/fs/dev/sdev_profile.c b/usr/src/uts/common/fs/dev/sdev_profile.c index 01d4aab1ae..65890b1a2f 100644 --- a/usr/src/uts/common/fs/dev/sdev_profile.c +++ b/usr/src/uts/common/fs/dev/sdev_profile.c @@ -161,7 +161,7 @@ prof_mknode(struct sdev_node *dir, char *name, struct sdev_node **newdv,  		return (rv);  	} -	rv = sdev_cache_update(dir, &dv, name, SDEV_CACHE_ADD); +	sdev_cache_update(dir, &dv, name, SDEV_CACHE_ADD);  	*newdv = dv;  	/* put it in ready state */ @@ -173,6 +173,9 @@ prof_mknode(struct sdev_node *dir, char *name, struct sdev_node **newdv,  			sdcmn_err10(("sdev_origin for %s set to 0x%p\n",  			    name, arg));  		apply_glob_pattern(dir, *newdv); +	} else { +		sdev_cache_update(dir, &dv, name, SDEV_CACHE_DELETE); +		SDEV_RELE(dv);  	}  	return (rv);  } @@ -198,8 +201,8 @@ prof_make_dir(char *name, struct sdev_node **gdirp, struct sdev_node **dirp)  	if (newdv = sdev_cache_lookup(dir, name)) {  		*dirp = newdv;  		*gdirp = newdv->sdev_origin; -		SDEV_RELE(dir);  		rw_exit(&dir->sdev_contents); +		SDEV_RELE(dir);  		return (0);  	}  	rw_exit(&dir->sdev_contents); @@ -676,6 +679,10 @@ check_build:  	    == gdir->sdev_gdir_gen))  		return;		/* already up to date */ +	/* We may have become a zombie (across a try) */ +	if (ddv->sdev_state == SDEV_ZOMBIE) +		return; +  	if (firsttime && rw_tryupgrade(&ddv->sdev_contents) == 0) {  		rw_exit(&ddv->sdev_contents);  		firsttime = 0; diff --git a/usr/src/uts/common/fs/dev/sdev_ptsops.c b/usr/src/uts/common/fs/dev/sdev_ptsops.c index a228d05086..af98df755d 100644 --- a/usr/src/uts/common/fs/dev/sdev_ptsops.c +++ b/usr/src/uts/common/fs/dev/sdev_ptsops.c @@ -107,7 +107,6 @@ devpts_validate(struct sdev_node *dv)  	timestruc_t now;  	char *nm = dv->sdev_name; -	ASSERT(!(dv->sdev_flags & SDEV_STALE));  	ASSERT(dv->sdev_state == SDEV_READY);  	/* validate only READY nodes */ @@ -258,6 +257,7 @@ devpts_prunedir(struct sdev_node *ddv)  		/* remove the cache node */  		(void) sdev_cache_update(ddv, &dv, dv->sdev_name,  		    SDEV_CACHE_DELETE); +		SDEV_RELE(dv);  	}  	rw_downgrade(&ddv->sdev_contents);  } diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c index f29918e351..b4b27e6285 100644 --- a/usr/src/uts/common/fs/dev/sdev_subr.c +++ b/usr/src/uts/common/fs/dev/sdev_subr.c @@ -20,7 +20,7 @@   */  /*   * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved.   */  /* @@ -370,7 +370,8 @@ sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,  }  /* - * transition a sdev_node into SDEV_READY state + * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the + * caller to transition the node to the SDEV_ZOMBIE state.   */  int  sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, @@ -433,26 +434,12 @@ sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,  		/* transition to READY state */  		sdev_set_nodestate(dv, SDEV_READY);  		sdev_nc_node_exists(dv); -	} else { -		sdev_set_nodestate(dv, SDEV_ZOMBIE);  	}  	rw_exit(&dv->sdev_contents);  	return (error);  }  /* - * setting ZOMBIE state - */ -static int -sdev_nodezombied(struct sdev_node *dv) -{ -	rw_enter(&dv->sdev_contents, RW_WRITER); -	sdev_set_nodestate(dv, SDEV_ZOMBIE); -	rw_exit(&dv->sdev_contents); -	return (0); -} - -/*   * Build the VROOT sdev_node.   */  /*ARGSUSED*/ @@ -568,23 +555,31 @@ static struct sdev_vop_table vtab[] =  	{ NULL, NULL, NULL, NULL, NULL, 0}  }; +/* + * We need to match off of the sdev_path, not the sdev_name. We are only allowed + * to exist directly under /dev. + */  struct sdev_vop_table *  sdev_match(struct sdev_node *dv)  {  	int vlen;  	int i; +	const char *path; + +	if (strlen(dv->sdev_path) <= 5) +		return (NULL); + +	if (strncmp(dv->sdev_path, "/dev/", 5) != 0) +		return (NULL); +	path = dv->sdev_path + 5;  	for (i = 0; vtab[i].vt_name; i++) { -		if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0) +		if (strcmp(vtab[i].vt_name, path) == 0)  			return (&vtab[i]);  		if (vtab[i].vt_flags & SDEV_SUBDIR) { -			char *ptr; - -			ASSERT(strlen(dv->sdev_path) > 5); -			ptr = dv->sdev_path + 5;  			vlen = strlen(vtab[i].vt_name); -			if ((strncmp(vtab[i].vt_name, ptr, -			    vlen - 1) == 0) && ptr[vlen] == '/') +			if ((strncmp(vtab[i].vt_name, path, +			    vlen - 1) == 0) && path[vlen] == '/')  				return (&vtab[i]);  		} @@ -610,10 +605,14 @@ sdev_get_vop(struct sdev_node *dv)  	/* gets the vtab entry it matches */  	if ((vtp = sdev_match(dv)) != NULL) {  		dv->sdev_flags |= vtp->vt_flags; +		if (SDEV_IS_PERSIST(dv->sdev_dotdot) && +		    (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv))) +			dv->sdev_flags |= SDEV_PERSIST;  		if (vtp->vt_vops) {  			if (vtp->vt_global_vops)  				*(vtp->vt_global_vops) = vtp->vt_vops; +  			return (vtp->vt_vops);  		} @@ -631,8 +630,10 @@ sdev_get_vop(struct sdev_node *dv)  				*(vtp->vt_global_vops) = vtp->vt_vops;  			}  			sdev_free_vtab(templ); +  			return (vtp->vt_vops);  		} +  		return (sdev_vnodeops);  	} @@ -743,7 +744,6 @@ sdev_follow_link(struct sdev_node *dv)  	ASSERT(linkvp->v_type == VLNK);  	err = sdev_getlink(linkvp, &link);  	if (err) { -		(void) sdev_nodezombied(dv);  		dv->sdev_symlink = NULL;  		return (ENOENT);  	} @@ -839,12 +839,7 @@ sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,  		ASSERT(dv);  		/* insert into the directory cache */ -		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); -		if (error) { -			sdcmn_err9(("sdev_mknode: node %s can not" -			    " be added into directory cache\n", nm)); -			return (ENOENT); -		} +		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);  	}  	ASSERT(dv); @@ -878,7 +873,13 @@ sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,  		*newdv = dv;  		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);  	} else { -		SDEV_SIMPLE_RELE(dv); +		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); +		/* +		 * We created this node, it wasn't passed into us. Therefore it +		 * is up to us to delete it. +		 */ +		if (*newdv == NULL) +			SDEV_SIMPLE_RELE(dv);  		*newdv = NULL;  	} @@ -986,6 +987,7 @@ sdev_findbyname(struct sdev_node *ddv, char *nm)  	if (dv) {  		ASSERT(dv->sdev_dotdot == ddv);  		ASSERT(strcmp(dv->sdev_name, nm) == 0); +		ASSERT(dv->sdev_state != SDEV_ZOMBIE);  		SDEV_HOLD(dv);  		return (dv);  	} @@ -1004,6 +1006,7 @@ sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)  	ASSERT(SDEVTOV(ddv)->v_type == VDIR);  	ASSERT(ddv->sdev_nlink >= 2);  	ASSERT(dv->sdev_nlink == 0); +	ASSERT(dv->sdev_state != SDEV_ZOMBIE);  	dv->sdev_dotdot = ddv;  	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); @@ -1019,60 +1022,55 @@ sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)  static void  decr_link(struct sdev_node *dv)  { -	if (dv->sdev_state != SDEV_INIT) +	VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); +	if (dv->sdev_state != SDEV_INIT) { +		VERIFY(dv->sdev_nlink >= 1);  		dv->sdev_nlink--; -	else -		ASSERT(dv->sdev_nlink == 0); +	} else { +		VERIFY(dv->sdev_nlink == 0); +	}  }  /*   * Delete an existing dv from directory cache   * - * In the case of a node is still held by non-zero reference count, - *     the node is put into ZOMBIE state. Once the reference count - *     reaches "0", the node is unlinked and destroyed, - *     in sdev_inactive(). + * In the case of a node is still held by non-zero reference count, the node is + * put into ZOMBIE state. The node is always unlinked from its parent, but it is + * not destroyed via sdev_inactive until its reference count reaches "0".   */ -static int +static void  sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)  {  	struct vnode *vp; +	sdev_node_state_t os;  	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));  	vp = SDEVTOV(dv);  	mutex_enter(&vp->v_lock); +	rw_enter(&dv->sdev_contents, RW_WRITER); +	os = dv->sdev_state; +	ASSERT(os != SDEV_ZOMBIE); +	dv->sdev_state = SDEV_ZOMBIE; -	/* dv is held still */ -	if (vp->v_count > 1) { -		rw_enter(&dv->sdev_contents, RW_WRITER); -		if (dv->sdev_state == SDEV_READY) { -			sdcmn_err9(( -			    "sdev_dirdelete: node %s busy with count %d\n", -			    dv->sdev_name, vp->v_count)); -			dv->sdev_state = SDEV_ZOMBIE; -		} -		rw_exit(&dv->sdev_contents); -		--vp->v_count; -		mutex_exit(&vp->v_lock); -		return (EBUSY); -	} -	ASSERT(vp->v_count == 1); - -	/* unlink from the memory cache */ -	ddv->sdev_nlink--;	/* .. to above */ -	if (vp->v_type == VDIR) { -		decr_link(dv);		/* . to self */ -	} - +	/* +	 * unlink ourselves from the parent directory now to take care of the .. +	 * link. However, if we're a directory, we don't remove our reference to +	 * ourself eg. '.' until we are torn down in the inactive callback. +	 */ +	decr_link(ddv);  	avl_remove(&ddv->sdev_entries, dv); -	decr_link(dv);	/* name, back to zero */ -	vp->v_count--; +	/* +	 * sdev_inactive expects nodes to have a link to themselves when we're +	 * tearing them down. If we're transitioning from the initial state to +	 * zombie and not via ready, then we're not going to have this link that +	 * comes from the node being ready. As a result, we need to increment +	 * our link count by one to account for this. +	 */ +	if (os == SDEV_INIT && dv->sdev_nlink == 0) +		dv->sdev_nlink++; +	rw_exit(&dv->sdev_contents);  	mutex_exit(&vp->v_lock); - -	/* destroy the node */ -	sdev_nodedestroy(dv, 0); -	return (0);  }  /* @@ -1169,6 +1167,35 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  			goto err_out;  	} +	/* fix the source for a symlink */ +	if (vattr.va_type == VLNK) { +		if (odv->sdev_symlink == NULL) { +			error = sdev_follow_link(odv); +			if (error) { +				/* +				 * The underlying symlink doesn't exist. This +				 * node probably shouldn't even exist. While +				 * it's a bit jarring to consumers, we're going +				 * to remove the node from /dev. +				 */ +				if (SDEV_IS_PERSIST((*ndvp))) +					bkstore = 1; +				sdev_dirdelete(oddv, odv); +				if (bkstore) { +					ASSERT(nddv->sdev_attrvp); +					error = VOP_REMOVE(nddv->sdev_attrvp, +					    nnm, cred, NULL, 0); +					if (error) +						goto err_out; +				} +				error = ENOENT; +				goto err_out; +			} +		} +		ASSERT(odv->sdev_symlink); +		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); +	} +  	/* destination existing */  	if (*ndvp) {  		nvp = SDEVTOV(*ndvp); @@ -1205,7 +1232,12 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  			}  			vn_vfsunlock(nvp); -			(void) sdev_dirdelete(nddv, *ndvp); +			/* +			 * We did not place the hold on *ndvp, so even though +			 * we're deleting the node, we should not get rid of our +			 * reference. +			 */ +			sdev_dirdelete(nddv, *ndvp);  			*ndvp = NULL;  			ASSERT(nddv->sdev_attrvp);  			error = VOP_RMDIR(nddv->sdev_attrvp, nnm, @@ -1223,11 +1255,11 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  			}  			/* -			 * get rid of the node from the directory cache -			 * note, in case EBUSY is returned, the ZOMBIE -			 * node is taken care in sdev_mknode. +			 * Get rid of the node from the directory cache note. +			 * Don't forget that it's not up to us to remove the vn +			 * ref on the sdev node, as we did not place it.  			 */ -			(void) sdev_dirdelete(nddv, *ndvp); +			sdev_dirdelete(nddv, *ndvp);  			*ndvp = NULL;  			if (bkstore) {  				ASSERT(nddv->sdev_attrvp); @@ -1239,19 +1271,6 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  		}  	} -	/* fix the source for a symlink */ -	if (vattr.va_type == VLNK) { -		if (odv->sdev_symlink == NULL) { -			error = sdev_follow_link(odv); -			if (error) { -				error = ENOENT; -				goto err_out; -			} -		} -		ASSERT(odv->sdev_symlink); -		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); -	} -  	/*  	 * make a fresh node from the source attrs  	 */ @@ -1259,8 +1278,10 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  	error = sdev_mknode(nddv, nnm, ndvp, &vattr,  	    NULL, (void *)link, cred, SDEV_READY); -	if (link) +	if (link != NULL) {  		kmem_free(link, strlen(link) + 1); +		link = NULL; +	}  	if (error)  		goto err_out; @@ -1271,9 +1292,11 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  	if (doingdir) {  		for (idv = SDEV_FIRST_ENTRY(odv); idv;  		    idv = SDEV_NEXT_ENTRY(odv, idv)) { +			SDEV_HOLD(idv);  			error = sdev_rnmnode(odv, idv,  			    (struct sdev_node *)(*ndvp), &ndv,  			    idv->sdev_name, cred); +			SDEV_RELE(idv);  			if (error)  				goto err_out;  			ndv = NULL; @@ -1307,6 +1330,11 @@ sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,  	return (error);  err_out: +	if (link != NULL) { +		kmem_free(link, strlen(link) + 1); +		link = NULL; +	} +  	rw_exit(&nddv->sdev_contents);  	if (!samedir)  		rw_exit(&oddv->sdev_contents); @@ -1465,18 +1493,8 @@ sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)  			vp = NULLVP;  			dv = sdev_cache_lookup(ddv, nm);  			if (dv) { -				if (dv->sdev_state != SDEV_ZOMBIE) { -					SDEV_SIMPLE_RELE(dv); -				} else { -					/* -					 * A ZOMBIE node may not have been -					 * cleaned up from the backing store, -					 * bypass this entry in this case, -					 * and clean it up from the directory -					 * cache if this is the last call. -					 */ -					(void) sdev_dirdelete(ddv, dv); -				} +				VERIFY(dv->sdev_state != SDEV_ZOMBIE); +				SDEV_SIMPLE_RELE(dv);  				continue;  			} @@ -1547,11 +1565,16 @@ sdev_filldir_dynamic(struct sdev_node *ddv)  	vap->va_ctime = vap->va_atime;  	for (i = 0; vtab[i].vt_name != NULL; i++) {  		/* -		 * This early, we may be in a read-only /dev -		 * environment: leave the creation of any nodes we'd -		 * attempt to persist to devfsadm. +		 * This early, we may be in a read-only /dev environment: leave +		 * the creation of any nodes we'd attempt to persist to +		 * devfsadm. Because /dev itself is normally persistent, any +		 * node which is not marked dynamic will end up being marked +		 * persistent. However, some nodes are both dynamic and +		 * persistent, mostly lofi and rlofi, so we need to be careful +		 * in our check.  		 */ -		if (vtab[i].vt_flags & SDEV_PERSIST) +		if ((vtab[i].vt_flags & SDEV_PERSIST) || +		    !(vtab[i].vt_flags & SDEV_DYNAMIC))  			continue;  		nm = vtab[i].vt_name;  		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); @@ -1614,6 +1637,8 @@ lookup:  		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);  		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",  		    (void *)(*rvp), error)); +		if (!error) +			VN_RELE(*rvp);  		break;  	case VCHR:  	case VBLK: @@ -1647,74 +1672,49 @@ lookup:  	return (error);  } -static int +static void  sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)  { -	int error = 0;  	struct sdev_node *dup = NULL;  	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));  	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {  		sdev_direnter(ddv, *dv);  	} else { -		if (dup->sdev_state == SDEV_ZOMBIE) { -			error = sdev_dirdelete(ddv, dup); -			/* -			 * The ZOMBIE node is still hanging -			 * around with more than one reference counts. -			 * Fail the new node creation so that -			 * the directory cache won't have -			 * duplicate entries for the same named node -			 */ -			if (error == EBUSY) { -				SDEV_SIMPLE_RELE(*dv); -				sdev_nodedestroy(*dv, 0); -				*dv = NULL; -				return (error); -			} -			sdev_direnter(ddv, *dv); -		} else { -			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE); -			SDEV_SIMPLE_RELE(*dv); -			sdev_nodedestroy(*dv, 0); -			*dv = dup; -		} +		VERIFY(dup->sdev_state != SDEV_ZOMBIE); +		SDEV_SIMPLE_RELE(*dv); +		sdev_nodedestroy(*dv, 0); +		*dv = dup;  	} - -	return (0);  } -static int +static void  sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)  {  	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); -	return (sdev_dirdelete(ddv, *dv)); +	sdev_dirdelete(ddv, *dv);  }  /*   * update the in-core directory cache   */ -int +void  sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,      sdev_cache_ops_t ops)  { -	int error = 0; -  	ASSERT((SDEV_HELD(*dv)));  	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));  	switch (ops) {  	case SDEV_CACHE_ADD: -		error = sdev_cache_add(ddv, dv, nm); +		sdev_cache_add(ddv, dv, nm);  		break;  	case SDEV_CACHE_DELETE: -		error = sdev_cache_delete(ddv, dv); +		sdev_cache_delete(ddv, dv);  		break;  	default:  		break;  	} - -	return (error);  }  /* @@ -2157,7 +2157,6 @@ lookup_create_node:  	}  found: -	ASSERT(!(dv->sdev_flags & SDEV_STALE));  	ASSERT(dv->sdev_state == SDEV_READY);  	if (vtor) {  		/* @@ -2176,13 +2175,11 @@ found:  				rw_exit(&ddv->sdev_contents);  				rw_enter(&ddv->sdev_contents, RW_WRITER);  			} -			error = sdev_cache_update(ddv, &dv, nm, -			    SDEV_CACHE_DELETE); +			sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);  			rw_downgrade(&ddv->sdev_contents); -			if (error == 0) { -				dv = NULL; -				goto lookup_create_node; -			} +			SDEV_RELE(dv); +			dv = NULL; +			goto lookup_create_node;  			/* FALLTHRU */  		case SDEV_VTOR_INVALID:  			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); @@ -2229,9 +2226,10 @@ nolock_notfound:  		 * changes. Re-check.  		 */  		if (dv->sdev_state == SDEV_INIT) { -			(void) sdev_dirdelete(ddv, dv); +			sdev_dirdelete(ddv, dv);  			rw_exit(&ddv->sdev_contents);  			sdev_lookup_failed(ddv, nm, failed_flags); +			SDEV_RELE(dv);  			*vpp = NULL;  			return (ENOENT);  		} @@ -2263,15 +2261,14 @@ sdev_stale(struct sdev_node *ddv)  	ASSERT(SDEVTOV(ddv)->v_type == VDIR);  	rw_enter(&ddv->sdev_contents, RW_WRITER); -	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) { +	while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {  		vp = SDEVTOV(dv); +		SDEV_HOLD(dv);  		if (vp->v_type == VDIR)  			sdev_stale(dv); -		sdcmn_err9(("sdev_stale: setting stale %s\n", -		    dv->sdev_path)); -		dv->sdev_flags |= SDEV_STALE; -		avl_remove(&ddv->sdev_entries, dv); +		sdev_dirdelete(ddv, dv); +		SDEV_RELE(dv);  	}  	ddv->sdev_flags |= SDEV_BUILD;  	rw_exit(&ddv->sdev_contents); @@ -2289,7 +2286,7 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)  	int error = 0;  	int busy = 0;  	struct vnode *vp; -	struct sdev_node *dv, *next = NULL; +	struct sdev_node *dv;  	int bkstore = 0;  	int len = 0;  	char *bks_name = NULL; @@ -2300,8 +2297,7 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)  	 * We try our best to destroy all unused sdev_node's  	 */  	rw_enter(&ddv->sdev_contents, RW_WRITER); -	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { -		next = SDEV_NEXT_ENTRY(ddv, dv); +	while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {  		vp = SDEVTOV(dv);  		if (expr && gmatch(dv->sdev_name, expr) == 0) @@ -2338,15 +2334,10 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)  			bcopy(dv->sdev_name, bks_name, len);  		} -		error = sdev_dirdelete(ddv, dv); - -		if (error == EBUSY) { -			sdcmn_err9(("sdev_cleandir: dir busy\n")); -			busy++; -		} +		sdev_dirdelete(ddv, dv);  		/* take care the backing store clean up */ -		if (bkstore && (error == 0)) { +		if (bkstore) {  			ASSERT(bks_name);  			ASSERT(ddv->sdev_attrvp); @@ -2370,6 +2361,9 @@ sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)  			bks_name = NULL;  			len = 0;  		} + +		ddv->sdev_flags |= SDEV_BUILD; +		SDEV_RELE(dv);  	}  	ddv->sdev_flags |= SDEV_BUILD; @@ -3114,42 +3108,41 @@ devname_inactive_func(struct vnode *vp, struct cred *cred,  {  	int clean;  	struct sdev_node *dv = VTOSDEV(vp); -	struct sdev_node *ddv = dv->sdev_dotdot;  	int state; -	rw_enter(&ddv->sdev_contents, RW_WRITER); -	state = dv->sdev_state; -  	mutex_enter(&vp->v_lock);  	ASSERT(vp->v_count >= 1); +  	if (vp->v_count == 1 && callback != NULL)  		callback(vp); +	rw_enter(&dv->sdev_contents, RW_WRITER); +	state = dv->sdev_state; +  	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);  	/* -	 * last ref count on the ZOMBIE node is released. -	 * clean up the sdev_node, and -	 * release the hold on the backing store node so that -	 * the ZOMBIE backing stores also cleaned out. +	 * sdev is a rather bad public citizen. It violates the general +	 * agreement that in memory nodes should always have a valid reference +	 * count on their vnode. But that's not the case here. This means that +	 * we do actually have to distinguish between getting inactive callbacks +	 * for zombies and otherwise. This should probably be fixed.  	 */  	if (clean) { -		ASSERT(ddv); - -		ddv->sdev_nlink--; +		/* Remove the . entry to ourselves */  		if (vp->v_type == VDIR) { -			dv->sdev_nlink--; +			decr_link(dv);  		} -		if ((dv->sdev_flags & SDEV_STALE) == 0) -			avl_remove(&ddv->sdev_entries, dv); -		dv->sdev_nlink--; +		VERIFY(dv->sdev_nlink == 1); +		decr_link(dv);  		--vp->v_count; +		rw_exit(&dv->sdev_contents);  		mutex_exit(&vp->v_lock);  		sdev_nodedestroy(dv, 0);  	} else {  		--vp->v_count; +		rw_exit(&dv->sdev_contents);  		mutex_exit(&vp->v_lock);  	} -	rw_exit(&ddv->sdev_contents);  } diff --git a/usr/src/uts/common/fs/dev/sdev_vnops.c b/usr/src/uts/common/fs/dev/sdev_vnops.c index 89c5decbf0..3b6fe5e79a 100644 --- a/usr/src/uts/common/fs/dev/sdev_vnops.c +++ b/usr/src/uts/common/fs/dev/sdev_vnops.c @@ -21,6 +21,9 @@  /*   * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.   */ +/* + * Copyright (c) 2013, Joyent, Inc.  All rights reserved. + */  /*   * vnode ops for the /dev filesystem @@ -32,6 +35,248 @@   *    not supported for now   */ +/* + * sdev has a few basic goals: + *   o Provide /dev for the global zone as well as various non-global zones. + *   o Provide the basic functionality that devfsadm might need (mknod, + *     symlinks, etc.) + *   o Allow persistent permissions on files in /dev. + *   o Allow for dynamic directories and nodes for use by various services (pts, + *     zvol, net, etc.) + * + * The sdev file system is primarily made up of sdev_node_t's which is sdev's + * counterpart to the vnode_t. There are two different classes of sdev_node_t's + * that we generally care about, dynamic and otherwise. + * + * Persisting Information + * ---------------------- + * + * When sdev is mounted, it keeps track of the underlying file system it is + * mounted over. In certain situations, sdev will go and create entries in that + * underlying file system. These underlying 'back end' nodes are used as proxies + * for various changes in permissions. While specific sets of nodes, such as + * dynamic ones, are exempt, this process stores permission changes against + * these back end nodes. The point of all of this is to allow for these settings + * to persist across host and zone reboots. As an example, consider the entry + * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon + * changing the permissions on c0t0d0 you'd have the following logical + * relationships: + * + *    +------------------+   sdev_vnode     +--------------+ + *    | sdev_node_t      |<---------------->| vnode_t      | + *    | /dev/dsk/c0t0d0  |<---------------->| for sdev     | + *    +------------------+                  +--------------+ + *           | + *           | sdev_attrvp + *           | + *           |    +---------------------+ + *           +--->| vnode_t for UFS|ZFS | + *                | /dev/dsk/c0t0d0     | + *                +---------------------+ + * + * sdev is generally in memory. Therefore when a lookup happens and there is no + * entry already inside of a directory cache, it will next check the backing + * store. If the backing store exists, we will reconstitute the sdev_node based + * on the information that we persisted. When we create the backing store node, + * we use the struct vattr information that we already have in sdev_node_t. + * Because of this, we already know if the entry was previously a symlink, + * directory, or some other kind of type. Note that not all types of nodes are + * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are + * eligible to be persisted. + * + * When the sdev_node is created and the lookup is done, we grab a hold on the + * underlying vnode as part of the call to VOP_LOOKUP. That reference is held + * until the sdev_node becomes inactive. Once its reference count reaches one + * and the VOP_INACTIVE callback fires leading to the destruction of the node, + * the reference on the underlying vnode will be released. + * + * The backing store node will be deleted only when the node itself is deleted + * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. + * + * Not everything can be persisted, see The Rules section for more details. + * + * Dynamic Nodes + * ------------- + * + * Dynamic nodes allow for specific interactions with various kernel subsystems + * when looking up directory entries. This allows the lookup and readdir + * functions to check against the kernel subsystem's for validity. eg. does a + * zvol or nic still exist. + * + * More specifically, when we create various directories we check if the + * directory name matches that of one of the names in the vtab[] (sdev_subr.c). + * If it does, we swap out the vnode operations into a new set which combine the + * normal sdev vnode operations with the dynamic set here. + * + * In addition, various dynamic nodes implement a verification entry point. This + * verification entry is used as a part of lookup and readdir. The goal for + * these dynamic nodes is to allow them to check with the underlying subsystems + * to ensure that these devices are still present, or if they have gone away, to + * remove them from the results. This is indicated by using the SDEV_VTOR flag + * in vtab[]. + * + * Dynamic nodes have additional restrictions placed upon them. They may only + * appear at the top level directory of the file system. In addition, users + * cannot create dirents below any leve of a dynamic node aside from its special + * vnops. + * + * Profiles + * -------- + * + * Profiles exist for the purpose of non-global zones. They work with the zone + * brands and zoneadmd to set up a filter of allowed devices that can appear in + * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a + * modctl system call. Specifically it allows one to add patterns of device + * paths to include and exclude. It allows for a collection of symlinks to be + * added and it allows for remapping names. + * + * When operating in a non-global zone, several of the sdev vnops are redirected + * to the profile versions. These impose additional restrictions such as + * enforcing that a non-global zone's /dev is read only. + * + * sdev_node_t States + * ------------------ + * + * A given sdev_node_t has a field called the sdev_state which describes where + * in the sdev life cycle it is. There are three primary states: SDEV_INIT, + * SDEV_READY, and SDEV_ZOMBIE. + * + *	SDEV_INIT: When a new /dev file is first looked up, a sdev_node + *		   is allocated, initialized and added to the directory's + *		   sdev_node cache. A node at this state will also + *		   have the SDEV_LOOKUP flag set. + * + *		   Other threads that are trying to look up a node at + *		   this state will be blocked until the SDEV_LOOKUP flag + *		   is cleared. + * + *		   When the SDEV_LOOKUP flag is cleared, the node may + *		   transition into the SDEV_READY state for a successful + *		   lookup or the node is removed from the directory cache + *		   and destroyed if the named node can not be found. + *		   An ENOENT error is returned for the second case. + * + *	SDEV_READY: A /dev file has been successfully looked up and + *		    associated with a vnode. The /dev file is available + *		    for the supported /dev file system operations. + * + *	SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued + *		    to an SDEV_READY node. The node is transitioned into + *		    the SDEV_ZOMBIE state if the vnode reference count + *		    is still held. A SDEV_ZOMBIE node does not support + *		    any of the /dev file system operations. A SDEV_ZOMBIE + *		    node is immediately removed from the directory cache + *		    and destroyed once the reference count reaches zero. + * + * Historically nodes that were marked SDEV_ZOMBIE were not removed from the + * underlying directory caches. This has been the source of numerous bugs and + * thus to better mimic what happens on a real file system, it is no longer the + * case. + * + * The following state machine describes the life cycle of a given node and its + * associated states: + * + * node is . . . . . + * allocated via   .     +-------------+         . . . . . . . vnode_t refcount + * sdev_nodeinit() .     | Unallocated |         .             reaches zero and + *        +--------*-----|   Memory    |<--------*---+         sdev_inactive is + *        |              +-------------+             |         called. + *        |       +------------^                     |         called. + *        v       |                                  | + *  +-----------+ * . . sdev_nodeready()      +-------------+ + *  | SDEV_INIT | |     or related setup      | SDEV_ZOMBIE | + *  +-----------+ |     failure               +-------------+ + *        |       |                                  ^ + *        |       |      +------------+              | + *        +-*----------->| SDEV_READY |--------*-----+ + *          .            +------------+        .          The node is no longer + *          . . node successfully              . . . . .  valid or we've been + *              inserted into the                         asked to remove it. + *              directory cache                           This happens via + *              and sdev_nodready()                       sdev_dirdelete(). + *              call successful. + * + * Adding and Removing Dirents, Zombie Nodes + * ----------------------------------------- + * + * As part of doing a lookup, readdir, or an explicit creation operation like + * mkdir or create, nodes may be created. Every directory has an avl tree which + * contains its children, the sdev_entries tree. This is only used if the type + * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and + * it is managed through sdev_cache_update(). + * + * Every sdev_node_t has a field sdev_state, which describes the current state + * of the node. A node is generally speaking in the SDEV_READY state. When it is + * there, it can be looked up, accessed, and operations performed on it. When a + * node is going to be removed from the directory cache it is marked as a + * zombie. Once a node becomes a zombie, no other file system operations will + * succeed and it will continue to exist as a node until the vnode count on the + * node reaches zero. At that point, the node will be freed.  However, once a + * node has been marked as a zombie, it will be removed immediately from the + * directory cache such that no one else may find it again.  This means that + * someone else can insert a new entry into that directory with the same name + * and without a problem. + * + * To remove a node, see the section on that in The Rules. + * + * The Rules + * --------- + * These are the rules to live by when working in sdev. These are not + * exhaustive. + * + * - Set 1: Working with Backing Nodes + *   o If there is a SDEV_READY sdev_node_t, it knows about its backing node. + *   o If we find a backing node when looking up an sdev_node_t for the first + *     time, we use its attributes to build our sdev_node_t. + *   o If there is a found backing node, or we create a backing node, that's + *     when we grab the hold on its vnode. + *   o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from + *     the underlying file system. It must not be searchable or findable. + *   o We release our hold on the backing node vnode when we destroy the + *     sdev_node_t. + * + * - Set 2: Locking rules for sdev (not exhaustive) + *   o The majority of nodes contain an sdev_contents rw lock. You must hold it + *     for read or write if manipulating its contents appropriately. + *   o You must lock your parent before yourself. + *   o If you need your vnode's v_lock and the sdev_contents rw lock, you must + *     grab the v_lock before the sdev_contents rw_lock. + *   o If you release a lock on the node as a part of upgrading it, you must + *     verify that the node has not become a zombie as a part of this process. + * + * - Set 3: Zombie Status and What it Means + *   o If you encounter a node that is a ZOMBIE, that means that it has been + *     unlinked from the backing store. + *   o If you release your contents lock and acquire it again (say as part of + *     trying to grab a write lock) you must check that the node has not become + *     a zombie. + *   o You should VERIFY that a looked up node is not a zombie. This follows + *     from the following logic. To mark something as a zombie means that it is + *     removed from the parents directory cache. To do that, you must have a + *     write lock on the parent's sdev_contents. To lookup through that + *     directory you must have a read lock. This then becomes a simple ordering + *     problem. If you've been granted the lock then the other operation cannot + *     be in progress or must have already succeeded. + * + * - Set 4: Removing Directory Entries (aka making nodes Zombies) + *   o Write lock must be held on the directory + *   o Write lock must be held on the node + *   o Remove the sdev_node_t from its parent cache + *   o Remove the corresponding backing store node, if it exists, eg. use + *     VOP_REMOVE or VOP_RMDIR. + *   o You must NOT make any change in the vnode reference count! Nodes should + *     only be cleaned up through VOP_INACTIVE callbacks. + *   o VOP_INACTIVE is the only one responsible for doing the final vn_rele of + *     the backing store vnode that was grabbed during lookup. + * + * - Set 5: What Nodes may be Persisted + *   o The root, /dev is always persisted + *   o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted + *     unless it is also marked SDEV_PERSIST + *   o Anything whose parent directory is marked SDEV_PERSIST will pass that + *     along to the child as long as it does not contradict the above rules + */ +  #include <sys/types.h>  #include <sys/param.h>  #include <sys/t_lock.h> @@ -559,6 +804,12 @@ sdev_remove(struct vnode *dvp, char *nm, struct cred *cred,  	if (!rw_tryupgrade(&parent->sdev_contents)) {  		rw_exit(&parent->sdev_contents);  		rw_enter(&parent->sdev_contents, RW_WRITER); +		/* Make sure we didn't become a zombie */ +		if (parent->sdev_state == SDEV_ZOMBIE) { +			rw_exit(&parent->sdev_contents); +			VN_RELE(vp); +			return (ENOENT); +		}  	}  	/* we do not support unlinking a non-empty directory */ @@ -574,39 +825,28 @@ sdev_remove(struct vnode *dvp, char *nm, struct cred *cred,  	 *  - destroying the sdev_node  	 *  - releasing the hold on attrvp  	 */ -	error = sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); +	sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); +	VN_RELE(vp);  	rw_exit(&parent->sdev_contents); -	sdcmn_err2(("sdev_remove: cache_update error %d\n", error)); -	if (error && (error != EBUSY)) { -		/* report errors other than EBUSY */ -		VN_RELE(vp); -	} else { -		sdcmn_err2(("sdev_remove: cleaning node %s from cache " -		    " with error %d\n", nm, error)); - +	/* +	 * best efforts clean up the backing store +	 */ +	if (bkstore) { +		ASSERT(parent->sdev_attrvp); +		error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, +		    ct, flags);  		/* -		 * best efforts clean up the backing store +		 * do not report BUSY error +		 * because the backing store ref count is released +		 * when the last ref count on the sdev_node is +		 * released.  		 */ -		if (bkstore) { -			ASSERT(parent->sdev_attrvp); -			error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, -			    ct, flags); -			/* -			 * do not report BUSY error -			 * because the backing store ref count is released -			 * when the last ref count on the sdev_node is -			 * released. -			 */ -			if (error == EBUSY) { -				sdcmn_err2(("sdev_remove: device %s is still on" -				    "disk %s\n", nm, parent->sdev_path)); -				error = 0; -			} -		} - -		if (error == EBUSY) +		if (error == EBUSY) { +			sdcmn_err2(("sdev_remove: device %s is still on" +			    "disk %s\n", nm, parent->sdev_path));  			error = 0; +		}  	}  	return (error); @@ -716,6 +956,8 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  		if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) {  			mutex_exit(&sdev_lock);  			VN_RELE(ovp); +			if (nvp != NULL) +				VN_RELE(nvp);  			return (error);  		}  		fsid = vattr.va_fsid; @@ -723,11 +965,15 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  		if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) {  			mutex_exit(&sdev_lock);  			VN_RELE(ovp); +			if (nvp != NULL) +				VN_RELE(nvp);  			return (error);  		}  		if (fsid != vattr.va_fsid) {  			mutex_exit(&sdev_lock);  			VN_RELE(ovp); +			if (nvp != NULL) +				VN_RELE(nvp);  			return (EXDEV);  		}  	} @@ -737,6 +983,8 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  	if (error) {  		mutex_exit(&sdev_lock);  		VN_RELE(ovp); +		if (nvp != NULL) +			VN_RELE(nvp);  		return (error);  	} @@ -747,6 +995,8 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  		if (error) {  			mutex_exit(&sdev_lock);  			VN_RELE(ovp); +			if (nvp != NULL) +				VN_RELE(nvp);  			return (error);  		}  	} @@ -755,21 +1005,31 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  	ASSERT(fromdv);  	/* destination file exists */ -	if (nvp) { +	if (nvp != NULL) {  		todv = VTOSDEV(nvp);  		ASSERT(todv);  	} +	if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || +	    (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { +		mutex_exit(&sdev_lock); +		if (nvp != NULL) +			VN_RELE(nvp); +		VN_RELE(ovp); +		return (EACCES); +	} +  	/* -	 * link source to new target in the memory +	 * link source to new target in the memory. Regardless of failure, we +	 * must rele our hold on nvp.  	 */  	error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); +	if (nvp != NULL) +		VN_RELE(nvp);  	if (error) {  		sdcmn_err2(("sdev_rename: renaming %s to %s failed "  		    " with error %d\n", onm, nnm, error));  		mutex_exit(&sdev_lock); -		if (nvp) -			VN_RELE(nvp);  		VN_RELE(ovp);  		return (error);  	} @@ -782,6 +1042,7 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  	if (fromdv == NULL) {  		rw_exit(&fromparent->sdev_contents);  		mutex_exit(&sdev_lock); +		VN_RELE(ovp);  		sdcmn_err2(("sdev_rename: the source is deleted already\n"));  		return (0);  	} @@ -790,6 +1051,7 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  		rw_exit(&fromparent->sdev_contents);  		mutex_exit(&sdev_lock);  		VN_RELE(SDEVTOV(fromdv)); +		VN_RELE(ovp);  		sdcmn_err2(("sdev_rename: the source is being deleted\n"));  		return (0);  	} @@ -809,8 +1071,9 @@ sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,  	rw_enter(&fromparent->sdev_contents, RW_WRITER);  	bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0; -	error = sdev_cache_update(fromparent, &fromdv, onm, +	sdev_cache_update(fromparent, &fromdv, onm,  	    SDEV_CACHE_DELETE); +	VN_RELE(SDEVTOV(fromdv));  	/* best effforts clean up the backing store */  	if (bkstore) { @@ -1078,27 +1341,23 @@ sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred,  	rw_exit(&self->sdev_contents);  	/* unlink it from the directory cache */ -	error = sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); +	sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE);  	rw_exit(&parent->sdev_contents);  	vn_vfsunlock(vp); +	VN_RELE(vp); -	if (error && (error != EBUSY)) { -		VN_RELE(vp); -	} else { -		sdcmn_err2(("sdev_rmdir: cleaning node %s from directory " -		    " cache with error %d\n", nm, error)); +	/* best effort to clean up the backing store */ +	if (SDEV_IS_PERSIST(parent)) { +		ASSERT(parent->sdev_attrvp); +		error = VOP_RMDIR(parent->sdev_attrvp, nm, +		    parent->sdev_attrvp, kcred, ct, flags); -		/* best effort to clean up the backing store */ -		if (SDEV_IS_PERSIST(parent)) { -			ASSERT(parent->sdev_attrvp); -			error = VOP_RMDIR(parent->sdev_attrvp, nm, -			    parent->sdev_attrvp, kcred, ct, flags); +		if (error)  			sdcmn_err2(("sdev_rmdir: cleaning device %s is on"  			    " disk error %d\n", parent->sdev_path, error)); -		} -  		if (error == EBUSY)  			error = 0; +  	}  	return (error); diff --git a/usr/src/uts/common/fs/dev/sdev_vtops.c b/usr/src/uts/common/fs/dev/sdev_vtops.c index 39b82c4285..e29262d257 100644 --- a/usr/src/uts/common/fs/dev/sdev_vtops.c +++ b/usr/src/uts/common/fs/dev/sdev_vtops.c @@ -101,7 +101,6 @@ devvt_validate(struct sdev_node *dv)  	char *nm = dv->sdev_name;  	int rval; -	ASSERT(!(dv->sdev_flags & SDEV_STALE));  	ASSERT(dv->sdev_state == SDEV_READY);  	ASSERT(RW_LOCK_HELD(&(dv->sdev_dotdot)->sdev_contents)); @@ -273,6 +272,11 @@ devvt_create_snode(struct sdev_node *ddv, char *nm, struct cred *cred, int type)  		kmem_free(link, MAXPATHLEN);  	} +	if (error != 0) { +		SDEV_RELE(sdv); +		return; +	} +  	mutex_enter(&sdv->sdev_lookup_lock);  	SDEV_UNBLOCK_OTHERS(sdv, SDEV_LOOKUP);  	mutex_exit(&sdv->sdev_lookup_lock); @@ -337,6 +341,7 @@ devvt_prunedir(struct sdev_node *ddv)  			SDEV_HOLD(dv);  			(void) sdev_cache_update(ddv, &dv,  			    dv->sdev_name, SDEV_CACHE_DELETE); +			SDEV_RELE(dv);  			break;  		case SDEV_VTOR_STALE:  			devvt_rebuild_stale_link(ddv, dv); @@ -382,9 +387,6 @@ devvt_cleandir(struct vnode *dvp, struct cred *cred)  		for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {  			next = SDEV_NEXT_ENTRY(sdvp, dv); -			/* skip stale nodes */ -			if (dv->sdev_flags & SDEV_STALE) -				continue;  			/* validate only ready nodes */  			if (dv->sdev_state != SDEV_READY)  				continue; @@ -403,9 +405,6 @@ devvt_cleandir(struct vnode *dvp, struct cred *cred)  	for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {  		next = SDEV_NEXT_ENTRY(sdvp, dv); -		/* skip stale nodes */ -		if (dv->sdev_flags & SDEV_STALE) -			continue;  		/* validate only ready nodes */  		if (dv->sdev_state != SDEV_READY)  			continue; diff --git a/usr/src/uts/common/fs/dev/sdev_zvolops.c b/usr/src/uts/common/fs/dev/sdev_zvolops.c index 192b794c67..6543504e97 100644 --- a/usr/src/uts/common/fs/dev/sdev_zvolops.c +++ b/usr/src/uts/common/fs/dev/sdev_zvolops.c @@ -464,12 +464,6 @@ devzvol_prunedir(struct sdev_node *ddv)  	dv = SDEV_FIRST_ENTRY(ddv);  	while (dv) {  		sdcmn_err13(("sdev_name '%s'", dv->sdev_name)); -		/* skip stale nodes */ -		if (dv->sdev_flags & SDEV_STALE) { -			sdcmn_err13(("  stale")); -			dv = SDEV_NEXT_ENTRY(ddv, dv); -			continue; -		}  		switch (devzvol_validate(dv)) {  		case SDEV_VTOR_VALID: @@ -489,11 +483,10 @@ devzvol_prunedir(struct sdev_node *ddv)  		}  		SDEV_HOLD(dv);  		/* remove the cache node */ -		if (sdev_cache_update(ddv, &dv, dv->sdev_name, -		    SDEV_CACHE_DELETE) == 0) -			dv = SDEV_FIRST_ENTRY(ddv); -		else -			dv = SDEV_NEXT_ENTRY(ddv, dv); +		sdev_cache_update(ddv, &dv, dv->sdev_name, +		    SDEV_CACHE_DELETE); +		SDEV_RELE(dv); +		dv = SDEV_FIRST_ENTRY(ddv);  	}  	rw_downgrade(&ddv->sdev_contents);  } diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h index 0a841e9d22..19a147d88a 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -181,35 +181,8 @@ typedef struct sdev_node {  #define	SDEV_NEXT_ENTRY(ddv, dv)	AVL_NEXT(&(ddv)->sdev_entries, (dv))  /* - * sdev_state - * - * A sdev_node may go through 3 states: - *	SDEV_INIT: When a new /dev file is first looked up, a sdev_node - *		   is allocated, initialized and added to the directory's - *		   sdev_node cache. A node at this state will also - *		   have the SDEV_LOOKUP flag set. - * - *		   Other threads that are trying to look up a node at - *		   this state will be blocked until the SDEV_LOOKUP flag - *		   is cleared. - * - *		   When the SDEV_LOOKUP flag is cleared, the node may - *		   transition into the SDEV_READY state for a successful - *		   lookup or the node is removed from the directory cache - *		   and destroyed if the named node can not be found. - *		   An ENOENT error is returned for the second case. - * - *	SDEV_READY: A /dev file has been successfully looked up and - *		    associated with a vnode. The /dev file is available - *		    for the supported /dev filesystem operations. - * - *	SDEV_ZOMBIE: Deletion of a /dev file has been explicitely issued - *		    to an SDEV_READY node. The node is transitioned into - *		    the SDEV_ZOMBIE state if the vnode reference count - *		    is still held. A SDEV_ZOMBIE node does not support - *		    any of the /dev filesystem operations. A SDEV_ZOMBIE - *		    node is removed from the directory cache and destroyed - *		    once the reference count reaches "zero". + * See the big theory statement in sdev_vnops.c for an explanation of these + * states.   */  typedef enum {  	SDEV_ZOMBIE = -1, @@ -219,17 +192,16 @@ typedef enum {  /* sdev_flags */  #define	SDEV_BUILD		0x0001	/* directory cache out-of-date */ -#define	SDEV_STALE		0x0002	/* stale sdev nodes */ -#define	SDEV_GLOBAL		0x0004	/* global /dev nodes */ -#define	SDEV_PERSIST		0x0008	/* backing store persisted node */ -#define	SDEV_NO_NCACHE		0x0010	/* do not include in neg. cache */ -#define	SDEV_DYNAMIC		0x0020	/* special-purpose vnode ops */ +#define	SDEV_GLOBAL		0x0002	/* global /dev nodes */ +#define	SDEV_PERSIST		0x0004	/* backing store persisted node */ +#define	SDEV_NO_NCACHE		0x0008	/* do not include in neg. cache */ +#define	SDEV_DYNAMIC		0x0010	/* special-purpose vnode ops */  					/* (ex: pts) */ -#define	SDEV_VTOR		0x0040	/* validate sdev_nodes during search */ -#define	SDEV_ATTR_INVALID	0x0080	/* invalid node attributes, */ +#define	SDEV_VTOR		0x0020	/* validate sdev_nodes during search */ +#define	SDEV_ATTR_INVALID	0x0040	/* invalid node attributes, */  					/* need update */ -#define	SDEV_SUBDIR		0x0100	/* match all subdirs under here */ -#define	SDEV_ZONED		0x0200  /* zoned subdir */ +#define	SDEV_SUBDIR		0x0080	/* match all subdirs under here */ +#define	SDEV_ZONED		0x0100  /* zoned subdir */  /* sdev_lookup_flags */  #define	SDEV_LOOKUP	0x0001	/* node creation in progress */ @@ -471,7 +443,7 @@ typedef enum {  } sdev_cache_ops_t;  extern struct sdev_node *sdev_cache_lookup(struct sdev_node *, char *); -extern int sdev_cache_update(struct sdev_node *, struct sdev_node **, char *, +extern void sdev_cache_update(struct sdev_node *, struct sdev_node **, char *,      sdev_cache_ops_t);  extern void sdev_node_cache_init(void);  extern void sdev_node_cache_fini(void); | 
