summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/fs/cachefs/cachefs_cnode.c
diff options
context:
space:
mode:
authorstevel@tonic-gate <none@none>2005-06-14 00:00:00 -0700
committerstevel@tonic-gate <none@none>2005-06-14 00:00:00 -0700
commit7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
treec871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/fs/cachefs/cachefs_cnode.c
downloadillumos-gate-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/fs/cachefs/cachefs_cnode.c')
-rw-r--r--usr/src/uts/common/fs/cachefs/cachefs_cnode.c1965
1 files changed, 1965 insertions, 0 deletions
diff --git a/usr/src/uts/common/fs/cachefs/cachefs_cnode.c b/usr/src/uts/common/fs/cachefs/cachefs_cnode.c
new file mode 100644
index 0000000000..2036c59f20
--- /dev/null
+++ b/usr/src/uts/common/fs/cachefs/cachefs_cnode.c
@@ -0,0 +1,1965 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/cred.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/uio.h>
+#include <sys/tiuser.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/buf.h>
+#include <netinet/in.h>
+#include <rpc/types.h>
+#include <rpc/xdr.h>
+#include <rpc/auth.h>
+#include <rpc/clnt.h>
+#include <sys/mount.h>
+#include <sys/ioctl.h>
+#include <sys/statvfs.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <sys/cmn_err.h>
+#include <sys/utsname.h>
+#include <sys/modctl.h>
+#include <vm/pvn.h>
+
+#include <sys/fs/cachefs_fs.h>
+
+/*
+ * cachefs_max_idle is a global that is tunable.
+ * This value decides how frequently or when the
+ * cachefs_cnode_idleclean is run.
+ * The default value is set to CFS_FS_MAXIDLE.
+ * The tunable if set to X triggers a cleanup when
+ * the number of idle cnodes reach X, and cleans up
+ * (.25 * X) idle cnodes.
+ */
+int cachefs_max_idle = CFS_FS_MAXIDLE;
+
+
+struct kmem_cache *cachefs_cnode_cache = NULL;
+
+/*
+ * Functions for cnode management.
+ */
+
+/*
+ * Puts cnode on idle list. Only call from an async thread or no
+ * locks held.
+ */
+/*ARGSUSED1*/
+void
+cachefs_cnode_idle(struct vnode *vp, cred_t *cr)
+{
+ cnode_t *cp = VTOC(vp);
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+ int cleanidle;
+ vnode_t *unldvp;
+ cred_t *unlcred;
+ char *unlname;
+ int error;
+
+ /*
+ * The key to this routine is not to drop the vnode count
+ * while on the idle list. This prevents this routine from
+ * being called again by vn_rele on an inactive cnode.
+ * Nothing bad happens if an "active" cnode is put on the idle
+ * list. It eventually gets pulled off.
+ * Also this routine is only called from a thread message sent
+ * by cachefs_inactive(). It is not safe for this routine
+ * to be the "inactive" entry point because of the dnlc.
+ */
+
+ for (;;) {
+ /* get access to the file system */
+ error = cachefs_cd_access(fscp, 0, 1);
+ ASSERT(error == 0);
+
+ /* get exclusive access to this cnode */
+ mutex_enter(&cp->c_statelock);
+
+ /* done with this loop if not unlinking a file */
+ if (cp->c_unldvp == NULL)
+ break;
+
+ /* get unlink info out of the cnode */
+ unldvp = cp->c_unldvp;
+ unlcred = cp->c_unlcred;
+ unlname = cp->c_unlname;
+ cp->c_unldvp = NULL;
+ cp->c_unlcred = NULL;
+ cp->c_unlname = NULL;
+ mutex_exit(&cp->c_statelock);
+
+ /* finish the remove operation */
+ if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
+ error = cachefs_remove_connected(unldvp,
+ unlname, unlcred, vp);
+ } else {
+ error = cachefs_remove_disconnected(unldvp,
+ unlname, unlcred, vp);
+ }
+
+ /* reaquire cnode lock */
+ mutex_enter(&cp->c_statelock);
+
+ /* if a timeout occurred */
+ if (CFS_TIMEOUT(fscp, error)) {
+ /* restore cnode state */
+ if (cp->c_unldvp == NULL) {
+ cp->c_unldvp = unldvp;
+ cp->c_unlcred = unlcred;
+ cp->c_unlname = unlname;
+ if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
+ mutex_exit(&cp->c_statelock);
+ cachefs_cd_release(fscp);
+ cachefs_cd_timedout(fscp);
+ continue;
+ } else {
+ cp->c_flags |= CN_PENDRM;
+ mutex_exit(&cp->c_statelock);
+ goto out;
+ }
+ }
+ }
+ /* free up resources */
+ VN_RELE(unldvp);
+ cachefs_kmem_free(unlname, MAXNAMELEN);
+ crfree(unlcred);
+ break;
+ }
+
+ ASSERT((cp->c_flags & CN_IDLE) == 0);
+ /*
+ * If we are going to destroy this cnode,
+ * do it now instead of later.
+ */
+ if (cp->c_flags & (CN_DESTROY | CN_STALE)) {
+ mutex_exit(&cp->c_statelock);
+ (void) cachefs_cnode_inactive(vp, cr);
+ goto out;
+ }
+
+ /*
+ * mark cnode as idle, put it on the idle list, and increment the
+ * number of idle cnodes
+ */
+ cp->c_flags |= CN_IDLE;
+ mutex_enter(&fscp->fs_idlelock);
+ cachefs_cnode_idleadd(cp);
+ if ((fscp->fs_idlecnt > cachefs_max_idle) &&
+ (fscp->fs_idleclean == 0) &&
+ (fscp->fs_cdtransition == 0)) {
+ fscp->fs_idleclean = 1;
+ cleanidle = 1;
+ } else {
+ cleanidle = 0;
+ }
+ mutex_exit(&fscp->fs_idlelock);
+
+ /* release cnode */
+ mutex_exit(&cp->c_statelock);
+
+ /* if should reduce the number of idle cnodes */
+ if (cleanidle) {
+ ASSERT(fscp->fs_idlecnt > 1);
+ fscache_hold(fscp);
+ cachefs_cnode_idleclean(fscp, 0);
+ /* XXX race with cachefs_unmount() calling destroy */
+ fscache_rele(fscp);
+ }
+
+out:
+ /* release hold on the file system */
+ /* XXX unmount() could have called destroy after fscache_rele() */
+ cachefs_cd_release(fscp);
+}
+
+/*
+ * Removes cnodes from the idle list and destroys them.
+ */
+void
+cachefs_cnode_idleclean(fscache_t *fscp, int unmount)
+{
+ int remcnt;
+ cnode_t *cp;
+
+ mutex_enter(&fscp->fs_idlelock);
+
+ /* determine number of cnodes to destroy */
+ if (unmount) {
+ /* destroy all plus any that go idle while in this routine */
+ remcnt = fscp->fs_idlecnt * 2;
+ } else {
+ /* reduce to 75% of max allowed idle cnodes */
+ remcnt = (fscp->fs_idlecnt - cachefs_max_idle) +
+ (cachefs_max_idle >> 2);
+ }
+
+ for (; remcnt > 0; remcnt--) {
+ /* get cnode on back of idle list and hold it */
+ cp = fscp->fs_idleback;
+ if (cp == NULL)
+ break;
+ VN_HOLD(CTOV(cp));
+ mutex_exit(&fscp->fs_idlelock);
+
+ /* if the cnode is still on the idle list */
+ mutex_enter(&cp->c_statelock);
+ if (cp->c_flags & CN_IDLE) {
+ cp->c_flags &= ~CN_IDLE;
+
+ /* remove cnode from the idle list */
+ mutex_enter(&fscp->fs_idlelock);
+ cachefs_cnode_idlerem(cp);
+ mutex_exit(&fscp->fs_idlelock);
+ mutex_exit(&cp->c_statelock);
+
+ /* destroy the cnode */
+ VN_RELE(CTOV(cp));
+ (void) cachefs_cnode_inactive(CTOV(cp), kcred);
+ } else {
+ /* cnode went active, just skip it */
+ mutex_exit(&cp->c_statelock);
+ VN_RELE(CTOV(cp));
+ }
+ mutex_enter(&fscp->fs_idlelock);
+ }
+
+ fscp->fs_idleclean = 0;
+ mutex_exit(&fscp->fs_idlelock);
+}
+
+/*
+ * This routine does the real work of inactivating a cachefs vnode.
+ */
+int
+cachefs_cnode_inactive(register struct vnode *vp, cred_t *cr)
+{
+ cnode_t *cp;
+ struct fscache *fscp;
+ struct filegrp *fgp;
+ cachefscache_t *cachep;
+ struct cachefs_metadata *mdp;
+ int meta_destroyed = 0;
+
+ cp = VTOC(vp);
+
+ fscp = C_TO_FSCACHE(cp);
+ cachep = fscp->fs_cache;
+ ASSERT(cachep != NULL);
+ fgp = cp->c_filegrp;
+
+ ASSERT((cp->c_flags & CN_IDLE) == 0);
+
+ /* truncate the front file if necessary */
+ mutex_enter(&cp->c_statelock);
+ if ((cp->c_flags & CN_NOCACHE) && (cp->c_metadata.md_flags & MD_FILE) &&
+ cp->c_metadata.md_frontblks) {
+
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_INVALIDATE)
+ printf("c_cnode_inactive: invalidating %llu\n",
+ (u_longlong_t)cp->c_id.cid_fileno);
+#endif
+ /*
+ * If the cnode is being populated, and we're not the
+ * populating thread, then block until the pop thread
+ * completes. If we are the pop thread, then we may come in
+ * here, but not to nuke the directory cnode at a critical
+ * juncture.
+ */
+ while ((cp->c_flags & CN_ASYNC_POP_WORKING) &&
+ (cp->c_popthrp != curthread))
+ cv_wait(&cp->c_popcv, &cp->c_statelock);
+
+ cachefs_inval_object(cp);
+ }
+ mutex_exit(&cp->c_statelock);
+
+ for (;;) {
+ /* see if vnode is really inactive */
+ mutex_enter(&vp->v_lock);
+ ASSERT(vp->v_count > 0);
+ if (vp->v_count > 1) {
+ /*
+ * It's impossible for us to be cnode_inactive for
+ * the root cnode _unless_ we are being called from
+ * cachefs_unmount (where inactive is called
+ * explictly). If the count is not 1, there is
+ * still an outstanding reference to the root cnode,
+ * and we return EBUSY; this allows cachefs_unmount
+ * to fail.
+ */
+ if (cp->c_flags & CN_ROOT) {
+ mutex_exit(&vp->v_lock);
+ return (EBUSY);
+ }
+ cp->c_ipending = 0;
+ vp->v_count--; /* release our hold from vn_rele */
+ mutex_exit(&vp->v_lock);
+ return (0);
+ }
+ mutex_exit(&vp->v_lock);
+
+ /* get rid of any pages, do not care if cannot be pushed */
+ if (vn_has_cached_data(vp)) {
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+ (void) cachefs_putpage_common(vp, (offset_t)0, 0,
+ B_INVAL | B_FORCE, cr);
+ }
+
+ /* if need to sync metadata, the call is a no op for NFSv4 */
+ if ((cp->c_flags & (CN_UPDATED | CN_DESTROY)) == CN_UPDATED) {
+ (void) cachefs_sync_metadata(cp);
+ continue;
+ }
+ break;
+ }
+
+ /*
+ * Lock out possible race with makecachefsnode.
+ * Makecachefsnode will fix up the rl/active list stuff to
+ * be correct when it gets to run.
+ * We have to do the rl/active stuff while the cnode is on the hash
+ * list to sync actions on the rl/active list.
+ */
+ mutex_enter(&fgp->fg_cnodelock);
+ mutex_enter(&cp->c_statelock);
+
+ /* see if vnode is still inactive */
+ mutex_enter(&vp->v_lock);
+ ASSERT(vp->v_count > 0);
+ if (vp->v_count > 1) {
+ cp->c_ipending = 0;
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&cp->c_statelock);
+ mutex_exit(&fgp->fg_cnodelock);
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_INVALIDATE)
+ printf("cachefs_cnode_inactive: %u vp %p\n",
+ vp->v_count, vp);
+#endif
+ return (0);
+ }
+ mutex_exit(&vp->v_lock);
+
+ /* check for race with remove */
+ if (cp->c_unldvp) {
+ mutex_exit(&cp->c_statelock);
+ mutex_exit(&fgp->fg_cnodelock);
+
+ /* this causes cachefs_inactive to be called again */
+ VN_RELE(vp);
+ return (0);
+ }
+
+ /* if any pages left, really get rid of them */
+ if (vn_has_cached_data(vp)) {
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+ (void) pvn_vplist_dirty(vp, 0, NULL, B_INVAL | B_TRUNC, cr);
+ }
+ ASSERT(vp->v_count == 1);
+
+ mdp = &cp->c_metadata;
+
+ /* if we can (and should) destroy the front file and metadata */
+ if ((cp->c_flags & (CN_DESTROY | CN_STALE)) &&
+ (fgp->fg_flags & CFS_FG_WRITE) && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
+ if (mdp->md_rlno) {
+ cachefs_removefrontfile(mdp, &cp->c_id, fgp);
+ cachefs_rlent_moveto(cachep, CACHEFS_RL_FREE,
+ mdp->md_rlno, 0);
+ mdp->md_rlno = 0;
+ mdp->md_rltype = CACHEFS_RL_NONE;
+ }
+ if ((cp->c_flags & CN_ALLOC_PENDING) == 0) {
+ (void) filegrp_destroy_metadata(fgp, &cp->c_id);
+ meta_destroyed = 1;
+ }
+ }
+
+ /* else put the front file on the gc list */
+ else if (mdp->md_rlno &&
+ (fgp->fg_flags & CFS_FG_WRITE) &&
+ (cp->c_metadata.md_rltype == CACHEFS_RL_ACTIVE)) {
+#ifdef CFSDEBUG
+ cachefs_rlent_verify(cachep, CACHEFS_RL_ACTIVE,
+ mdp->md_rlno);
+#endif /* CFSDEBUG */
+
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+ cachefs_rlent_moveto(cachep, CACHEFS_RL_GC, mdp->md_rlno,
+ mdp->md_frontblks);
+ mdp->md_rltype = CACHEFS_RL_GC;
+ cp->c_flags |= CN_UPDATED;
+ }
+
+ /* if idlelist pointer(s) not null, remove from idle list */
+ if ((cp->c_idlefront != NULL) || (cp->c_idleback != NULL)) {
+ mutex_enter(&fscp->fs_idlelock);
+ cachefs_cnode_idlerem(cp);
+ mutex_exit(&fscp->fs_idlelock);
+ }
+
+ /* remove from the filegrp list prior to releasing the cnode lock */
+ cachefs_cnode_listrem(cp);
+
+ mutex_exit(&cp->c_statelock);
+ if (! meta_destroyed)
+ (void) cachefs_sync_metadata(cp);
+
+ mutex_exit(&fgp->fg_cnodelock);
+
+ if (cp->c_cred != NULL) {
+ crfree(cp->c_cred);
+ cp->c_cred = NULL;
+ }
+
+ if (cp->c_frontvp)
+ VN_RELE(cp->c_frontvp);
+
+ if (cp->c_backvp)
+ VN_RELE(cp->c_backvp);
+
+ if (cp->c_acldirvp)
+ VN_RELE(cp->c_acldirvp);
+
+ rw_destroy(&cp->c_rwlock);
+ mutex_destroy(&cp->c_statelock);
+ cv_destroy(&cp->c_popcv);
+ mutex_destroy(&cp->c_iomutex);
+ cv_destroy(&cp->c_iocv);
+
+ /* free up cnode memory */
+ vn_invalid(cp->c_vnode);
+ vn_free(cp->c_vnode);
+ kmem_cache_free(cachefs_cnode_cache, cp);
+
+ filegrp_rele(fgp);
+ (void) fscache_cnodecnt(fscp, -1);
+ return (0);
+}
+
+/*
+ * Add a cnode to the filegrp list.
+ */
+void
+cachefs_cnode_listadd(struct cnode *cp)
+{
+ filegrp_t *fgp = cp->c_filegrp;
+
+ ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
+ ASSERT(cp->c_next == NULL);
+
+ cp->c_next = fgp->fg_cnodelist;
+ fgp->fg_cnodelist = cp;
+}
+
+/*
+ * Remove a cnode from the filegrp list.
+ */
+void
+cachefs_cnode_listrem(struct cnode *cp)
+{
+ filegrp_t *fgp = cp->c_filegrp;
+ struct cnode **headpp;
+
+#ifdef CFSDEBUG
+ int found = 0;
+#endif
+
+ ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
+ ASSERT(cp->c_idleback == NULL);
+ ASSERT(cp->c_idlefront == NULL);
+
+ for (headpp = &fgp->fg_cnodelist;
+ *headpp != NULL; headpp = &(*headpp)->c_next) {
+ if (*headpp == cp) {
+ *headpp = cp->c_next;
+ cp->c_next = NULL;
+#ifdef CFSDEBUG
+ found++;
+#endif
+ break;
+ }
+ }
+#ifdef CFSDEBUG
+ ASSERT(found);
+#endif
+}
+
+/*
+ * Add a cnode to the front of the fscache idle list.
+ */
+void
+cachefs_cnode_idleadd(struct cnode *cp)
+{
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+
+ ASSERT(MUTEX_HELD(&cp->c_statelock));
+ ASSERT(MUTEX_HELD(&fscp->fs_idlelock));
+
+ /* put cnode on the front of the idle list */
+ cp->c_idlefront = fscp->fs_idlefront;
+ cp->c_idleback = NULL;
+
+ if (fscp->fs_idlefront)
+ fscp->fs_idlefront->c_idleback = cp;
+ else {
+ ASSERT(fscp->fs_idleback == NULL);
+ fscp->fs_idleback = cp;
+ }
+ fscp->fs_idlefront = cp;
+ fscp->fs_idlecnt++;
+}
+
+/*
+ * Remove a cnode from the fscache idle list.
+ */
+void
+cachefs_cnode_idlerem(struct cnode *cp)
+{
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+
+ ASSERT(MUTEX_HELD(&cp->c_statelock));
+ ASSERT(MUTEX_HELD(&fscp->fs_idlelock));
+
+ if (cp->c_idlefront == NULL) {
+ ASSERT(fscp->fs_idleback == cp);
+ fscp->fs_idleback = cp->c_idleback;
+ if (fscp->fs_idleback != NULL)
+ fscp->fs_idleback->c_idlefront = NULL;
+ } else {
+ cp->c_idlefront->c_idleback = cp->c_idleback;
+ }
+
+ if (cp->c_idleback == NULL) {
+ ASSERT(fscp->fs_idlefront == cp);
+ fscp->fs_idlefront = cp->c_idlefront;
+ if (fscp->fs_idlefront != NULL)
+ fscp->fs_idlefront->c_idleback = NULL;
+ } else {
+ cp->c_idleback->c_idlefront = cp->c_idlefront;
+ cp->c_idleback = NULL;
+ }
+ cp->c_idlefront = NULL;
+ fscp->fs_idlecnt--;
+ ASSERT(fscp->fs_idlecnt >= 0);
+}
+
+/*
+ * Search the cnode list of the input file group, looking for a cnode which
+ * matches the supplied file ident fileno.
+ *
+ * Returns:
+ * *cpp = NULL, if no valid matching cnode is found
+ * *cpp = address of cnode with matching fileno, with c_statelock held
+ * return status is 0 if no cnode found, or if found & cookies match
+ * return status is 1 if a cnode was found, but the cookies don't match
+ *
+ * Note: must grab the c_statelock for each cnode, or its state could
+ * change while we're processing it. Also, if a cnode is found, must return
+ * with c_statelock still held, so that the cnode state cannot change until
+ * the calling routine releases the lock.
+ */
+int
+cachefs_cnode_find(filegrp_t *fgp, cfs_cid_t *cidp, fid_t *cookiep,
+ struct cnode **cpp, struct vnode *backvp, vattr_t *vap)
+{
+ struct cnode *cp;
+ int badcookie = 0;
+ uint32_t is_nfsv4;
+
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ cmn_err(CE_NOTE, "cachefs_cnode_find: fileno %llu fgp %p\n",
+ (u_longlong_t)cidp->cid_fileno, (void *)fgp);
+#endif
+ ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
+
+ *cpp = NULL;
+ is_nfsv4 = CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp);
+
+ /*
+ * Cookie should be filled unless disconnected operation or
+ * backfilesystem is NFSv4
+ */
+ if (cookiep == NULL && !CFS_ISFS_SNR(fgp->fg_fscp) &&
+ !CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp)) {
+ goto out;
+ }
+
+ for (cp = fgp->fg_cnodelist; cp != NULL; cp = cp->c_next) {
+ mutex_enter(&cp->c_statelock);
+
+ if ((cidp->cid_fileno != cp->c_id.cid_fileno &&
+ (is_nfsv4 == FALSE || cp->c_backvp != backvp)) ||
+ (cp->c_flags & (CN_STALE | CN_DESTROY))) {
+ mutex_exit(&cp->c_statelock);
+ continue;
+ }
+
+ /*
+ * Having found a non stale, non destroy pending cnode with
+ * matching fileno, will be exiting the for loop, after
+ * determining return status
+ */
+ *cpp = cp;
+
+ if ((cookiep != NULL) &&
+ ((cookiep->fid_len != cp->c_cookie.fid_len) ||
+ (bcmp((caddr_t)cookiep->fid_data,
+ (caddr_t)&cp->c_cookie.fid_data, cookiep->fid_len)) != 0)) {
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_GENERAL) {
+ cmn_err(CE_NOTE,
+ "cachefs: dup fileno %llu, cp %p\n",
+ (u_longlong_t)cidp->cid_fileno, (void *)cp);
+ }
+#endif
+ badcookie = 1;
+ }
+
+ /*
+ * For NFSv4 since there is no fid, add a check to
+ * ensure the backvp and vap matches that in the cnode.
+ * If it doesn't then someone tried to use a stale cnode.
+ */
+ if (is_nfsv4) {
+ if (backvp && backvp != cp->c_backvp ||
+ vap && vap->va_type != cp->c_attr.va_type ||
+ cidp->cid_fileno != cp->c_id.cid_fileno) {
+ CFS_DPRINT_BACKFS_NFSV4(C_TO_FSCACHE(cp),
+ ("cachefs_cnode_find (nfsv4): stale cnode "
+ "cnode %p, backvp %p, new-backvp %p, vap %p "
+ "fileno=%llx cp-fileno=%llx\n",
+ cp, cp->c_backvp, backvp, vap,
+ cidp->cid_fileno, cp->c_id.cid_fileno));
+ badcookie = 1;
+ }
+ }
+ break;
+ }
+out:
+
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ cmn_err(CE_NOTE, "cachefs_cnode_find: cp %p\n", (void *)*cpp);
+#endif
+ return (badcookie);
+}
+
+/*
+ * We have to initialize the cnode contents. Fill in the contents from the
+ * cache (attrcache file), from the info passed in, whatever it takes.
+ */
+static int
+cachefs_cnode_init(cfs_cid_t *cidp, cnode_t *cp, fscache_t *fscp,
+ filegrp_t *fgp, fid_t *cookiep, vattr_t *vap, vnode_t *backvp,
+ int flag, cred_t *cr)
+{
+ int error = 0;
+ int slotfound;
+ vnode_t *vp;
+ int null_cookie;
+ cachefscache_t *cachep = fscp->fs_cache;
+
+ bzero(cp, sizeof (cnode_t));
+ cp->c_vnode = vn_alloc(KM_SLEEP);
+
+ vp = CTOV(cp);
+
+ vp->v_data = (caddr_t)cp;
+
+ rw_init(&cp->c_rwlock, NULL, RW_DEFAULT, NULL);
+ mutex_init(&cp->c_statelock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&cp->c_popcv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&cp->c_iomutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&cp->c_iocv, NULL, CV_DEFAULT, NULL);
+
+ vn_setops(vp, cachefs_getvnodeops());
+ cp->c_id = *cidp;
+ if (backvp != NULL) {
+ cp->c_backvp = backvp;
+ VN_HOLD(backvp);
+ }
+ cp->c_flags |= flag;
+ filegrp_hold(fgp);
+ cp->c_filegrp = fgp;
+ if (cookiep)
+ cp->c_cookie = *cookiep;
+ mutex_enter(&cp->c_statelock);
+
+ /*
+ * if nocache is set then ignore anything cached for this file,
+ * if nfsv4 flag is set, then create the cnode but don't do
+ * any caching.
+ */
+ if (cp->c_flags & CN_NOCACHE || CFS_ISFS_BACKFS_NFSV4(fscp)) {
+ /*
+ * this case only happens while booting without a cache
+ * or if NFSv4 is the backfilesystem
+ */
+ ASSERT(!CFS_ISFS_SNR(fscp));
+ ASSERT(fscp->fs_cdconnected == CFS_CD_CONNECTED);
+ if (cookiep || CFS_ISFS_BACKFS_NFSV4(fscp)) {
+ error = CFSOP_INIT_COBJECT(fscp, cp, vap, cr);
+ if (error)
+ goto out;
+ cp->c_flags |= CN_UPDATED | CN_ALLOC_PENDING;
+ ASSERT(cp->c_attr.va_type != 0);
+ VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
+ cp->c_attr.va_type, cp->c_attr.va_rdev);
+ cachefs_cnode_setlocalstats(cp);
+ } else
+ error = ESTALE;
+ goto out;
+ }
+
+ /*
+ * see if there's a slot for this filegrp/cid fileno
+ * if not, and there's no cookie info, nothing can be done, but if
+ * there's cookie data indicate we need to create a metadata slot.
+ */
+ slotfound = cachefs_cid_inuse(cp->c_filegrp, cidp);
+ if (slotfound == 0) {
+ if (cookiep == NULL) {
+ error = ENOENT;
+ goto out;
+ }
+ cp->c_flags |= CN_ALLOC_PENDING;
+ } else {
+ /*
+ * if a slot was found, then increment the slot in use count
+ * and try to read the metadata.
+ */
+ cp->c_filegrp->fg_header->ach_count++;
+ error = filegrp_read_metadata(cp->c_filegrp, cidp,
+ &cp->c_metadata);
+ }
+ /*
+ * if there wasn't a slot, or an attempt to read it results in ENOENT,
+ * then init the cache object, create the vnode, etc...
+ */
+ if ((slotfound == 0) || (error == ENOENT)) {
+ error = CFSOP_INIT_COBJECT(fscp, cp, vap, cr);
+ if (error)
+ goto out;
+ ASSERT(cp->c_attr.va_type != 0);
+ VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
+ cp->c_attr.va_type, cp->c_attr.va_rdev);
+ cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
+ } else if (error == 0) {
+ /* slot found, no error occurred on the metadata read */
+ cp->c_size = cp->c_attr.va_size;
+
+ if ((cachep->c_flags & CACHE_CHECK_RLTYPE) &&
+ (cp->c_metadata.md_rlno != 0) &&
+ (cp->c_metadata.md_rltype == CACHEFS_RL_ACTIVE)) {
+ rl_entry_t rl, *rlp;
+
+ mutex_enter(&cachep->c_contentslock);
+ error = cachefs_rl_entry_get(cachep,
+ cp->c_metadata.md_rlno, &rlp);
+ if (error) {
+ mutex_exit(&cachep->c_contentslock);
+ goto out;
+ }
+ rl = *rlp;
+ mutex_exit(&cachep->c_contentslock);
+ if (cp->c_metadata.md_rltype != rl.rl_current) {
+ cp->c_flags |= CN_UPDATED;
+ cp->c_metadata.md_rltype = rl.rl_current;
+ }
+ }
+
+ /*
+ * If no cookie is specified, or if this is a local file,
+ * accept the one in the metadata.
+ */
+ null_cookie = 0;
+ if ((cookiep == NULL) || (cp->c_id.cid_flags & CFS_CID_LOCAL)) {
+ cookiep = &cp->c_metadata.md_cookie;
+ null_cookie = 1;
+ }
+
+ /* if cookies do not match, reset the metadata */
+ if ((cookiep->fid_len != cp->c_cookie.fid_len) ||
+ (bcmp(&cookiep->fid_data, &cp->c_cookie.fid_data,
+ (size_t)cookiep->fid_len) != 0)) {
+ cp->c_cookie = *cookiep;
+ cp->c_flags |= CN_UPDATED;
+ cp->c_metadata.md_timestamp.tv_sec = 0;
+ /* clear all but the front file bit */
+ cp->c_metadata.md_flags &= MD_FILE;
+ error = CFSOP_INIT_COBJECT(fscp, cp, vap, cr);
+ ASSERT(cp->c_attr.va_type != 0);
+ VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
+ cp->c_attr.va_type, cp->c_attr.va_rdev);
+ }
+
+ /* else if the consistency type changed, fix it up */
+ else if (cp->c_metadata.md_consttype != fscp->fs_consttype) {
+ ASSERT(cp->c_attr.va_type != 0);
+ VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
+ cp->c_attr.va_type, cp->c_attr.va_rdev);
+ CFSOP_CONVERT_COBJECT(fscp, cp, cr);
+ if (!null_cookie) {
+ error = CFSOP_CHECK_COBJECT(fscp, cp,
+ C_BACK_CHECK, cr);
+ }
+ }
+
+ /* else check the consistency of the data */
+ else {
+ ASSERT(cp->c_attr.va_type != 0);
+ VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
+ cp->c_attr.va_type, cp->c_attr.va_rdev);
+ if (!null_cookie) {
+ error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
+ }
+ }
+ } else {
+ goto out;
+ }
+ cachefs_cnode_setlocalstats(cp);
+
+out:
+ mutex_exit(&cp->c_statelock);
+ if (error) {
+ if (cp->c_frontvp)
+ VN_RELE(cp->c_frontvp);
+ if (cp->c_backvp)
+ VN_RELE(cp->c_backvp);
+ if (cp->c_acldirvp)
+ VN_RELE(cp->c_acldirvp);
+ filegrp_rele(fgp);
+ rw_destroy(&cp->c_rwlock);
+ mutex_destroy(&cp->c_statelock);
+ cv_destroy(&cp->c_popcv);
+ mutex_destroy(&cp->c_iomutex);
+ cv_destroy(&cp->c_iocv);
+ }
+ return (error);
+}
+
+/*
+ * Finds the cnode for the specified fileno and fid.
+ * Creates the cnode if it does not exist.
+ * The cnode is returned held.
+ */
+int
+cachefs_cnode_make(cfs_cid_t *cidp, fscache_t *fscp, fid_t *cookiep,
+ vattr_t *vap, vnode_t *backvp, cred_t *cr, int flag, cnode_t **cpp)
+{
+ struct cnode *cp;
+ int error;
+ struct filegrp *fgp;
+ struct cachefs_metadata *mdp;
+ fid_t cookie;
+
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ printf("cachefs_cnode_make: ENTER fileno %llu\n",
+ (u_longlong_t)cidp->cid_fileno);
+#endif
+
+ /* get the file group that owns this file */
+ mutex_enter(&fscp->fs_fslock);
+ fgp = filegrp_list_find(fscp, cidp);
+ if (fgp == NULL) {
+ fgp = filegrp_create(fscp, cidp);
+ filegrp_list_add(fscp, fgp);
+ }
+ filegrp_hold(fgp);
+ mutex_exit(&fscp->fs_fslock);
+
+ /* grab the cnode list lock */
+ mutex_enter(&fgp->fg_cnodelock);
+
+ if ((fgp->fg_flags & CFS_FG_READ) == 0)
+ flag |= CN_NOCACHE;
+
+ error = 0;
+ cp = NULL;
+
+ /* look for the cnode on the cnode list */
+ error = cachefs_cnode_find(fgp, cidp, cookiep, &cp, backvp, vap);
+
+ /*
+ * If there already is a cnode with this cid but a different cookie,
+ * (or backvp) we're not going to be using the one we found.
+ */
+ if (error && CFS_ISFS_BACKFS_NFSV4(fscp)) {
+ ASSERT(MUTEX_HELD(&cp->c_statelock));
+ cachefs_cnode_stale(cp);
+ mutex_exit(&cp->c_statelock);
+ cp = NULL;
+ error = 0;
+ } else if (error) {
+ ASSERT(cp);
+ ASSERT(cookiep);
+
+ mutex_exit(&cp->c_statelock);
+
+ /*
+ * If backvp is NULL then someone tried to use
+ * a stale cookie.
+ */
+ if (backvp == NULL) {
+ mutex_exit(&fgp->fg_cnodelock);
+ error = ESTALE;
+ goto out;
+ }
+
+ /* verify the backvp */
+ error = cachefs_getcookie(backvp, &cookie, NULL, cr, TRUE);
+ if (error ||
+ ((cookiep->fid_len != cookie.fid_len) ||
+ (bcmp(&cookiep->fid_data, cookie.fid_data,
+ (size_t)cookiep->fid_len) != 0))) {
+ mutex_exit(&fgp->fg_cnodelock);
+ error = ESTALE;
+ goto out;
+ }
+
+ /* make the old cnode give up its front file resources */
+ VN_HOLD(CTOV(cp));
+ (void) cachefs_sync_metadata(cp);
+ mutex_enter(&cp->c_statelock);
+ mdp = &cp->c_metadata;
+ if (mdp->md_rlno) {
+ /* XXX sam: should this assert be NOCACHE? */
+ /* XXX sam: maybe we should handle NOFILL as no-op */
+ ASSERT((fscp->fs_cache->c_flags & CACHE_NOFILL) == 0);
+
+ /* if modified in the cache, move to lost+found */
+ if ((cp->c_attr.va_type == VREG) &&
+ (cp->c_metadata.md_rltype == CACHEFS_RL_MODIFIED)) {
+ error = cachefs_cnode_lostfound(cp, NULL);
+ if (error) {
+ mutex_exit(&cp->c_statelock);
+ VN_RELE(CTOV(cp));
+ mutex_exit(&fgp->fg_cnodelock);
+ error = ESTALE;
+ goto out;
+ }
+ }
+
+ /* else nuke the front file */
+ else {
+ cachefs_cnode_stale(cp);
+ }
+ } else {
+ cachefs_cnode_stale(cp);
+ }
+ mutex_exit(&cp->c_statelock);
+ VN_RELE(CTOV(cp));
+ cp = NULL;
+ error = 0;
+ }
+
+
+ /* if the cnode does not exist */
+ if (cp == NULL) {
+ /* XXX should we drop all locks for this? */
+ cp = kmem_cache_alloc(cachefs_cnode_cache, KM_SLEEP);
+
+ error = cachefs_cnode_init(cidp, cp, fscp, fgp,
+ cookiep, vap, backvp, flag, cr);
+ if (error) {
+ mutex_exit(&fgp->fg_cnodelock);
+ vn_free(cp->c_vnode);
+ kmem_cache_free(cachefs_cnode_cache, cp);
+ goto out;
+ }
+
+ if (cp->c_metadata.md_rlno &&
+ (cp->c_metadata.md_rltype == CACHEFS_RL_GC) &&
+ ((fscp->fs_cache->c_flags & CACHE_NOFILL) == 0)) {
+#ifdef CFSDEBUG
+ cachefs_rlent_verify(fscp->fs_cache,
+ CACHEFS_RL_GC, cp->c_metadata.md_rlno);
+#endif /* CFSDEBUG */
+ cachefs_rlent_moveto(fscp->fs_cache,
+ CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno,
+ cp->c_metadata.md_frontblks);
+ cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
+ cp->c_flags |= CN_UPDATED;
+ }
+
+ cachefs_cnode_listadd(cp);
+ vn_exists(cp->c_vnode);
+ mutex_exit(&fgp->fg_cnodelock);
+ (void) fscache_cnodecnt(fscp, 1);
+ }
+
+ /* else if the cnode exists */
+ else {
+ VN_HOLD(CTOV(cp));
+
+ /* remove from idle list if on it */
+ if (cp->c_flags & CN_IDLE) {
+ cp->c_flags &= ~CN_IDLE;
+
+ mutex_enter(&fscp->fs_idlelock);
+ cachefs_cnode_idlerem(cp);
+ mutex_exit(&fscp->fs_idlelock);
+ VN_RELE(CTOV(cp));
+ cp->c_ipending = 0;
+ }
+ mutex_exit(&cp->c_statelock);
+ mutex_exit(&fgp->fg_cnodelock);
+ }
+
+ /*
+ * Assertion to ensure the cnode matches
+ * the backvp and attribute type information.
+ */
+ ASSERT((CFS_ISFS_BACKFS_NFSV4(fscp) == 0) ||
+ ((cp->c_backvp == backvp) &&
+ (cp->c_attr.va_type == vap->va_type)));
+out:
+ *cpp = ((error == 0) ? cp : NULL);
+ filegrp_rele(fgp);
+
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ printf("cachefs_cnode_make: EXIT cp %p, error %d\n",
+ (void *)*cpp, error);
+#endif
+ return (error);
+}
+
+/*
+ * cachefs_cid_inuse()
+ *
+ * returns nonzero if a cid has any data in the cache; either a cnode
+ * or metadata.
+ */
+
+int
+cachefs_cid_inuse(filegrp_t *fgp, cfs_cid_t *cidp)
+{
+ cnode_t *cp;
+ int status = 0;
+
+ ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
+
+ /*
+ * Since we don't care about the cookie data, we don't care about any
+ * status that find might return.
+ */
+
+ cp = NULL;
+ (void) cachefs_cnode_find(fgp, cidp, NULL, &cp, NULL, NULL);
+ if (cp != NULL) {
+ mutex_exit(&cp->c_statelock);
+ status = 1;
+ return (status);
+ }
+
+ /*
+ * Don't want to use filegrp_read_metadata, since it will return
+ * ENOENT if the metadata slot exists but hasn't been written to yet.
+ * That condition still counts as the slot (metadata) being in use.
+ * Instead, as long as the filegrp attrcache has been created and
+ * there's a slot assigned for this cid, then the metadata is in use.
+ */
+ if (((fgp->fg_flags & CFS_FG_ALLOC_ATTR) == 0) &&
+ (filegrp_cid_to_slot(fgp, cidp) != 0))
+ status = 1;
+
+ return (status);
+}
+
+/*
+ * cachefs_fileno_inuse()
+ *
+ * returns nonzero if a fileno is known to the cache, as either a
+ * local or a normal file.
+ */
+
+int
+cachefs_fileno_inuse(fscache_t *fscp, ino64_t fileno)
+{
+ cfs_cid_t cid;
+ filegrp_t *fgp;
+ int known = 0;
+
+ ASSERT(MUTEX_HELD(&fscp->fs_fslock));
+ cid.cid_fileno = fileno;
+
+ /* if there's no filegrp for this cid range, then there's no data */
+ fgp = filegrp_list_find(fscp, &cid);
+ if (fgp == NULL)
+ return (known);
+
+ filegrp_hold(fgp);
+ mutex_enter(&fgp->fg_cnodelock);
+
+ cid.cid_flags = CFS_CID_LOCAL;
+ if (cachefs_cid_inuse(fgp, &cid)) {
+ known = 1;
+ goto out;
+ }
+ cid.cid_flags = 0;
+ if (cachefs_cid_inuse(fgp, &cid))
+ known = 1;
+out:
+ mutex_exit(&fgp->fg_cnodelock);
+ filegrp_rele(fgp);
+ return (known);
+}
+
+/*
+ * Creates a cnode from an unused inode in the cache.
+ * The cnode is returned held.
+ */
+int
+cachefs_cnode_create(fscache_t *fscp, vattr_t *vap, int flag, cnode_t **cpp)
+{
+ struct cnode *cp;
+ int error, found;
+ struct filegrp *fgp;
+ cfs_cid_t cid, cid2;
+
+ ASSERT(CFS_ISFS_SNR(fscp));
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+
+ cid.cid_flags = CFS_CID_LOCAL;
+ cid2.cid_flags = 0;
+
+ /* find an unused local file in the cache */
+ for (;;) {
+ mutex_enter(&fscp->fs_fslock);
+
+ /* make sure we did not wrap */
+ fscp->fs_info.fi_localfileno++;
+ if (fscp->fs_info.fi_localfileno == 0)
+ fscp->fs_info.fi_localfileno = 3;
+ cid.cid_fileno = fscp->fs_info.fi_localfileno;
+ fscp->fs_flags |= CFS_FS_DIRTYINFO;
+
+ /* avoid fileno conflict in non-local space */
+ cid2.cid_fileno = cid.cid_fileno;
+ fgp = filegrp_list_find(fscp, &cid2);
+ if (fgp != NULL) {
+ filegrp_hold(fgp);
+ mutex_enter(&fgp->fg_cnodelock);
+ found = cachefs_cid_inuse(fgp, &cid2);
+ mutex_exit(&fgp->fg_cnodelock);
+ filegrp_rele(fgp);
+ if (found) {
+ mutex_exit(&fscp->fs_fslock);
+ continue;
+ }
+ }
+
+ /* get the file group that owns this fileno */
+ fgp = filegrp_list_find(fscp, &cid);
+ if (fgp == NULL) {
+ fgp = filegrp_create(fscp, &cid);
+ filegrp_list_add(fscp, fgp);
+ }
+
+ /* see if there is any room left in this file group */
+ mutex_enter(&fgp->fg_mutex);
+ if (fgp->fg_header &&
+ (fgp->fg_header->ach_count ==
+ fscp->fs_info.fi_fgsize)) {
+ /* no more room, set up for the next file group */
+ fscp->fs_info.fi_localfileno = fgp->fg_id.cid_fileno +
+ fscp->fs_info.fi_fgsize;
+ mutex_exit(&fgp->fg_mutex);
+ mutex_exit(&fscp->fs_fslock);
+ continue;
+ }
+ mutex_exit(&fgp->fg_mutex);
+
+ filegrp_hold(fgp);
+ mutex_exit(&fscp->fs_fslock);
+
+ ASSERT((fgp->fg_flags &
+ (CFS_FG_READ | CFS_FG_WRITE)) ==
+ (CFS_FG_READ | CFS_FG_WRITE));
+
+ /* grab the cnode list lock */
+ mutex_enter(&fgp->fg_cnodelock);
+
+ if ((fgp->fg_flags & CFS_FG_READ) == 0)
+ flag |= CN_NOCACHE;
+
+ /* keep looking if a cnode or metadata exist for this fileno */
+ if (cachefs_cid_inuse(fgp, &cid)) {
+ mutex_exit(&fgp->fg_cnodelock);
+ filegrp_rele(fgp);
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ cmn_err(CE_NOTE, "cachefs_cnode_create: "
+ "fileno %llu exists.\n",
+ (u_longlong_t)cid.cid_fileno);
+#endif
+ continue;
+ }
+ break;
+ }
+
+ vap->va_nodeid = cid.cid_fileno;
+
+ /* create space for the cnode */
+ cp = kmem_cache_alloc(cachefs_cnode_cache, KM_SLEEP);
+
+ /* set up the cnode */
+ error = cachefs_cnode_init(&cid, cp, fscp, fgp,
+ &cp->c_cookie, vap, NULL, flag, kcred);
+ if (error) {
+ mutex_exit(&fgp->fg_cnodelock);
+ vn_free(cp->c_vnode);
+ kmem_cache_free(cachefs_cnode_cache, cp);
+ goto out;
+ }
+
+ /* save copy of fileno that is returned to the user */
+ cp->c_metadata.md_flags |= MD_LOCALFILENO;
+ cp->c_metadata.md_localfileno = cid.cid_fileno;
+ cp->c_flags |= CN_UPDATED;
+
+ cachefs_cnode_listadd(cp);
+ mutex_exit(&fgp->fg_cnodelock);
+ (void) fscache_cnodecnt(fscp, 1);
+
+out:
+ *cpp = ((error == 0) ? cp : NULL);
+ filegrp_rele(fgp);
+ return (error);
+}
+
+/*
+ * Moves the cnode to its new location in the cache.
+ * Before calling this routine other steps must be taken
+ * to ensure that other file system routines that operate
+ * on cnodes do not run.
+ */
+void
+cachefs_cnode_move(cnode_t *cp)
+{
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+ cfs_cid_t cid;
+ filegrp_t *fgp;
+ filegrp_t *ofgp = cp->c_filegrp;
+ struct cachefs_metadata *mdp;
+ cnode_t *xcp;
+ char oname[CFS_FRONTFILE_NAME_SIZE];
+ char nname[CFS_FRONTFILE_NAME_SIZE];
+ int ffnuke = 0;
+ int error;
+
+ ASSERT(CFS_ISFS_SNR(fscp));
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+ ASSERT(cp->c_id.cid_flags & CFS_CID_LOCAL);
+ ASSERT(cp->c_attr.va_nodeid != 0);
+
+ /* construct the cid of the new file location */
+ cid.cid_fileno = cp->c_attr.va_nodeid;
+ cid.cid_flags = 0;
+
+ /* see if there already is a file occupying our slot */
+ error = cachefs_cnode_make(&cid, fscp, NULL, NULL, NULL, kcred,
+ 0, &xcp);
+ if (error == 0) {
+ mutex_enter(&xcp->c_statelock);
+ cachefs_cnode_stale(xcp);
+ mutex_exit(&xcp->c_statelock);
+ VN_RELE(CTOV(xcp));
+ xcp = NULL;
+ error = 0;
+ }
+
+ /* get the file group that this file is moving to */
+ mutex_enter(&fscp->fs_fslock);
+ fgp = filegrp_list_find(fscp, &cid);
+ if (fgp == NULL) {
+ fgp = filegrp_create(fscp, &cid);
+ filegrp_list_add(fscp, fgp);
+ }
+ filegrp_hold(fgp);
+ mutex_exit(&fscp->fs_fslock);
+
+ /* XXX fix to not have to create metadata to hold rl slot */
+ /* get a metadata slot in the new file group */
+ if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
+ (void) filegrp_allocattr(fgp);
+ }
+ /* XXX can fix create_metadata to call allocattr if necessary? */
+ error = filegrp_create_metadata(fgp, &cp->c_metadata, &cid);
+ if (error)
+ ffnuke = 1;
+ if ((ffnuke == 0) && filegrp_ffhold(fgp))
+ ffnuke = 1;
+
+ /* move the front file to the new file group */
+ if ((ffnuke == 0) && (cp->c_metadata.md_flags & MD_FILE)) {
+ make_ascii_name(&cp->c_id, oname);
+ make_ascii_name(&cid, nname);
+ error = VOP_RENAME(ofgp->fg_dirvp, oname, fgp->fg_dirvp,
+ nname, kcred);
+ if (error) {
+ ffnuke = 1;
+#ifdef CFSDEBUG
+ if (error != ENOSPC) {
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ printf("cachefs: cnode_move "
+ "1: error %d\n", error);
+ }
+#endif
+ }
+ }
+
+ /* remove the file from the old file group */
+ mutex_enter(&ofgp->fg_cnodelock);
+ mutex_enter(&cp->c_statelock);
+ if (cp->c_frontvp) {
+ VN_RELE(cp->c_frontvp);
+ cp->c_frontvp = NULL;
+ }
+ if (cp->c_acldirvp) {
+ VN_RELE(cp->c_acldirvp);
+ cp->c_acldirvp = NULL;
+ }
+ mdp = &cp->c_metadata;
+ if (mdp->md_rlno) {
+ if (ffnuke) {
+ cachefs_removefrontfile(mdp, &cp->c_id, ofgp);
+ cachefs_rlent_moveto(fscp->fs_cache,
+ CACHEFS_RL_FREE, mdp->md_rlno, 0);
+ mdp->md_rlno = 0;
+ mdp->md_rltype = CACHEFS_RL_NONE;
+ } else {
+ filegrp_ffrele(ofgp);
+ }
+ }
+ if (ffnuke)
+ mdp->md_flags &= ~MD_PACKED;
+ if ((cp->c_flags & CN_ALLOC_PENDING) == 0) {
+ (void) filegrp_destroy_metadata(ofgp, &cp->c_id);
+ cp->c_flags |= CN_ALLOC_PENDING;
+ }
+ cachefs_cnode_listrem(cp);
+ cp->c_filegrp = NULL;
+ mutex_exit(&cp->c_statelock);
+ mutex_exit(&ofgp->fg_cnodelock);
+
+ /* add the cnode to the new file group */
+ mutex_enter(&fgp->fg_cnodelock);
+ mutex_enter(&cp->c_statelock);
+ cp->c_id = cid;
+ cp->c_filegrp = fgp;
+ cp->c_flags |= CN_UPDATED;
+ mutex_exit(&cp->c_statelock);
+ cachefs_cnode_listadd(cp);
+ if (mdp->md_rlno)
+ cachefs_rl_changefileno(fscp->fs_cache, mdp->md_rlno,
+ cp->c_id.cid_fileno);
+ mutex_exit(&fgp->fg_cnodelock);
+
+ filegrp_rele(ofgp);
+}
+
+/*
+ * Syncs out the specified cnode.
+ * Only called via cnode_traverse from fscache_sync
+ */
+void
+cachefs_cnode_sync(cnode_t *cp)
+{
+ vnode_t *vp = CTOV(cp);
+ int error = 0;
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+ int held = 0;
+
+ if (cp->c_flags & (CN_STALE | CN_DESTROY))
+ return;
+
+ if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
+ return;
+
+ for (;;) {
+ /* get (or renew) access to the file system */
+ if (held) {
+ cachefs_cd_release(fscp);
+ held = 0;
+ }
+ /*
+ * Getting file system access for reading is really cheating.
+ * However we are getting called from sync so we do not
+ * want to hang up if the cachefsd is not running.
+ */
+ error = cachefs_cd_access(fscp, 0, 0);
+ if (error)
+ break;
+ held = 1;
+
+ /* if a regular file, write out the pages */
+ if ((vp->v_type == VREG) && vn_has_cached_data(vp)) {
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+ error = cachefs_putpage_common(vp, (offset_t)0,
+ 0, 0, kcred);
+ if (CFS_TIMEOUT(fscp, error)) {
+ if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
+ cachefs_cd_release(fscp);
+ held = 0;
+ cachefs_cd_timedout(fscp);
+ continue;
+ } else {
+ /* cannot push, give up */
+ break;
+ }
+ }
+
+ /* clear the cnode error if putpage worked */
+ if ((error == 0) && cp->c_error) {
+ mutex_enter(&cp->c_statelock);
+ cp->c_error = 0;
+ mutex_exit(&cp->c_statelock);
+ }
+
+ if (error)
+ break;
+ }
+
+ /* if connected, sync the backvp */
+ if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
+ cp->c_backvp) {
+ mutex_enter(&cp->c_statelock);
+ if (cp->c_backvp) {
+ error = VOP_FSYNC(cp->c_backvp, FSYNC, kcred);
+ if (CFS_TIMEOUT(fscp, error)) {
+ mutex_exit(&cp->c_statelock);
+ cachefs_cd_release(fscp);
+ held = 0;
+ cachefs_cd_timedout(fscp);
+ continue;
+ } else if (error && (error != EINTR))
+ cp->c_error = error;
+ }
+ mutex_exit(&cp->c_statelock);
+ }
+
+ /* sync the metadata and the front file to the front fs */
+ (void) cachefs_sync_metadata(cp);
+ break;
+ }
+
+ if (held)
+ cachefs_cd_release(fscp);
+}
+
+/*
+ * Moves the specified file to the lost+found directory for the
+ * cached file system.
+ * Invalidates cached data and attributes.
+ * Returns 0 or an error if could not perform operation.
+ */
+int
+cachefs_cnode_lostfound(cnode_t *cp, char *rname)
+{
+ int error = 0;
+ fscache_t *fscp;
+ cachefscache_t *cachep;
+ char oname[CFS_FRONTFILE_NAME_SIZE];
+ filegrp_t *fgp;
+ char *namep, *strp;
+ char *namebuf = NULL;
+ vnode_t *nvp;
+ int index;
+ int len;
+
+ fscp = C_TO_FSCACHE(cp);
+ cachep = fscp->fs_cache;
+
+ ASSERT(MUTEX_HELD(&cp->c_statelock));
+ ASSERT((cachep->c_flags & (CACHE_NOCACHE|CACHE_NOFILL)) == 0);
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+
+ fgp = cp->c_filegrp;
+
+ /* set up the file group if necessary */
+ if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
+ error = filegrp_allocattr(fgp);
+ if (error)
+ goto out;
+ }
+ ASSERT(fgp->fg_dirvp);
+
+ namebuf = cachefs_kmem_alloc(MAXNAMELEN * 2, KM_SLEEP);
+
+ if ((cp->c_attr.va_type != VREG) ||
+ (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) ||
+ ((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
+ ((cp->c_metadata.md_flags & MD_FILE) == 0) ||
+ (cp->c_metadata.md_rlno == 0)) {
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_CNODE)
+ printf("cachefs_cnode_lostfound cp %p cannot save\n",
+ (void *)cp);
+#endif
+ error = EINVAL;
+ goto out;
+ }
+
+ /* lock out other users of the lost+found directory */
+ mutex_enter(&cachep->c_contentslock);
+
+ /* find a name we can use in lost+found */
+ if (rname)
+ namep = rname;
+ else
+ namep = "lostfile";
+ error = VOP_LOOKUP(cachep->c_lostfoundvp, namep, &nvp,
+ NULL, 0, NULL, kcred);
+ if (error == 0)
+ VN_RELE(nvp);
+ if (error != ENOENT) {
+#define MAXTRIES 1000
+ strp = namep;
+ for (index = 0; index < MAXTRIES; index++) {
+ (void) sprintf(namebuf, "%s.%" PRIx64, strp,
+ gethrestime_sec() * cp->c_id.cid_fileno * index);
+ len = (int)strlen(namebuf) + 1;
+ if (len > MAXNAMELEN)
+ namep = &namebuf[len - MAXNAMELEN];
+ else
+ namep = namebuf;
+ error = VOP_LOOKUP(cachep->c_lostfoundvp, namep, &nvp,
+ NULL, 0, NULL, kcred);
+ if (error == 0)
+ VN_RELE(nvp);
+ if (error == ENOENT)
+ break;
+ }
+ if (index == MAXTRIES) {
+ error = EIO;
+ mutex_exit(&cachep->c_contentslock);
+ goto out;
+ }
+ }
+
+ /* get the name of the front file */
+ make_ascii_name(&cp->c_id, oname);
+
+ /* rename the file into the lost+found directory */
+ error = VOP_RENAME(fgp->fg_dirvp, oname, cachep->c_lostfoundvp,
+ namep, kcred);
+ if (error) {
+ mutex_exit(&cachep->c_contentslock);
+ goto out;
+ }
+ mutex_exit(&cachep->c_contentslock);
+
+ /* copy out the new name */
+ if (rname)
+ (void) strcpy(rname, namep);
+
+out:
+ /* clean up */
+ cachefs_cnode_stale(cp);
+
+ if (namebuf)
+ cachefs_kmem_free(namebuf, MAXNAMELEN * 2);
+
+#if 0 /* XXX until we can put filesystem in read-only mode */
+ if (error) {
+ /* XXX put file system in read-only mode */
+ }
+#endif
+
+ return (error);
+}
+
+/*
+ * Traverses the list of cnodes on the fscache and calls the
+ * specified routine with the held cnode.
+ */
+void
+cachefs_cnode_traverse(fscache_t *fscp, void (*routinep)(cnode_t *))
+{
+ filegrp_t *fgp, *ofgp;
+ cnode_t *cp, *ocp;
+ int index;
+
+ /* lock the fscache while we traverse the file groups */
+ mutex_enter(&fscp->fs_fslock);
+
+ /* for each bucket of file groups */
+ for (index = 0; index < CFS_FS_FGP_BUCKET_SIZE; index++) {
+ ofgp = NULL;
+
+ /* for each file group in a bucket */
+ for (fgp = fscp->fs_filegrp[index];
+ fgp != NULL;
+ fgp = fgp->fg_next) {
+
+ /* hold the file group */
+ filegrp_hold(fgp);
+
+ /* drop fscache lock so others can use it */
+ mutex_exit(&fscp->fs_fslock);
+
+ /* drop hold on previous file group */
+ if (ofgp)
+ filegrp_rele(ofgp);
+ ofgp = fgp;
+
+ /* lock the cnode list while we traverse it */
+ mutex_enter(&fgp->fg_cnodelock);
+ ocp = NULL;
+
+ /* for each cnode in this file group */
+ for (cp = fgp->fg_cnodelist;
+ cp != NULL;
+ cp = cp->c_next) {
+
+ /* hold the cnode */
+ VN_HOLD(CTOV(cp));
+
+ /* drop cnode list lock so others can use it */
+ mutex_exit(&fgp->fg_cnodelock);
+
+ /* drop hold on previous cnode */
+ if (ocp) {
+ VN_RELE(CTOV(ocp));
+ }
+ ocp = cp;
+
+ /*
+ * Execute routine for this cnode.
+ * At this point no locks are held.
+ */
+ (routinep)(cp);
+
+ /* reaquire the cnode list lock */
+ mutex_enter(&fgp->fg_cnodelock);
+ }
+
+ /* drop cnode list lock */
+ mutex_exit(&fgp->fg_cnodelock);
+
+ /* drop hold on last cnode */
+ if (ocp) {
+ VN_RELE(CTOV(ocp));
+ }
+
+ /* reaquire the fscache lock */
+ mutex_enter(&fscp->fs_fslock);
+ }
+
+ /* drop hold on last file group */
+ if (ofgp)
+ filegrp_rele(ofgp);
+ }
+ mutex_exit(&fscp->fs_fslock);
+}
+
+void
+cachefs_cnode_disable_caching(struct cnode *cp)
+{
+ mutex_enter(&cp->c_statelock);
+ cp->c_flags |= CN_NOCACHE;
+ if (cp->c_frontvp != NULL) {
+ VN_RELE(cp->c_frontvp);
+ cp->c_frontvp = NULL;
+ }
+ mutex_exit(&cp->c_statelock);
+}
+
+#define TIMEMATCH(a, b) ((a)->tv_sec == (b)->tv_sec && \
+ (a)->tv_nsec == (b)->tv_nsec)
+
+static void
+cnode_enable_caching(struct cnode *cp)
+{
+ struct vnode *iovp;
+ struct filegrp *fgp;
+ struct cachefs_metadata md;
+ cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
+ int error;
+
+ ASSERT((cachep->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0);
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
+
+ iovp = NULL;
+ if (CTOV(cp)->v_type == VREG)
+ iovp = cp->c_backvp;
+ if (iovp) {
+ (void) VOP_PUTPAGE(iovp, (offset_t)0,
+ (uint_t)0, B_INVAL, kcred);
+ }
+ mutex_enter(&cp->c_statelock);
+ if (cp->c_backvp) {
+ VN_RELE(cp->c_backvp);
+ cp->c_backvp = NULL;
+ }
+ fgp = cp->c_filegrp;
+ ASSERT(fgp);
+ error = filegrp_read_metadata(fgp, &cp->c_id, &md);
+ if (error == 0) {
+ if ((cachep->c_flags & CACHE_CHECK_RLTYPE) &&
+ (md.md_rlno != 0) &&
+ (md.md_rltype == CACHEFS_RL_ACTIVE)) {
+ rl_entry_t *rlp, rl;
+
+ mutex_enter(&cachep->c_contentslock);
+ error = cachefs_rl_entry_get(cachep, md.md_rlno, &rlp);
+ if (error) {
+ mutex_exit(&cachep->c_contentslock);
+ goto out;
+ }
+
+ rl = *rlp;
+ mutex_exit(&cachep->c_contentslock);
+
+ if (rl.rl_current != md.md_rltype) {
+ md.md_rltype = rl.rl_current;
+ cp->c_flags |= CN_UPDATED;
+ }
+ }
+
+ /*
+ * A rudimentary consistency check
+ * here. If the cookie and mtime
+ * from the cnode match those from the
+ * cache metadata, we assume for now that
+ * the cached data is OK.
+ */
+ if (bcmp(&md.md_cookie.fid_data, &cp->c_cookie.fid_data,
+ (size_t)cp->c_cookie.fid_len) == 0 &&
+ TIMEMATCH(&cp->c_attr.va_mtime, &md.md_vattr.va_mtime)) {
+ cp->c_metadata = md;
+ } else {
+ /*
+ * Here we're skeptical about the validity of
+ * the front file.
+ * We'll keep the attributes already present in
+ * the cnode, and bring along the parts of the
+ * metadata that we need to eventually nuke this
+ * bogus front file -- in inactive or getfrontfile,
+ * whichever comes first...
+ */
+ if (cp->c_frontvp != NULL) {
+ VN_RELE(cp->c_frontvp);
+ cp->c_frontvp = NULL;
+ }
+ cp->c_metadata.md_flags = md.md_flags;
+ cp->c_metadata.md_flags |= MD_NEEDATTRS;
+ cp->c_metadata.md_rlno = md.md_rlno;
+ cp->c_metadata.md_rltype = md.md_rltype;
+ cp->c_metadata.md_consttype = md.md_consttype;
+ cp->c_metadata.md_fid = md.md_fid;
+ cp->c_metadata.md_frontblks = md.md_frontblks;
+ cp->c_metadata.md_timestamp.tv_sec = 0;
+ cp->c_metadata.md_timestamp.tv_nsec = 0;
+ bzero(&cp->c_metadata.md_allocinfo,
+ cp->c_metadata.md_allocents *
+ sizeof (struct cachefs_allocmap));
+ cp->c_metadata.md_allocents = 0;
+ cp->c_metadata.md_flags &= ~MD_POPULATED;
+ if ((cp->c_metadata.md_rlno != 0) &&
+ (cp->c_metadata.md_rltype == CACHEFS_RL_PACKED)) {
+ cachefs_rlent_moveto(cachep,
+ CACHEFS_RL_PACKED_PENDING,
+ cp->c_metadata.md_rlno,
+ cp->c_metadata.md_frontblks);
+ cp->c_metadata.md_rltype =
+ CACHEFS_RL_PACKED_PENDING;
+ }
+
+ cp->c_flags |= CN_UPDATED;
+#ifdef CFSDEBUG
+ CFS_DEBUG(CFSDEBUG_GENERAL) {
+ printf(
+ "fileno %lld ignores cached data due "
+ "to cookie and/or mtime mismatch\n",
+ (longlong_t)cp->c_id.cid_fileno);
+ }
+#endif
+ }
+ if (cp->c_metadata.md_rltype == CACHEFS_RL_GC) {
+ cachefs_rlent_moveto(cachep, CACHEFS_RL_ACTIVE,
+ cp->c_metadata.md_rlno,
+ cp->c_metadata.md_frontblks);
+ cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
+ cp->c_flags |= CN_UPDATED;
+ }
+ }
+
+out:
+ cp->c_flags &= ~CN_NOCACHE;
+ mutex_exit(&cp->c_statelock);
+
+ (void) cachefs_pack_common(CTOV(cp), kcred);
+}
+
+void
+cachefs_enable_caching(struct fscache *fscp)
+{
+
+ /*
+ * This function is only called when a remount occurs,
+ * with "nocache" and "nofill" options configured
+ * (currently these aren't supported). Since this
+ * function can write into the cache, make sure that
+ * its not in use with NFSv4.
+ */
+ if (CFS_ISFS_BACKFS_NFSV4(fscp))
+ return;
+
+ /*
+ * set up file groups so we can read them. Note that general
+ * users (makecfsnode) will *not* start using them (i.e., all
+ * newly created cnodes will be NOCACHE)
+ * until we "enable_caching_rw" below.
+ */
+ mutex_enter(&fscp->fs_fslock);
+ filegrp_list_enable_caching_ro(fscp);
+ mutex_exit(&fscp->fs_fslock);
+
+ cachefs_cnode_traverse(fscp, cnode_enable_caching);
+
+ /* enable general use of the filegrps */
+ mutex_enter(&fscp->fs_fslock);
+ filegrp_list_enable_caching_rw(fscp);
+ mutex_exit(&fscp->fs_fslock);
+}
+
+/*
+ * This function makes a cnode stale by performing the following tasks:
+ * 1) remove the front file
+ * 2) Remove any resource file entries
+ * 3) Remove any metadata entry from the attrcache file
+ * 4) Set the stale bit in the cnode flags field
+ */
+void
+cachefs_cnode_stale(cnode_t *cp)
+{
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+ struct cachefs_metadata *mdp;
+
+ ASSERT(MUTEX_HELD(&cp->c_statelock));
+
+ /*
+ * Remove a metadata entry if the file exists
+ */
+ mdp = &cp->c_metadata;
+ if (mdp->md_rlno) {
+
+ ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
+
+ /*
+ * destroy the frontfile
+ */
+ cachefs_removefrontfile(mdp, &cp->c_id, cp->c_filegrp);
+ /*
+ * Remove resource file entry
+ */
+ cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_FREE,
+ mdp->md_rlno, 0);
+ mdp->md_rlno = 0;
+ mdp->md_rltype = CACHEFS_RL_NONE;
+ }
+
+ /*
+ * Remove attrcache metadata
+ */
+ if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
+ (void) filegrp_destroy_metadata(cp->c_filegrp, &cp->c_id);
+ mdp->md_flags = 0;
+
+ if (cp->c_frontvp) {
+ VN_RELE(cp->c_frontvp);
+ cp->c_frontvp = NULL;
+ }
+
+ /*
+ * For NFSv4 need to hang on to the backvp until vn_rele()
+ * frees this cnode.
+ */
+ if (cp->c_backvp && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
+ VN_RELE(cp->c_backvp);
+ cp->c_backvp = NULL;
+ }
+ if (cp->c_acldirvp) {
+ VN_RELE(cp->c_acldirvp);
+ cp->c_acldirvp = NULL;
+ }
+
+ cp->c_flags |= CN_STALE | CN_ALLOC_PENDING | CN_NOCACHE;
+}
+
+/*
+ * Sets up the local attributes in the metadata from the attributes.
+ */
+void
+cachefs_cnode_setlocalstats(cnode_t *cp)
+{
+ fscache_t *fscp = C_TO_FSCACHE(cp);
+ cachefs_metadata_t *mdp = &cp->c_metadata;
+
+ ASSERT(MUTEX_HELD(&cp->c_statelock));
+
+ /* allow over writing of local attributes if a remount occurred */
+ if (fscp->fs_info.fi_resettimes != mdp->md_resettimes) {
+ mdp->md_flags &= ~(MD_LOCALCTIME | MD_LOCALMTIME);
+ mdp->md_resettimes = fscp->fs_info.fi_resettimes;
+ }
+ if (fscp->fs_info.fi_resetfileno != mdp->md_resetfileno) {
+ mdp->md_flags &= ~MD_LOCALFILENO;
+ mdp->md_resetfileno = fscp->fs_info.fi_resetfileno;
+ }
+
+ /* overwrite old fileno and timestamps if not local versions */
+ if ((mdp->md_flags & MD_LOCALFILENO) == 0)
+ mdp->md_localfileno = mdp->md_vattr.va_nodeid;
+ if ((mdp->md_flags & MD_LOCALCTIME) == 0)
+ mdp->md_localctime = mdp->md_vattr.va_ctime;
+ if ((mdp->md_flags & MD_LOCALMTIME) == 0)
+ mdp->md_localmtime = mdp->md_vattr.va_mtime;
+ cp->c_flags |= CN_UPDATED;
+}