diff options
author | praks <none@none> | 2007-07-31 11:47:23 -0700 |
---|---|---|
committer | praks <none@none> | 2007-07-31 11:47:23 -0700 |
commit | 31ceb98b622e1a310256f4c4a1472beb92046db3 (patch) | |
tree | 4a2a05b4c17a361256d89e7f25e2823a61cebfc9 /usr | |
parent | 699bceb8d4ff29f35719c611605a58400018dd5e (diff) | |
download | illumos-gate-31ceb98b622e1a310256f4c4a1472beb92046db3.tar.gz |
PSARC/2007/027 File Events Notification API
6367770 RFE: add userland interface to fem (file event monitoring)
Diffstat (limited to 'usr')
35 files changed, 2987 insertions, 110 deletions
diff --git a/usr/src/lib/libbsm/audit_event.txt b/usr/src/lib/libbsm/audit_event.txt index 536c80831a..0baf923310 100644 --- a/usr/src/lib/libbsm/audit_event.txt +++ b/usr/src/lib/libbsm/audit_event.txt @@ -344,6 +344,7 @@ 298:AUE_PF_POLICY_FLIP:Flip IPsec policy:as 299:AUE_PF_POLICY_FLUSH:Flush IPsec policy rules:as 300:AUE_PF_POLICY_ALGS:Update IPsec algorithms:as +301:AUE_PORTFS:portfs(2) - file events source:fa # # user level audit events # 2048 - 6143 Reserved diff --git a/usr/src/uts/common/c2/audit_event.c b/usr/src/uts/common/c2/audit_event.c index 5dfc75ec04..8b99857df6 100644 --- a/usr/src/uts/common/c2/audit_event.c +++ b/usr/src/uts/common/c2/audit_event.c @@ -74,6 +74,8 @@ #include <sys/socketvar.h> #include <netinet/in.h> #include <sys/ddi.h> +#include <sys/port_impl.h> + extern token_t *au_to_sock_inet(struct sockaddr_in *); @@ -92,6 +94,7 @@ static au_event_t aui_execv(au_event_t); static au_event_t aui_execve(au_event_t); static au_event_t aui_memcntl(au_event_t); static au_event_t aui_sysinfo(au_event_t); +static au_event_t aui_portfs(au_event_t); static au_event_t aui_auditsys(au_event_t); static au_event_t aui_modctl(au_event_t); static au_event_t aui_acl(au_event_t); @@ -577,7 +580,7 @@ aui_null, AUE_NULL, aus_null, /* 180 (loadable) kaio */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 181 (loadable) */ auf_null, 0, -aui_null, AUE_NULL, aus_null, /* 182 (loadable) */ +aui_portfs, AUE_PORTFS, aus_null, /* 182 (loadable) portfs */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 183 (loadable) */ auf_null, 0, @@ -5608,3 +5611,34 @@ aui_forksys(au_event_t e) return (e); } + +/*ARGSUSED*/ +static au_event_t +aui_portfs(au_event_t e) +{ + struct a { /* portfs */ + long a1; + long a2; + long a3; + } *uap = (struct a *)ttolwp(curthread)->lwp_ap; + + /* + * check opcode + */ + switch (((uint_t)uap->a1) & PORT_CODE_MASK) { + case PORT_ASSOCIATE: + case PORT_DISSOCIATE: + /* + * check source + */ + if ((uint_t)uap->a3 == PORT_SOURCE_FILE) { + e = 
AUE_PORTFS; + } else { + e = AUE_NULL; + } + break; + default: + e = AUE_NULL; + } + return (e); +} diff --git a/usr/src/uts/common/c2/audit_kevents.h b/usr/src/uts/common/c2/audit_kevents.h index 01a522ba5b..dbb7323185 100644 --- a/usr/src/uts/common/c2/audit_kevents.h +++ b/usr/src/uts/common/c2/audit_kevents.h @@ -337,9 +337,10 @@ extern "C" { #define AUE_PF_POLICY_FLIP 298 /* =as Flip IPsec policy */ #define AUE_PF_POLICY_FLUSH 299 /* =as Flush IPsec policy rules */ #define AUE_PF_POLICY_ALGS 300 /* =as Update IPsec algorithms */ +#define AUE_PORTFS 301 /* =fa */ /* NOTE: update MAX_KEVENTS below if events are added. */ -#define MAX_KEVENTS 300 +#define MAX_KEVENTS 301 #ifdef __cplusplus diff --git a/usr/src/uts/common/fs/autofs/auto_vnops.c b/usr/src/uts/common/fs/autofs/auto_vnops.c index 0587ed066e..dec18bd6e1 100644 --- a/usr/src/uts/common/fs/autofs/auto_vnops.c +++ b/usr/src/uts/common/fs/autofs/auto_vnops.c @@ -105,6 +105,7 @@ const fs_operation_def_t auto_vnodeops_template[] = { VOPNAME_FRLOCK, { .error = fs_error }, VOPNAME_DISPOSE, { .error = fs_error }, VOPNAME_SHRLOCK, { .error = fs_error }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, NULL, NULL }; diff --git a/usr/src/uts/common/fs/fem.c b/usr/src/uts/common/fs/fem.c index 93cae4d737..afb8d5da20 100644 --- a/usr/src/uts/common/fs/fem.c +++ b/usr/src/uts/common/fs/fem.c @@ -1595,7 +1595,7 @@ vhead_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, } static int -vhead_vnevent(vnode_t *vp, vnevent_t vnevent) +vhead_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *cname) { femarg_t farg; struct fem_list *femsp; @@ -1607,7 +1607,7 @@ vhead_vnevent(vnode_t *vp, vnevent_t vnevent) func = (int (*)()) (vp->v_op->vop_vnevent); arg0 = vp; fem_unlock(vp->v_femhead); - errc = (*func)(arg0, vnevent); + errc = (*func)(arg0, vnevent, dvp, cname); } else { fem_addref(femsp); fem_unlock(vp->v_femhead); @@ -1615,7 +1615,7 @@ vhead_vnevent(vnode_t *vp, vnevent_t vnevent) 
farg.fa_fnode = femsp->feml_nodes + femsp->feml_tos; vsop_find(&farg, &func, int, &arg0, vop_vnevent, femop_vnevent); - errc = (*func)(arg0, vnevent); + errc = (*func)(arg0, vnevent, dvp, cname); fem_release(femsp); } return (errc); @@ -2583,7 +2583,7 @@ vnext_shrlock(femarg_t *vf, int cmd, struct shrlock *shr, int flag, } int -vnext_vnevent(femarg_t *vf, vnevent_t vnevent) +vnext_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *cname) { int (*func)() = NULL; void *arg0 = NULL; @@ -2593,9 +2593,10 @@ vnext_vnevent(femarg_t *vf, vnevent_t vnevent) vsop_find(vf, &func, int, &arg0, vop_vnevent, femop_vnevent); ASSERT(func != NULL); ASSERT(arg0 != NULL); - return ((*func)(arg0, vnevent)); + return ((*func)(arg0, vnevent, dvp, cname)); } + int vfsnext_mount(fsemarg_t *vf, vnode_t *mvp, struct mounta *uap, cred_t *cr) { diff --git a/usr/src/uts/common/fs/fs_subr.c b/usr/src/uts/common/fs/fs_subr.c index 0fe058556d..c88e8b3268 100644 --- a/usr/src/uts/common/fs/fs_subr.c +++ b/usr/src/uts/common/fs/fs_subr.c @@ -633,7 +633,7 @@ fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr) /*ARGSUSED1*/ int -fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent) +fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *cname) { ASSERT(vp != NULL); return (ENOTSUP); @@ -641,7 +641,7 @@ fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent) /*ARGSUSED1*/ int -fs_vnevent_support(vnode_t *vp, vnevent_t vnevent) +fs_vnevent_support(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *cname) { ASSERT(vp != NULL); return (0); diff --git a/usr/src/uts/common/fs/fs_subr.h b/usr/src/uts/common/fs/fs_subr.h index 2f2056e6f3..f0b536d0f0 100644 --- a/usr/src/uts/common/fs/fs_subr.h +++ b/usr/src/uts/common/fs/fs_subr.h @@ -22,7 +22,7 @@ /* All Rights Reserved */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -78,8 +78,8 @@ extern void fs_nodispose(struct vnode *, page_t *, int, int, struct cred *); extern int fs_fab_acl(struct vnode *, vsecattr_t *, int flag, cred_t *); extern int fs_shrlock(struct vnode *, int, struct shrlock *, int, cred_t *); -extern int fs_vnevent_nosupport(vnode_t *, vnevent_t); -extern int fs_vnevent_support(vnode_t *, vnevent_t); +extern int fs_vnevent_nosupport(vnode_t *, vnevent_t, vnode_t *, char *); +extern int fs_vnevent_support(vnode_t *, vnevent_t, vnode_t *, char *); extern int fs_acl_nontrivial(struct vnode *vp, struct cred *cr); extern int fs_need_estale_retry(int); diff --git a/usr/src/uts/common/fs/mntfs/mntvfsops.c b/usr/src/uts/common/fs/mntfs/mntvfsops.c index f148bb4af4..5ea37b3e91 100644 --- a/usr/src/uts/common/fs/mntfs/mntvfsops.c +++ b/usr/src/uts/common/fs/mntfs/mntvfsops.c @@ -104,11 +104,14 @@ _info(struct modinfo *modinfop) * is ever modified to become unloadable. */ -static int mntfstype; +extern int mntfstype; static major_t mnt_major; static minor_t mnt_minor; static kmutex_t mnt_minor_lock; +extern struct vnode *mntdummyvp; +struct vnodeops *mntdummyvnodeops; + /* * /mnttab VFS operations vector. */ @@ -122,6 +125,7 @@ mntinitrootnode(mntnode_t *mnp) bzero((caddr_t)mnp, sizeof (*mnp)); mnp->mnt_vnode = vn_alloc(KM_SLEEP); + mntdummyvp = vn_alloc(KM_SLEEP); vp = MTOV(mnp); @@ -129,6 +133,10 @@ mntinitrootnode(mntnode_t *mnp) vn_setops(vp, mntvnodeops); vp->v_type = VREG; vp->v_data = (caddr_t)mnp; + mntdummyvp->v_flag = VNOMOUNT|VNOMAP|VNOSWAP|VNOCACHE; + vn_setops(mntdummyvp, mntdummyvnodeops); + mntdummyvp->v_type = VREG; + mntdummyvp->v_data = (caddr_t)mnp; } static int @@ -142,6 +150,7 @@ mntinit(int fstype, char *name) NULL, NULL }; extern const fs_operation_def_t mnt_vnodeops_template[]; + extern const fs_operation_def_t mnt_dummyvnodeops_template[]; int error; mntfstype = fstype; @@ -158,6 +167,10 @@ mntinit(int fstype, char *name) /* Vnode ops too. 
*/ error = vn_make_ops(name, mnt_vnodeops_template, &mntvnodeops); + if (!error) { + error = vn_make_ops(name, mnt_dummyvnodeops_template, + &mntdummyvnodeops); + } if (error != 0) { (void) vfs_freevfsops_by_type(fstype); cmn_err(CE_WARN, "mntinit: bad vnode ops template"); @@ -236,6 +249,7 @@ mntmount(struct vfs *vfsp, struct vnode *mvp, vfsp->vfs_bsize = DEV_BSIZE; mntinitrootnode(mnp); MTOV(mnp)->v_vfsp = vfsp; + mntdummyvp->v_vfsp = vfsp; mnp->mnt_mountvp = mvp; vn_exists(MTOV(mnp)); return (0); @@ -252,6 +266,22 @@ mntunmount(struct vfs *vfsp, int flag, struct cred *cr) return (EPERM); /* + * Ensure that the dummy vnode is not being referenced. + */ + if (mntdummyvp) { + mutex_enter(&mntdummyvp->v_lock); + if (vp->v_count > 1) { + mutex_exit(&mntdummyvp->v_lock); + return (EBUSY); + } + + mutex_exit(&mntdummyvp->v_lock); + vn_invalid(mntdummyvp); + vn_free(mntdummyvp); + mntdummyvp = NULL; + } + + /* * Ensure that no /mnttab vnodes are in use on this mount point. */ mutex_enter(&vp->v_lock); diff --git a/usr/src/uts/common/fs/mntfs/mntvnops.c b/usr/src/uts/common/fs/mntfs/mntvnops.c index 79f3a090b7..a10946a151 100644 --- a/usr/src/uts/common/fs/mntfs/mntvnops.c +++ b/usr/src/uts/common/fs/mntfs/mntvnops.c @@ -46,6 +46,8 @@ static mntnode_t *mntgetnode(vnode_t *); vnodeops_t *mntvnodeops; +vnodeops_t *mntdummyvnodeops; +extern struct vnode *mntdummyvp; /* * Design of kernel mnttab accounting. @@ -488,6 +490,40 @@ typedef struct extmnttab32 { #endif /* + * called to generate a dummy read vop call so that + * any module monitoring /etc/mnttab for access gets notified. + */ +static void +mntdummyreadop() +{ + struct uio uio; + struct iovec iov; + char tbuf[1]; + + /* + * Make a VOP_READ call on the dummy vnode so that any + * module interested in mnttab getting modified could + * intercept this vnode and capture the event. + * + * Pass a dummy uio struct. Nobody should reference the buffer. 
+ * We need to pass a valid uio struct pointer to take care of + * any module intercepting this vnode which could attempt to + * look at it. Currently only the file events notification + * module intercepts this vnode. + */ + bzero(&uio, sizeof (uio)); + bzero(&iov, sizeof (iov)); + iov.iov_base = tbuf; + iov.iov_len = 0; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_loffset = 0; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_resid = 0; + (void) VOP_READ(mntdummyvp, &uio, 0, kcred, NULL); +} + +/* * Snapshot the latest version of the kernel mounted resource information * * There are two types of snapshots: one destined for reading, and one destined @@ -633,7 +669,7 @@ mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel) mntfs_freesnap(snap); return (ENOMEM); } - + mntdummyreadop(); return (0); } @@ -760,7 +796,7 @@ mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) error = uiomove(buf, len, UIO_READ, uio); } kmem_free(buf, len); - + mntdummyreadop(); return (error); } @@ -1128,6 +1164,22 @@ mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, return (error); } +/* ARGSUSED */ +static int +mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, + caller_context_t *ct) +{ + return (0); +} + +/* ARGSUSED */ +static int +mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, + caller_context_t *ct) +{ + return (0); +} + /* * /mntfs vnode operations vector @@ -1147,3 +1199,10 @@ const fs_operation_def_t mnt_vnodeops_template[] = { VOPNAME_SHRLOCK, { .error = fs_error }, NULL, NULL }; + +const fs_operation_def_t mnt_dummyvnodeops_template[] = { + VOPNAME_READ, { .vop_read = mntdummyread }, + VOPNAME_WRITE, { .vop_write = mntdummywrite }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, + NULL, NULL +}; diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c index 7a9355b4a4..39bdc1aa00 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c +++ 
b/usr/src/uts/common/fs/nfs/nfs3_vnops.c @@ -257,6 +257,7 @@ const fs_operation_def_t nfs3_vnodeops_template[] = { VOPNAME_SETSECATTR, { .vop_setsecattr = nfs3_setsecattr }, VOPNAME_GETSECATTR, { .vop_getsecattr = nfs3_getsecattr }, VOPNAME_SHRLOCK, { .vop_shrlock = nfs3_shrlock }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, NULL, NULL }; @@ -2225,8 +2226,13 @@ top: nfs_rw_exit(&drp->r_rwlock); if (error) { VN_RELE(vp); - } else + } else { + /* + * existing file got truncated, notify. + */ + vnevent_create(vp); *vpp = vp; + } return (error); } @@ -2861,6 +2867,9 @@ nfs3_remove(vnode_t *dvp, char *nm, cred_t *cr) } } + if (error == 0) { + vnevent_remove(vp, dvp, nm); + } VN_RELE(vp); nfs_rw_exit(&drp->r_rwlock); @@ -2936,6 +2945,12 @@ nfs3_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr) nfs_rw_exit(&tdrp->r_rwlock); + if (!error) { + /* + * Notify the source file of this link operation. + */ + vnevent_link(svp); + } return (error); } @@ -2962,7 +2977,7 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) RENAME3args args; RENAME3res res; int douprintf; - vnode_t *nvp; + vnode_t *nvp = NULL; vnode_t *ovp = NULL; char *tmpname; rnode_t *rp; @@ -3120,8 +3135,6 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) } mutex_exit(&rp->r_statelock); } - - VN_RELE(nvp); } if (ovp == NULL) { @@ -3144,6 +3157,9 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) if (error) { nfs_rw_exit(&odrp->r_rwlock); nfs_rw_exit(&ndrp->r_rwlock); + if (nvp) { + VN_RELE(nvp); + } return (error); } ASSERT(ovp != NULL); @@ -3170,6 +3186,9 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) VN_RELE(ovp); nfs_rw_exit(&odrp->r_rwlock); nfs_rw_exit(&ndrp->r_rwlock); + if (nvp) { + VN_RELE(nvp); + } return (error); } @@ -3232,6 +3251,19 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) error = EEXIST; } + if (error == 0) { + if (nvp) + 
vnevent_rename_dest(nvp, ndvp, nnm); + + if (odvp != ndvp) + vnevent_rename_dest_dir(ndvp); + ASSERT(ovp != NULL); + vnevent_rename_src(ovp, odvp, onm); + } + + if (nvp) { + VN_RELE(nvp); + } VN_RELE(ovp); nfs_rw_exit(&odrp->r_rwlock); @@ -3436,6 +3468,9 @@ nfs3_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr) error = EEXIST; } + if (error == 0) { + vnevent_rmdir(vp, dvp, nm); + } VN_RELE(vp); nfs_rw_exit(&drp->r_rwlock); @@ -4687,9 +4722,9 @@ again: } if (!readahead_issued && !error) { - mutex_enter(&rp->r_statelock); - rp->r_nextr = io_off + io_len; - mutex_exit(&rp->r_statelock); + mutex_enter(&rp->r_statelock); + rp->r_nextr = io_off + io_len; + mutex_exit(&rp->r_statelock); } } } diff --git a/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c b/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c index 8ac83ac53c..90cd69efb4 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -346,10 +345,13 @@ deleg_setsecattr( return (vnext_setsecattr(arg, vsap, flag, cr)); } +/* ARGSUSED */ int deleg_vnevent( femarg_t *arg, - vnevent_t vnevent) + vnevent_t vnevent, + vnode_t *dvp, + char *name) { clock_t rc; rfs4_file_t *fp; @@ -380,5 +382,5 @@ deleg_vnevent( default: break; } - return (vnext_vnevent(arg, vnevent)); + return (vnext_vnevent(arg, vnevent, dvp, name)); } diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c index 565dd58e32..bac5e0b72b 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c @@ -404,6 +404,7 @@ const fs_operation_def_t nfs4_vnodeops_template[] = { VOPNAME_SETSECATTR, { .vop_setsecattr = nfs4_setsecattr }, VOPNAME_GETSECATTR, { .vop_getsecattr = nfs4_getsecattr }, VOPNAME_SHRLOCK, { .vop_shrlock = nfs4_shrlock }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, NULL, NULL }; @@ -6387,6 +6388,7 @@ nfs4_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive, vnode_t *tempvp; enum createmode4 createmode; bool_t must_trunc = FALSE; + int truncating = 0; if (nfs_zone() != VTOMI4(dvp)->mi_zone) return (EPERM); @@ -6517,6 +6519,7 @@ top: AT_TYPE | AT_MODE); vattr.va_type = VREG; createmode = UNCHECKED4; + truncating = 1; goto create_otw; } } @@ -6526,6 +6529,16 @@ top: if (error) { VN_RELE(vp); } else { + vnode_t *tvp; + rnode4_t *trp; + /* + * existing file got truncated, notify. + */ + trp = VTOR4(vp); + tvp = vp; + if (IS_SHADOW(vp, trp)) + tvp = RTOV4(trp); + vnevent_create(tvp); *vpp = vp; } return (error); @@ -6620,6 +6633,18 @@ create_otw: goto top; } nfs_rw_exit(&drp->r_rwlock); + if (truncating && !error && *vpp) { + vnode_t *tvp; + rnode4_t *trp; + /* + * existing file got truncated, notify. 
+ */ + tvp = *vpp; + trp = VTOR4(tvp); + if (IS_SHADOW(vp, trp)) + tvp = RTOV4(trp); + vnevent_create(tvp); + } return (error); } @@ -7262,6 +7287,15 @@ recov_retry: if (resp) (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)resp); + if (e.error == 0) { + vnode_t *tvp; + rnode4_t *trp; + trp = VTOR4(vp); + tvp = vp; + if (IS_SHADOW(vp, trp)) + tvp = RTOV4(trp); + vnevent_remove(tvp, dvp, nm); + } VN_RELE(vp); return (e.error); } @@ -7472,6 +7506,18 @@ recov_retry: ASSERT(nfs4_consistent_type(nvp)); VN_RELE(nvp); + if (!e.error) { + vnode_t *tvp; + rnode4_t *trp; + /* + * Notify the source file of this link operation. + */ + trp = VTOR4(svp); + tvp = svp; + if (IS_SHADOW(svp, trp)) + tvp = RTOV4(trp); + vnevent_link(tvp); + } out: kmem_free(argop, argoplist_size); if (resp) @@ -7507,7 +7553,7 @@ nfs4rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) { int error; mntinfo4_t *mi; - vnode_t *nvp; + vnode_t *nvp = NULL; vnode_t *ovp = NULL; char *tmpname = NULL; rnode4_t *rp; @@ -7706,7 +7752,6 @@ link_call: (void) nfs4delegreturn(VTOR4(nvp), NFS4_DR_PUSH|NFS4_DR_REOPEN); ASSERT(nfs4_consistent_type(nvp)); - VN_RELE(nvp); } if (ovp == NULL) { @@ -7728,6 +7773,9 @@ link_call: if (error) { nfs_rw_exit(&odrp->r_rwlock); nfs_rw_exit(&ndrp->r_rwlock); + if (nvp) { + VN_RELE(nvp); + } return (error); } ASSERT(ovp != NULL); @@ -7755,6 +7803,9 @@ link_call: VN_RELE(ovp); nfs_rw_exit(&odrp->r_rwlock); nfs_rw_exit(&ndrp->r_rwlock); + if (nvp) { + VN_RELE(nvp); + } return (EINVAL); } @@ -7811,6 +7862,9 @@ link_call: } mutex_exit(&rp->r_statelock); + if (nvp) { + VN_RELE(nvp); + } goto link_call; } @@ -7818,6 +7872,9 @@ link_call: VN_RELE(ovp); nfs_rw_exit(&odrp->r_rwlock); nfs_rw_exit(&ndrp->r_rwlock); + if (nvp) { + VN_RELE(nvp); + } return (error); } @@ -7853,6 +7910,47 @@ link_call: } mutex_exit(&rp->r_statelock); + /* + * Notify the rename vnevents to source vnode, and to the target + * vnode if it already existed. 
+ */ + if (error == 0) { + vnode_t *tvp; + rnode4_t *trp; + /* + * Notify the vnode. Each links is represented by + * a different vnode, in nfsv4. + */ + if (nvp) { + trp = VTOR4(nvp); + tvp = nvp; + if (IS_SHADOW(nvp, trp)) + tvp = RTOV4(trp); + vnevent_rename_dest(tvp, ndvp, nnm); + } + + /* + * if the source and destination directory are not the + * same notify the destination directory. + */ + if (VTOR4(odvp) != VTOR4(ndvp)) { + trp = VTOR4(ndvp); + tvp = ndvp; + if (IS_SHADOW(ndvp, trp)) + tvp = RTOV4(trp); + vnevent_rename_dest_dir(tvp); + } + + trp = VTOR4(ovp); + tvp = ovp; + if (IS_SHADOW(ovp, trp)) + tvp = RTOV4(trp); + vnevent_rename_src(tvp, odvp, onm); + } + + if (nvp) { + VN_RELE(nvp); + } VN_RELE(ovp); nfs_rw_exit(&odrp->r_rwlock); @@ -8561,6 +8659,16 @@ recov_retry: if (resp) (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)resp); + if (e.error == 0) { + vnode_t *tvp; + rnode4_t *trp; + trp = VTOR4(vp); + tvp = vp; + if (IS_SHADOW(vp, trp)) + tvp = RTOV4(trp); + vnevent_rmdir(tvp, dvp, nm); + } + VN_RELE(vp); return (e.error); @@ -10768,7 +10876,14 @@ nfs4_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, static int nfs4_realvp(vnode_t *vp, vnode_t **vpp) { - return (EINVAL); + rnode4_t *rp; + rp = VTOR4(vp); + + if (IS_SHADOW(vp, rp)) { + vp = RTOV4(rp); + } + *vpp = vp; + return (0); } /* diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c index 5e587027d7..96b7044e6a 100644 --- a/usr/src/uts/common/fs/nfs/nfs_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c @@ -218,6 +218,7 @@ const fs_operation_def_t nfs_vnodeops_template[] = { VOPNAME_SETSECATTR, { .vop_setsecattr = nfs_setsecattr }, VOPNAME_GETSECATTR, { .vop_getsecattr = nfs_getsecattr }, VOPNAME_SHRLOCK, { .vop_shrlock = nfs_shrlock }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, NULL, NULL }; @@ -1964,8 +1965,13 @@ nfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive, nfs_rw_exit(&drp->r_rwlock); if (error) 
{ VN_RELE(vp); - } else + } else { + /* + * existing file got truncated, notify. + */ + vnevent_create(vp); *vpp = vp; + } return (error); } @@ -2246,6 +2252,9 @@ nfs_remove(vnode_t *dvp, char *nm, cred_t *cr) } } + if (error == 0) { + vnevent_remove(vp, dvp, nm); + } VN_RELE(vp); nfs_rw_exit(&drp->r_rwlock); @@ -2297,6 +2306,12 @@ nfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr) nfs_rw_exit(&tdrp->r_rwlock); + if (!error) { + /* + * Notify the source file of this link operation. + */ + vnevent_link(svp); + } return (error); } @@ -2323,7 +2338,7 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) enum nfsstat status; struct nfsrnmargs args; int douprintf; - vnode_t *nvp; + vnode_t *nvp = NULL; vnode_t *ovp = NULL; char *tmpname; rnode_t *rp; @@ -2479,8 +2494,6 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) } mutex_exit(&rp->r_statelock); } - - VN_RELE(nvp); } if (ovp == NULL) { @@ -2504,6 +2517,9 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) if (error) { nfs_rw_exit(&odrp->r_rwlock); nfs_rw_exit(&ndrp->r_rwlock); + if (nvp) { + VN_RELE(nvp); + } return (error); } ASSERT(ovp != NULL); @@ -2577,6 +2593,20 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr) } } + if (error == 0) { + if (nvp) + vnevent_rename_dest(nvp, ndvp, nnm); + + if (odvp != ndvp) + vnevent_rename_dest_dir(ndvp); + + ASSERT(ovp != NULL); + vnevent_rename_src(ovp, odvp, onm); + } + + if (nvp) { + VN_RELE(nvp); + } VN_RELE(ovp); nfs_rw_exit(&odrp->r_rwlock); @@ -2758,6 +2788,9 @@ nfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr) error = EEXIST; } + if (error == 0) { + vnevent_rmdir(vp, dvp, nm); + } VN_RELE(vp); nfs_rw_exit(&drp->r_rwlock); diff --git a/usr/src/uts/common/fs/pcfs/pc_dir.c b/usr/src/uts/common/fs/pcfs/pc_dir.c index 88b895a212..616efe23cb 100644 --- a/usr/src/uts/common/fs/pcfs/pc_dir.c +++ b/usr/src/uts/common/fs/pcfs/pc_dir.c @@ -467,9 +467,9 @@ 
pc_dirremove( if (error == 0) { if (type == VDIR) { - vnevent_rmdir(PCTOV(pcp)); + vnevent_rmdir(PCTOV(pcp), vp, namep); } else { - vnevent_remove(PCTOV(pcp)); + vnevent_remove(PCTOV(pcp), vp, namep); } } @@ -633,7 +633,7 @@ top: newisdir = tpcp->pc_entry.pcd_attr & PCA_DIR; brelse(slot.sl_bp); - vnevent_rename_dest(PCTOV(tpcp)); + vnevent_rename_dest(PCTOV(tpcp), PCTOV(tdp), tnm); VN_RELE(PCTOV(tpcp)); /* @@ -815,7 +815,11 @@ top: } } out: - vnevent_rename_src(PCTOV(pcp)); + vnevent_rename_src(PCTOV(pcp), PCTOV(dp), snm); + if (dp != tdp) { + vnevent_rename_dest_dir(PCTOV(tdp)); + } + VN_RELE(PCTOV(pcp)); return (error); diff --git a/usr/src/uts/common/fs/pcfs/pc_vnops.c b/usr/src/uts/common/fs/pcfs/pc_vnops.c index 1e6252aceb..bb49ad5ea2 100644 --- a/usr/src/uts/common/fs/pcfs/pc_vnops.c +++ b/usr/src/uts/common/fs/pcfs/pc_vnops.c @@ -1052,8 +1052,11 @@ pcfs_create( } else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { error = pc_truncate(pcp, 0L); - if (error) + if (error) { VN_RELE(PCTOV(pcp)); + } else { + vnevent_create(PCTOV(pcp)); + } } } if (error) { diff --git a/usr/src/uts/common/fs/portfs/port.c b/usr/src/uts/common/fs/portfs/port.c index a2d6b95170..14be8cbbae 100644 --- a/usr/src/uts/common/fs/portfs/port.c +++ b/usr/src/uts/common/fs/portfs/port.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -79,6 +79,8 @@ * (see port_alert(3c)). * PORT_SOURCE_USER : events submitted by applications with * port_send(3c) or port_sendn(3c). + * PORT_SOURCE_FILE : events submitted per file being watched for file + * change events (see port_create(3c). * * There is a user API implemented in the libc library as well as a * kernel API implemented in port_subr.c in genunix. 
@@ -172,6 +174,21 @@ * This type of event is generated from user level using the port_send() * function to send a user event to a port or the port_sendn() function * to send an event to a list of ports. + * PORT_SOURCE_FILE: + * This event source uses the port_associate() interface to register + * a file to be monitored for changes. The file name that needs to be + * monitored is specified in the file_obj_t structure, a pointer to which + * is passed as an argument. The event types to be monitored are specified + * in the events argument. + * A file events monitor is represented internally per port per object + * address (the file_obj_t pointer), which means there can be multiple + * watches registered on the same file using different file_obj_t + * structure pointers. With the help of the FEM(File Event Monitoring) + * hooks, the file's vnode ops are intercepted and relevant events + * delivered. The port_dissociate() function is used to de-register a + * file events monitor on a file. When the specified file is + * removed/renamed, the file events watch/monitor is automatically + * removed. * * EVENT DELIVERY / RETRIEVING EVENTS * Events remain in the port queue until: @@ -215,6 +232,8 @@ * This type of event is not shareable between processes. * PORT_SOURCE_TIMER events * This type of event is not shareable between processes. + * PORT_SOURCE_FILE events + * This type of event is not shareable between processes.
* * FORK BEHAVIOUR * On fork(2) the child process inherits all opened file descriptors from @@ -622,13 +641,19 @@ portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, } case PORT_ASSOCIATE: { - /* currently only PORT_SOURCE_FD is implemented */ - if ((int)a1 != PORT_SOURCE_FD) { + switch ((int)a1) { + case PORT_SOURCE_FD: + error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, + (int)a3, (void *)a4); + break; + case PORT_SOURCE_FILE: + error = port_associate_fop(pp, (int)a1, (uintptr_t)a2, + (int)a3, (void *)a4); + break; + default: error = EINVAL; break; } - error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, (int)a3, - (void *)a4); break; } case PORT_SEND: @@ -654,12 +679,17 @@ portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, } case PORT_DISSOCIATE: { - /* currently only PORT_SOURCE_FD is implemented */ - if ((int)a1 != PORT_SOURCE_FD) { + switch ((int)a1) { + case PORT_SOURCE_FD: + error = port_dissociate_fd(pp, (uintptr_t)a2); + break; + case PORT_SOURCE_FILE: + error = port_dissociate_fop(pp, (uintptr_t)a2); + break; + default: error = EINVAL; break; } - error = port_dissociate_fd(pp, (uintptr_t)a2); break; } case PORT_ALERT: diff --git a/usr/src/uts/common/fs/portfs/port_fd.c b/usr/src/uts/common/fs/portfs/port_fd.c index d40756e0b4..dd26b0af9c 100644 --- a/usr/src/uts/common/fs/portfs/port_fd.c +++ b/usr/src/uts/common/fs/portfs/port_fd.c @@ -285,7 +285,7 @@ port_associate_fd(port_t *pp, int source, uintptr_t object, int events, * Remove any events that where already fired * for this fd and are still in the port queue. 
*/ - port_remove_done_event(pkevp); + (void) port_remove_done_event(pkevp); } else { mutex_exit(&pkevp->portkev_lock); } diff --git a/usr/src/uts/common/fs/portfs/port_fop.c b/usr/src/uts/common/fs/portfs/port_fop.c new file mode 100644 index 0000000000..d7ccf8253f --- /dev/null +++ b/usr/src/uts/common/fs/portfs/port_fop.c @@ -0,0 +1,2154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * File Events Notification + * ------------------------ + * + * The File Events Notification facility provides file and directory change + * notification. It is implemented as an event source(PORT_SOURCE_FILE) + * under the Event Ports framework. Therefore the API is an extension to + * the Event Ports API. + * + * It uses the FEM (File Events Monitoring) framework to intercept + * operations on the files & directories and generate appropriate events. + * + * It provides event notification in accordance with what an application + * can find out by stat`ing the file and comparing time stamps. 
The various + * system calls that update the file's access, modification, and change + * time stamps are documented in the man page section 2. + * + * It is non-intrusive. That is, having an active file event watch on a file + * or directory will not prevent it from being removed or renamed or block an + * unmount operation of the file system where the watched file or directory + * resides. + * + * + * Interface: + * ---------- + * + * The object for this event source is of type 'struct file_obj *' + * + * The file that needs to be monitored is specified in 'fo_name'. + * The time stamps collected by a stat(2) call are passed in fo_atime, + * fo_mtime, fo_ctime. At the time a file events watch is registered, the + * time stamps passed in are compared with the current time stamps of the + * file. If they have changed, relevant events are sent immediately. If the time + * stamps are all '0', they will not be compared. + * + * + * The events are delivered to an event port. A port is created using + * port_create(). + * + * To register a file events watch on a file or directory. + * + * port_associate(int port, PORT_SOURCE_FILE, (uintptr_t)&fobj, events, user) + * + * 'user' is the user pointer to be returned with the event. + * + * To de-register a file events watch, + * + * port_dissociate(int port, PORT_SOURCE_FILE, (uintptr_t)&fobj) + * + * The events are collected using the port_get()/port_getn() interface. The + * event source will be PORT_SOURCE_FILE. + * + * After an event is delivered, the file events watch gets de-activated. To + * receive the next event, the process will have to re-register the watch and + * activate it by calling port_associate() again. This behavior is intentional + * and supports proper multi-threaded programming when using file events + * notification API. + * + * + * Implementation overview: + * ------------------------ + * + * Each file events watch is represented by 'portfop_t' in the kernel.
A + * cache(portfop_cache_t) of these portfop_t's is maintained per event + * port by this source. The object here is the pointer to the file_obj + * structure. The portfop_t's are hashed in using the object pointer. Therefore + * it is possible to have multiple file event watches on a file by the same + * process by using different object structures(file_obj_t) and hence can + * receive multiple event notifications for a file. These watches can be for + * different event types. + * + * The cached entries of these file objects are retained, even after delivering + * an event marking them inactive, for performance reasons. The assumption + * is that the process would come back and re-register the file to receive + * further events. When there are more than 'port_fop_maxpfps' watches per file + * it will attempt to free the oldest inactive watch. + * + * In case the event that is being delivered is an exception event, the cached + * entries get removed. An exception event on a file or directory means its + * identity got changed(rename to/from, delete, mounted over, file system + * unmount). + * + * If the event port gets closed, all the associated file event watches will be + * removed and discarded. + * + * + * Data structures: + * ---------------- + * + * The list of file event watches per file is managed by the data structure + * portfop_vp_t. The first time a file events watch is registered for a file, + * the portfop_vp_t is installed on the vnode_t's member v_fopdata. This gets + * removed and freed only when the vnode becomes inactive. The FEM hooks are + * also installed when the first watch is registered on a file. The FEM hooks + * get un-installed when all the watches are removed. + * + * Each file events watch is represented by the structure portfop_t. They + * get added to a list of portfop_t's on the vnode(portfop_vp_t). After + * delivering an event, the portfop_t is marked inactive but retained. It is + * moved to the end of the list.
All the active portfop_t's are maintained at + * the beginning. In case of exception events, the portfop_t will be removed + * and discarded. + * + * To intercept unmount operations, FSEM hooks are added to the file system + * under which files are being watched. A hash table('portfop_vfs_hash_t') of + * active file systems is maintained. Each file system that has active watches + * is represented by 'portfop_vfs_t' and is added to the hash table. + * The vnode's 'portfop_vp_t' structure is added to the list of files(vnodes) + * being watched on the portfop_vfs_t structure. + * + * + * File system support: + * ------------------- + * + * The file systems implementation has to provide vnode event notifications + * (vnevents) in order to support watching any files on that file system. + * The vnode events(vnevents) are notifications provided by the file system + * for name based file operations like rename, remove etc, which do not go + * thru the VOP_** interfaces. If the file system does not implement vnode + * notifications, watching for file events on such file systems is not + * supported. The vnode event notifications support is determined by the call + * vnevent_support(vp) (VOP_VNEVENT(vp, VE_SUPPORT)), which the file system + * has to implement. + * + * + * Locking order: + * -------------- + * + * A file(vnode) can have file event watches registered by different processes. + * There is one portfop_t per watch registered. These are on the vnode's list + * protected by the mutex 'pvp_mutex' in 'portfop_vp_t'. The portfop_t's are + * also on the per port cache. The cache is protected by the pfc_lock of + * portfop_cache_t. The lock order here is 'pfc_lock' -> 'pvp_mutex'. 
+ * + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/stat.h> +#include <sys/errno.h> +#include <sys/kmem.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/vnode.h> +#include <sys/poll_impl.h> +#include <sys/port_impl.h> +#include <sys/fem.h> +#include <sys/vfs_opreg.h> +#include <sys/atomic.h> + +/* + * For special case support of /etc/mnttab + */ +extern struct vnode *mntdummyvp; +extern int mntfstype; + +#define PORTFOP_PVFSH(vfsp) (&portvfs_hash[PORTFOP_PVFSHASH(vfsp)]) +portfop_vfs_hash_t portvfs_hash[PORTFOP_PVFSHASH_SZ]; + +/* + * Inactive file event watches(portfop_t) are retained on the vnode's list + * for performance reasons. If the application re-registers the file, the + * inactive entry is made active and moved up the list. + * + * If there are more than the following number of watches on a vnode, + * it will attempt to discard the oldest inactive watch(pfp) at the time + * a new watch is being registered and when events get delivered. We + * do this to avoid accumulating inactive watches on a file. + */ +int port_fop_maxpfps = 20; + +/* local functions */ +static int port_fop_callback(void *, int *, pid_t, int, void *); + +static void port_pcache_insert(portfop_cache_t *, portfop_t *); +static void port_pcache_delete(portfop_cache_t *, portfop_t *); +static void port_close_fop(void *arg, int port, pid_t pid, int lastclose); + +/* + * port fop functions that will be the fem hooks.
+ */ +static int port_fop_open(femarg_t *vf, int mode, cred_t *cr); +static int port_fop_read(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr, + struct caller_context *ct); +static int port_fop_write(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr, + caller_context_t *ct); +static int port_fop_map(femarg_t *vf, offset_t off, struct as *as, + caddr_t *addrp, size_t len, uchar_t prot, uchar_t maxport, + uint_t flags, cred_t *cr); +static int port_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr, + caller_context_t *ct); +static int port_fop_create(femarg_t *vf, char *name, vattr_t *vap, + vcexcl_t excl, int mode, vnode_t **vpp, cred_t *cr, + int flag); +static int port_fop_remove(femarg_t *vf, char *nm, cred_t *cr); +static int port_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr); +static int port_fop_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm, + cred_t *cr); +static int port_fop_mkdir(femarg_t *vf, char *dirname, vattr_t *vap, + vnode_t **vpp, cred_t *cr); +static int port_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr); +static int port_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp); +static int port_fop_symlink(femarg_t *vf, char *linkname, vattr_t *vap, + char *target, cred_t *cr); +static int port_fop_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flag, + cred_t *cr); +static int port_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, + char *cname); + +static int port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr); + + +/* + * Fem hooks. 
+ */ +const fs_operation_def_t port_vnodesrc_template[] = { + VOPNAME_OPEN, { .femop_open = port_fop_open }, + VOPNAME_READ, { .femop_read = port_fop_read }, + VOPNAME_WRITE, { .femop_write = port_fop_write }, + VOPNAME_MAP, { .femop_map = port_fop_map }, + VOPNAME_SETATTR, { .femop_setattr = port_fop_setattr }, + VOPNAME_CREATE, { .femop_create = port_fop_create }, + VOPNAME_REMOVE, { .femop_remove = port_fop_remove }, + VOPNAME_LINK, { .femop_link = port_fop_link }, + VOPNAME_RENAME, { .femop_rename = port_fop_rename }, + VOPNAME_MKDIR, { .femop_mkdir = port_fop_mkdir }, + VOPNAME_RMDIR, { .femop_rmdir = port_fop_rmdir }, + VOPNAME_READDIR, { .femop_readdir = port_fop_readdir }, + VOPNAME_SYMLINK, { .femop_symlink = port_fop_symlink }, + VOPNAME_SETSECATTR, { .femop_setsecattr = port_fop_setsecattr }, + VOPNAME_VNEVENT, { .femop_vnevent = port_fop_vnevent }, + NULL, NULL +}; + +/* + * Fsem - vfs ops hooks + */ +const fs_operation_def_t port_vfssrc_template[] = { + VFSNAME_UNMOUNT, { .fsemop_unmount = port_fop_unmount }, + NULL, NULL +}; + +fem_t *fop_femop; +fsem_t *fop_fsemop; + +static fem_t * +port_fop_femop() +{ + fem_t *femp; + if (fop_femop != NULL) + return (fop_femop); + if (fem_create("portfop_fem", + (const struct fs_operation_def *)port_vnodesrc_template, + (fem_t **)&femp)) { + return (NULL); + } + if (casptr(&fop_femop, NULL, femp) != NULL) { + /* + * some other thread beat us to it. + */ + fem_free(femp); + } + return (fop_femop); +} + +static fsem_t * +port_fop_fsemop() +{ + fsem_t *fsemp; + if (fop_fsemop != NULL) + return (fop_fsemop); + if (fsem_create("portfop_fsem", port_vfssrc_template, &fsemp)) { + return (NULL); + } + if (casptr(&fop_fsemop, NULL, fsemp) != NULL) { + /* + * some other thread beat us to it. + */ + fsem_free(fsemp); + } + return (fop_fsemop); +} + +/* + * port_fop_callback() + * - PORT_CALLBACK_DEFAULT + * The file event will be delivered to the application. 
+ * - PORT_CALLBACK_DISSOCIATE + * The object will be dissociated from the port. + * - PORT_CALLBACK_CLOSE + * The object will be dissociated from the port because the port + * is being closed. + */ +/* ARGSUSED */ +static int +port_fop_callback(void *arg, int *events, pid_t pid, int flag, void *evp) +{ + portfop_t *pfp = (portfop_t *)arg; + port_kevent_t *pkevp = (port_kevent_t *)evp; + int error = 0; + + ASSERT((events != NULL)); + if (flag == PORT_CALLBACK_DEFAULT) { + if (curproc->p_pid != pid) { + return (EACCES); /* deny delivery of events */ + } + + *events = pkevp->portkev_events; + pkevp->portkev_events = 0; + if (pfp != NULL) { + pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ; + } + } + return (error); +} + +/* + * Inserts a portfop_t into the port sources cache's. + */ +static void +port_pcache_insert(portfop_cache_t *pfcp, portfop_t *pfp) +{ + portfop_t **bucket; + + ASSERT(MUTEX_HELD(&pfcp->pfc_lock)); + bucket = PORT_FOP_BUCKET(pfcp, pfp->pfop_object); + pfp->pfop_hashnext = *bucket; + *bucket = pfp; + pfcp->pfc_objcount++; +} + +/* + * Remove the pfp from the port source cache. + */ +static void +port_pcache_delete(portfop_cache_t *pfcp, portfop_t *pfp) +{ + portfop_t *lpdp; + portfop_t *cpdp; + portfop_t **bucket; + + bucket = PORT_FOP_BUCKET(pfcp, pfp->pfop_object); + cpdp = *bucket; + if (pfp == cpdp) { + *bucket = pfp->pfop_hashnext; + } else { + while (cpdp != NULL) { + lpdp = cpdp; + cpdp = cpdp->pfop_hashnext; + if (cpdp == pfp) { + /* portfop struct found */ + lpdp->pfop_hashnext = pfp->pfop_hashnext; + break; + } + } + } + pfcp->pfc_objcount--; +} + +/* + * The vnode's(portfop_vp_t) pfp list management. The 'pvp_mutex' is held + * when these routines are called. + * + * The 'pvp_lpfop' member points to the oldest inactive entry on the list. + * It is used to discard the oldtest inactive pfp if the number of entries + * exceed the limit. 
+ */ +static void +port_fop_listinsert(portfop_vp_t *pvp, portfop_t *pfp, int where) +{ + if (where == 1) { + list_insert_head(&pvp->pvp_pfoplist, (void *)pfp); + } else { + list_insert_tail(&pvp->pvp_pfoplist, (void *)pfp); + } + if (pvp->pvp_lpfop == NULL) { + pvp->pvp_lpfop = pfp; + } + pvp->pvp_cnt++; +} + +static void +port_fop_listinsert_head(portfop_vp_t *pvp, portfop_t *pfp) +{ + port_fop_listinsert(pvp, pfp, 1); +} + +static void +port_fop_listinsert_tail(portfop_vp_t *pvp, portfop_t *pfp) +{ + /* + * We point lpfop to an inactive one, if it was initially pointing + * to an active one. Insert to the tail is done only when a pfp goes + * inactive. + */ + if (pvp->pvp_lpfop && pvp->pvp_lpfop->pfop_flags & PORT_FOP_ACTIVE) { + pvp->pvp_lpfop = pfp; + } + port_fop_listinsert(pvp, pfp, 0); +} + +static void +port_fop_listremove(portfop_vp_t *pvp, portfop_t *pfp) +{ + if (pvp->pvp_lpfop == pfp) { + pvp->pvp_lpfop = list_next(&pvp->pvp_pfoplist, (void *)pfp); + } + + list_remove(&pvp->pvp_pfoplist, (void *)pfp); + + pvp->pvp_cnt--; + if (pvp->pvp_cnt && pvp->pvp_lpfop == NULL) { + pvp->pvp_lpfop = list_head(&pvp->pvp_pfoplist); + } +} + +static void +port_fop_listmove(portfop_vp_t *pvp, list_t *tlist) +{ + list_move_tail(tlist, &pvp->pvp_pfoplist); + pvp->pvp_lpfop = NULL; + pvp->pvp_cnt = 0; +} + +/* + * Remove a portfop_t from the port cache hash table and discard it. + * It is called only when pfp is not on the vnode's list. Otherwise, + * port_remove_fop() is called. 
+ */ +void +port_pcache_remove_fop(portfop_cache_t *pfcp, portfop_t *pfp) +{ + port_kevent_t *pkevp; + + + ASSERT(MUTEX_HELD(&pfcp->pfc_lock)); + + pkevp = pfp->pfop_pev; + pfp->pfop_pev = NULL; + + if (pkevp != NULL) { + (void) port_remove_done_event(pkevp); + port_free_event_local(pkevp, 0); + } + + port_pcache_delete(pfcp, pfp); + + if (pfp->pfop_cname != NULL) + kmem_free(pfp->pfop_cname, pfp->pfop_clen + 1); + kmem_free(pfp, sizeof (portfop_t)); + if (pfcp->pfc_objcount == 0) + cv_signal(&pfcp->pfc_lclosecv); +} + +/* + * if we have too many watches on the vnode, attempt to discard an + * inactive one. + */ +static void +port_fop_trimpfplist(vnode_t *vp) +{ + portfop_vp_t *pvp; + portfop_t *pfp = NULL; + portfop_cache_t *pfcp; + + /* + * Due to a reference the vnode cannot disappear, v_fopdata should + * not change. + */ + if ((pvp = vp->v_fopdata) != NULL && + pvp->pvp_cnt > port_fop_maxpfps) { + mutex_enter(&pvp->pvp_mutex); + pfp = pvp->pvp_lpfop; + pfcp = pfp->pfop_pcache; + /* + * only if we can get the cache lock, we need to + * do this due to reverse lock order and some thread + * that may be trying to reactivate this entry. + */ + if (mutex_tryenter(&pfcp->pfc_lock)) { + if (pfp && !(pfp->pfop_flags & PORT_FOP_ACTIVE) && + !(pfp->pfop_flags & PORT_FOP_KEV_ONQ)) { + port_fop_listremove(pvp, pfp); + pfp->pfop_flags |= PORT_FOP_REMOVING; + } else { + mutex_exit(&pfcp->pfc_lock); + pfp = NULL; + } + } else { + pfp = NULL; + } + mutex_exit(&pvp->pvp_mutex); + + /* + * discard pfp if any. + */ + if (pfp != NULL) { + port_pcache_remove_fop(pfcp, pfp); + mutex_exit(&pfcp->pfc_lock); + } + } +} + +void +port_fop_femuninstall(vnode_t *vp) +{ + portfop_vp_t *pvp; + vfs_t *vfsp; + portfop_vfs_t *pvfsp; + portfop_vfs_hash_t *pvfsh; + kmutex_t *mtx; + + /* + * if list is empty, uninstall fem. + */ + pvp = vp->v_fopdata; + ASSERT(MUTEX_HELD(&pvp->pvp_mutex)); + + /* + * make sure the list is empty. 
+ */ + if (!list_head(&pvp->pvp_pfoplist)) { + + /* + * we could possibly uninstall the fem hooks when + * the vnode becomes inactive and the v_fopdata is + * free. But the hooks get triggered uncessarily + * even though there are no active watches. So, we + * uninstall it here. + */ + (void) fem_uninstall(vp, (fem_t *)pvp->pvp_femp, vp); + pvp->pvp_femp = NULL; + mutex_exit(&pvp->pvp_mutex); + + + /* + * If we uinstalled fem means no process is watching this + * vnode, remove it from the vfs's list of watched vnodes. + */ + pvfsp = pvp->pvp_pvfsp; + vfsp = vp->v_vfsp; + pvfsh = PORTFOP_PVFSH(vfsp); + mtx = &pvfsh->pvfshash_mutex; + mutex_enter(mtx); + /* + * If unmount is in progress, that thread will remove and + * release the vnode from the vfs's list, just leave. + */ + if (!pvfsp->pvfs_unmount) { + list_remove(&pvfsp->pvfs_pvplist, pvp); + mutex_exit(mtx); + VN_RELE(vp); + } else { + mutex_exit(mtx); + } + } else { + mutex_exit(&pvp->pvp_mutex); + } +} + +/* + * Remove pfp from the vnode's watch list and the cache and discard it. + * If it is the last pfp on the vnode's list, the fem hooks get uninstalled. + * Returns 1 if removed successfully. + * + * The *active is set to indicate if the pfp was still active(no events had + * been posted, or the posted event had not been collected yet and it was + * able to remove it from the port's queue). + */ +int +port_remove_fop(portfop_t *pfp, portfop_cache_t *pfcp, int cleanup, + int *active) +{ + vnode_t *vp; + portfop_vp_t *pvp; + int tactive = 0; + + ASSERT(MUTEX_HELD(&pfcp->pfc_lock)); + vp = pfp->pfop_vp; + pvp = vp->v_fopdata; + mutex_enter(&pvp->pvp_mutex); + + /* + * if not cleanup, remove it only if the pfp is still active and + * is not being removed by some other thread. + */ + if (!cleanup && (!(pfp->pfop_flags & PORT_FOP_ACTIVE) || + pfp->pfop_flags & PORT_FOP_REMOVING)) { + mutex_exit(&pvp->pvp_mutex); + return (0); + } + + /* + * mark it inactive. 
+ */ + if (pfp->pfop_flags & PORT_FOP_ACTIVE) { + pfp->pfop_flags &= ~PORT_FOP_ACTIVE; + tactive = 1; + } + + /* + * Check if the pfp is still on the vnode's list. This can + * happen if port_fop_excep() is in the process of removing it. + * In case of cleanup, just mark this pfp as inactive so that no + * new events (VNEVENT) will be delivered, and remove it from the + * event queue if it was already queued. Since the cache lock is + * held, the pfp will not disappear, even though it is being + * removed. + */ + if (pfp->pfop_flags & PORT_FOP_REMOVING) { + mutex_exit(&pvp->pvp_mutex); + if (!tactive && port_remove_done_event(pfp->pfop_pev)) { + pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ; + tactive = 1; + } + if (active) { + *active = tactive; + } + return (1); + } + + /* + * if we find an event on the queue and removed it, then this + * association is considered active. + */ + if (!tactive && port_remove_done_event(pfp->pfop_pev)) { + pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ; + tactive = 1; + } + + if (active) { + *active = tactive; + } + pvp = (portfop_vp_t *)vp->v_fopdata; + + /* + * remove pfp from the vnode's list + */ + port_fop_listremove(pvp, pfp); + + /* + * If no more associations on the vnode, uninstall fem hooks. + * The pvp mutex will be released in this routine. + */ + port_fop_femuninstall(vp); + port_pcache_remove_fop(pfcp, pfp); + return (1); +} + +/* + * This routine returns a pointer to a cached portfop entry, or NULL if it + * does not find it in the hash table. The object pointer is used as index. + * The entries are hashed by the object's address. We need to match the pid + * as the evet port can be shared between processes. The file events + * watches are per process only. 
+ */ +portfop_t * +port_cache_lookup_fop(portfop_cache_t *pfcp, pid_t pid, uintptr_t obj) +{ + portfop_t *pfp = NULL; + portfop_t **bucket; + + ASSERT(MUTEX_HELD(&pfcp->pfc_lock)); + bucket = PORT_FOP_BUCKET(pfcp, obj); + pfp = *bucket; + while (pfp != NULL) { + if (pfp->pfop_object == obj && pfp->pfop_pid == pid) + break; + pfp = pfp->pfop_hashnext; + } + return (pfp); +} + +/* + * Given the file name, get the vnode and also the directory vnode + * On return, the vnodes are held (VN_HOLD). The caller has to VN_RELE + * the vnode(s). + */ +int +port_fop_getdvp(void *objptr, vnode_t **vp, vnode_t **dvp, + char **cname, int *len, int follow) +{ + int error = 0; + struct pathname pn; + char *fname; + + if (get_udatamodel() == DATAMODEL_NATIVE) { + fname = ((file_obj_t *)objptr)->fo_name; +#ifdef _SYSCALL32_IMPL + } else { + fname = (caddr_t)(uintptr_t)((file_obj32_t *)objptr)->fo_name; +#endif /* _SYSCALL32_IMPL */ + } + + /* + * lookuppn may fail with EINVAL, if dvp is non-null(like when + * looking for "."). So call again with dvp = NULL. + */ + if ((error = pn_get(fname, UIO_USERSPACE, &pn)) != 0) { + return (error); + } + + error = lookuppn(&pn, NULL, follow, dvp, vp); + if (error == EINVAL) { + pn_free(&pn); + if ((error = pn_get(fname, UIO_USERSPACE, &pn)) != 0) { + return (error); + } + error = lookuppn(&pn, NULL, follow, NULL, vp); + if (dvp != NULL) { + *dvp = NULL; + } + } + + if (error == 0 && cname != NULL && len != NULL) { + pn_setlast(&pn); + *len = pn.pn_pathlen; + *cname = kmem_alloc(*len + 1, KM_SLEEP); + (void) strcpy(*cname, pn.pn_path); + } else { + if (cname != NULL && len != NULL) { + *cname = NULL; + *len = 0; + } + } + + pn_free(&pn); + return (error); +} + +port_source_t * +port_getsrc(port_t *pp, int source) +{ + port_source_t *pse; + int lock = 0; + /* + * get the port source structure. 
+ */ + if (!MUTEX_HELD(&pp->port_queue.portq_source_mutex)) { + mutex_enter(&pp->port_queue.portq_source_mutex); + lock = 1; + } + + pse = pp->port_queue.portq_scache[PORT_SHASH(source)]; + for (; pse != NULL; pse = pse->portsrc_next) { + if (pse->portsrc_source == source) + break; + } + + if (lock) { + mutex_exit(&pp->port_queue.portq_source_mutex); + } + return (pse); +} + + +/* + * compare time stamps and generate an event if it has changed. + */ +static void +port_check_timestamp(vnode_t *vp, portfop_t *pfp, void *objptr) +{ + vattr_t vatt; + portfop_vp_t *pvp = vp->v_fopdata; + int events = 0; + port_kevent_t *pkevp; + file_obj_t *fobj; + + if (!(pfp->pfop_flags & PORT_FOP_ACTIVE)) { + /* + * some event got delivered, don't bother with + * checking the timestamps. + */ + return; + } + + /* + * If time stamps is specified, get attributes and compare. This + * needs to be done after registering. We should check if any + * timestamps have been specified before getting attr XXX. + */ + vatt.va_mask = AT_ATIME|AT_MTIME|AT_CTIME; + if (get_udatamodel() == DATAMODEL_NATIVE) { + fobj = (file_obj_t *)objptr; + if (fobj->fo_atime.tv_sec || fobj->fo_atime.tv_nsec || + fobj->fo_mtime.tv_sec || fobj->fo_mtime.tv_nsec || + fobj->fo_ctime.tv_sec || fobj->fo_ctime.tv_nsec) { + if (VOP_GETATTR(vp, &vatt, 0, CRED())) { + return; + } + } else { + /* + * timestamp not specified, all 0's, + */ + return; + } +#ifdef _SYSCALL32_IMPL + } else { + file_obj32_t *fobj32; + fobj32 = (file_obj32_t *)objptr; + if (fobj32->fo_atime.tv_sec || fobj32->fo_atime.tv_nsec || + fobj32->fo_mtime.tv_sec || fobj32->fo_mtime.tv_nsec || + fobj32->fo_ctime.tv_sec || fobj32->fo_ctime.tv_nsec) { + if (VOP_GETATTR(vp, &vatt, 0, CRED())) { + return; + } + } else { + /* + * timestamp not specified, all 0. + */ + return; + } +#endif /* _SYSCALL32_IMPL */ + } + + mutex_enter(&pvp->pvp_mutex); + /* + * The pfp cannot dissappear as the port cache lock is held. 
+ * While the pvp_mutex is held, no events will get delivered. + */ + if (pfp->pfop_flags & PORT_FOP_ACTIVE && + !(pfp->pfop_flags & PORT_FOP_REMOVING)) { + if (get_udatamodel() == DATAMODEL_NATIVE) { + fobj = (file_obj_t *)objptr; + if (pfp->pfop_events & FILE_ACCESS && + (fobj->fo_atime.tv_sec || fobj->fo_atime.tv_nsec) && + (vatt.va_atime.tv_sec != fobj->fo_atime.tv_sec || + vatt.va_atime.tv_nsec != fobj->fo_atime.tv_nsec)) + events |= FILE_ACCESS; + + if (pfp->pfop_events & FILE_MODIFIED && + (fobj->fo_mtime.tv_sec || fobj->fo_mtime.tv_nsec) && + (vatt.va_mtime.tv_sec != fobj->fo_mtime.tv_sec || + vatt.va_mtime.tv_nsec != fobj->fo_mtime.tv_nsec)) + events |= FILE_MODIFIED; + + if (pfp->pfop_events & FILE_ATTRIB && + (fobj->fo_ctime.tv_sec || fobj->fo_ctime.tv_nsec) && + (vatt.va_ctime.tv_sec != fobj->fo_ctime.tv_sec || + vatt.va_ctime.tv_nsec != fobj->fo_ctime.tv_nsec)) + events |= FILE_ATTRIB; +#ifdef _SYSCALL32_IMPL + } else { + file_obj32_t *fobj32; + fobj32 = (file_obj32_t *)objptr; + if (pfp->pfop_events & FILE_ACCESS && + (fobj32->fo_atime.tv_sec || + fobj32->fo_atime.tv_nsec) && + (vatt.va_atime.tv_sec != fobj32->fo_atime.tv_sec || + vatt.va_atime.tv_nsec != fobj32->fo_atime.tv_nsec)) + events |= FILE_ACCESS; + + if (pfp->pfop_events & FILE_MODIFIED && + (fobj32->fo_mtime.tv_sec || + fobj32->fo_mtime.tv_nsec) && + (vatt.va_mtime.tv_sec != fobj32->fo_mtime.tv_sec || + vatt.va_mtime.tv_nsec != fobj32->fo_mtime.tv_nsec)) + events |= FILE_MODIFIED; + + if (pfp->pfop_events & FILE_ATTRIB && + (fobj32->fo_ctime.tv_sec || + fobj32->fo_ctime.tv_nsec) && + (vatt.va_ctime.tv_sec != fobj32->fo_ctime.tv_sec || + vatt.va_ctime.tv_nsec != fobj32->fo_ctime.tv_nsec)) + events |= FILE_ATTRIB; +#endif /* _SYSCALL32_IMPL */ + } + + /* + * No events to deliver + */ + if (events == 0) { + mutex_exit(&pvp->pvp_mutex); + return; + } + + /* + * Deliver the event now. 
+ */ + pkevp = pfp->pfop_pev; + pfp->pfop_flags &= ~PORT_FOP_ACTIVE; + pkevp->portkev_events |= events; + /* + * Move it to the tail as active once are in the + * begining of the list. + */ + port_fop_listremove(pvp, pfp); + port_fop_listinsert_tail(pvp, pfp); + port_send_event(pkevp); + pfp->pfop_flags |= PORT_FOP_KEV_ONQ; + } + mutex_exit(&pvp->pvp_mutex); +} + +/* + * Add the event source to the port and return the port source cache pointer. + */ +int +port_fop_associate_source(portfop_cache_t **pfcpp, port_t *pp, int source) +{ + portfop_cache_t *pfcp; + port_source_t *pse; + int error; + + /* + * associate PORT_SOURCE_FILE source with the port, if it is + * not associated yet. Note the PORT_SOURCE_FILE source is + * associated once and will not be dissociated. + */ + if ((pse = port_getsrc(pp, PORT_SOURCE_FILE)) == NULL) { + if (error = port_associate_ksource(pp->port_fd, source, + &pse, port_close_fop, pp, NULL)) { + *pfcpp = NULL; + return (error); + } + } + + /* + * Get the portfop cache pointer. + */ + if ((pfcp = pse->portsrc_data) == NULL) { + /* + * This is the first time that a file is being associated, + * create the portfop cache. + */ + pfcp = kmem_zalloc(sizeof (portfop_cache_t), KM_SLEEP); + mutex_enter(&pp->port_queue.portq_source_mutex); + if (pse->portsrc_data == NULL) { + pse->portsrc_data = pfcp; + mutex_exit(&pp->port_queue.portq_source_mutex); + } else { + /* + * someone else created the port cache, free + * what we just now allocated. + */ + mutex_exit(&pp->port_queue.portq_source_mutex); + kmem_free(pfcp, sizeof (portfop_cache_t)); + pfcp = pse->portsrc_data; + } + } + *pfcpp = pfcp; + return (0); +} + +/* + * Add the given pvp on the file system's list of vnodes watched. 
+ */ +int +port_fop_pvfsadd(portfop_vp_t *pvp) +{ + int error = 0; + vnode_t *vp = pvp->pvp_vp; + portfop_vfs_hash_t *pvfsh; + portfop_vfs_t *pvfsp; + fsem_t *fsemp; + + pvfsh = PORTFOP_PVFSH(vp->v_vfsp); + mutex_enter(&pvfsh->pvfshash_mutex); + for (pvfsp = pvfsh->pvfshash_pvfsp; pvfsp && + pvfsp->pvfs != vp->v_vfsp; pvfsp = pvfsp->pvfs_next) + ; + + if (!pvfsp) { + if ((fsemp = port_fop_fsemop()) != NULL) { + if ((error = fsem_install(vp->v_vfsp, fsemp, + vp->v_vfsp, OPUNIQ, NULL, NULL))) { + mutex_exit(&pvfsh->pvfshash_mutex); + return (error); + } + } else { + mutex_exit(&pvfsh->pvfshash_mutex); + return (EINVAL); + } + pvfsp = kmem_zalloc(sizeof (portfop_vfs_t), KM_SLEEP); + pvfsp->pvfs = vp->v_vfsp; + list_create(&(pvfsp->pvfs_pvplist), sizeof (portfop_vp_t), + offsetof(portfop_vp_t, pvp_pvfsnode)); + pvfsp->pvfs_fsemp = fsemp; + pvfsp->pvfs_next = pvfsh->pvfshash_pvfsp; + pvfsh->pvfshash_pvfsp = pvfsp; + } + + /* + * check if an unmount is in progress. + */ + if (!pvfsp->pvfs_unmount) { + /* + * insert the pvp on list. + */ + pvp->pvp_pvfsp = pvfsp; + list_insert_head(&pvfsp->pvfs_pvplist, (void *)pvp); + } else { + error = EINVAL; + } + mutex_exit(&pvfsh->pvfshash_mutex); + return (error); +} + +/* + * Installs the portfop_vp_t data structure on the + * vnode. The 'pvp_femp == NULL' indicates it is not + * active. The fem hooks have to be installed. + * The portfop_vp_t is only freed when the vnode gets freed. + */ +void +port_install_fopdata(vnode_t *vp) +{ + portfop_vp_t *npvp; + + npvp = kmem_zalloc(sizeof (*npvp), KM_SLEEP); + mutex_init(&npvp->pvp_mutex, NULL, MUTEX_DEFAULT, NULL); + list_create(&npvp->pvp_pfoplist, sizeof (portfop_t), + offsetof(portfop_t, pfop_node)); + npvp->pvp_vp = vp; + /* + * If v_fopdata is not null, some other thread beat us to it. 
+ */ + if (casptr(&vp->v_fopdata, NULL, npvp) != NULL) { + mutex_destroy(&npvp->pvp_mutex); + list_destroy(&npvp->pvp_pfoplist); + kmem_free(npvp, sizeof (*npvp)); + } +} + + +/* + * Allocate and add a portfop_t to the per port cache. Also add the portfop_t + * to the vnode's list. The association is identified by the object pointer + * address and pid. + */ +int +port_pfp_setup(portfop_t **pfpp, port_t *pp, vnode_t *vp, portfop_cache_t *pfcp, + uintptr_t object, int events, void *user, char *cname, int clen, + vnode_t *dvp) +{ + portfop_t *pfp = NULL; + port_kevent_t *pkevp; + fem_t *femp; + int error = 0; + portfop_vp_t *pvp; + + + /* + * The port cache mutex is held. + */ + *pfpp = NULL; + + + /* + * At this point the fem monitor is installed. + * Allocate a port event structure per vnode association. + */ + if (pfp == NULL) { + if (error = port_alloc_event_local(pp, PORT_SOURCE_FILE, + PORT_ALLOC_CACHED, &pkevp)) { + return (error); + } + pfp = kmem_zalloc(sizeof (portfop_t), KM_SLEEP); + pfp->pfop_pev = pkevp; + } + + pfp->pfop_vp = vp; + pfp->pfop_pid = curproc->p_pid; + pfp->pfop_pcache = pfcp; + pfp->pfop_pp = pp; + pfp->pfop_flags |= PORT_FOP_ACTIVE; + pfp->pfop_cname = cname; + pfp->pfop_clen = clen; + pfp->pfop_dvp = dvp; + pfp->pfop_object = object; + + pkevp->portkev_callback = port_fop_callback; + pkevp->portkev_arg = pfp; + pkevp->portkev_object = object; + pkevp->portkev_user = user; + pkevp->portkev_events = 0; + + port_pcache_insert(pfcp, pfp); + + /* + * Register a new file events monitor for this file(vnode), if not + * done already. + */ + if ((pvp = vp->v_fopdata) == NULL) { + port_install_fopdata(vp); + pvp = vp->v_fopdata; + } + + mutex_enter(&pvp->pvp_mutex); + /* + * if the vnode does not have the file events hooks, install it. 
+ */ + if (pvp->pvp_femp == NULL) { + if ((femp = port_fop_femop()) != NULL) { + if (!(error = fem_install(pfp->pfop_vp, femp, + (void *)vp, OPUNIQ, NULL, NULL))) { + pvp->pvp_femp = femp; + /* + * add fsem_t hooks to the vfsp and add pvp to + * the list of vnodes for this vfs. + */ + if (!(error = port_fop_pvfsadd(pvp))) { + /* + * Hold a reference to the vnode since + * we successfully installed the hooks. + */ + VN_HOLD(vp); + } else { + (void) fem_uninstall(vp, femp, vp); + pvp->pvp_femp = NULL; + } + } + } else { + error = EINVAL; + } + } + + if (error) { + /* + * pkevp will get freed here. + */ + port_pcache_remove_fop(pfcp, pfp); + mutex_exit(&pvp->pvp_mutex); + return (error); + } + + /* + * insert the pfp on the vnode's list. After this + * events can get delivered. + */ + pfp->pfop_events = events; + port_fop_listinsert_head(pvp, pfp); + + mutex_exit(&pvp->pvp_mutex); + *pfpp = pfp; + return (0); +} + +vnode_t * +port_resolve_vp(vnode_t *vp) +{ + vnode_t *rvp; + /* + * special case /etc/mnttab, the only mntfs type + * file that can exist. + */ + if (mntdummyvp && vp->v_vfsp->vfs_fstype == mntfstype) { + VN_RELE(vp); + vp = mntdummyvp; + VN_HOLD(mntdummyvp); + } + + /* + * This should take care of lofs mounted fs systems and nfs4 + * hardlinks. + */ + if ((VOP_REALVP(vp, &rvp) == 0) && vp != rvp) { + VN_HOLD(rvp); + VN_RELE(vp); + vp = rvp; + } + return (vp); +} + +/* + * Register a file events watch on the given file associated to the port *pp. + * + * The association is identified by the object pointer and the pid. + * The events argument contains the events to be monitored for. + */ +int +port_associate_fop(port_t *pp, int source, uintptr_t object, int events, + void *user) +{ + portfop_cache_t *pfcp; + vnode_t *vp, *dvp; + portfop_t *pfp; + int error = 0; + file_obj_t fobj; + void *objptr; + char *cname; + int clen; + int removing = 0; + int follow; + + /* + * check that events specified are valid. 
+ */ + if ((events & ~FILE_EVENTS_MASK) != 0) + return (EINVAL); + + if (get_udatamodel() == DATAMODEL_NATIVE) { + if (copyin((void *)object, &fobj, sizeof (file_obj_t))) + return (EFAULT); + objptr = (void *)&fobj; +#ifdef _SYSCALL32_IMPL + } else { + file_obj32_t fobj32; + if (copyin((void *)object, &fobj32, sizeof (file_obj32_t))) + return (EFAULT); + objptr = (void *)&fobj32; +#endif /* _SYSCALL32_IMPL */ + } + + vp = dvp = NULL; + + /* + * findout if we need to follow symbolic links. + */ + follow = !(events & FILE_NOFOLLOW); + events = events & ~FILE_NOFOLLOW; + + /* + * lookup and find the vnode and its directory vnode of the given + * file. + */ + if ((error = port_fop_getdvp(objptr, &vp, &dvp, &cname, &clen, + follow)) != 0) { + return (error); + } + + if (dvp != NULL) { + dvp = port_resolve_vp(dvp); + VN_RELE(dvp); + } + + /* + * Not found + */ + if (vp == NULL) { + error = ENOENT; + goto errout; + } + + vp = port_resolve_vp(vp); + + + if (vp != NULL && vnevent_support(vp)) { + error = ENOTSUP; + goto errout; + } + + /* + * Associate this source to the port and get the per port + * fop cache pointer. If the source is already associated, it + * will just return the cache pointer. + */ + if (error = port_fop_associate_source(&pfcp, pp, source)) { + goto errout; + } + + /* + * Check if there is an existing association of this file. + */ + mutex_enter(&pfcp->pfc_lock); + pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object); + + /* + * if it is not the same vnode, just discard it. + */ + if (pfp != NULL && (pfp->pfop_vp != vp || pfp->pfop_dvp != dvp)) { + (void) port_remove_fop(pfp, pfcp, 1, NULL); + pfp = NULL; + } + + if (pfp == NULL) { + /* + * Add a new association, save the file name and the + * directory vnode pointer. + */ + if (error = port_pfp_setup(&pfp, pp, vp, pfcp, object, + events, user, cname, clen, dvp)) { + mutex_exit(&pfcp->pfc_lock); + goto errout; + } + + /* + * File name used, so make sure we don't free it. 
+ */ + cname = NULL; + + /* + * We need to check if the file was removed after the + * the lookup and before the fem hooks where added. If + * so, return error. The vnode will still exist as we have + * a hold on it. + */ + if (pfp->pfop_flags & PORT_FOP_ACTIVE && + !(pfp->pfop_flags & PORT_FOP_REMOVING)) { + vnode_t *tvp; + int error; + + tvp = NULL; + if ((error = port_fop_getdvp(objptr, &tvp, NULL, + NULL, NULL, follow)) == 0) { + if (tvp != NULL) { + tvp = port_resolve_vp(tvp); + } + } + if (error || tvp == NULL || tvp != vp) { + + /* + * remove the pfp and fem hooks, if pfp still + * active and it is not being removed from + * the vnode list. This is checked in + * port_remove_fop with the vnode lock held. + */ + if (port_remove_fop(pfp, pfcp, 0, NULL)) { + /* + * the pfp was removed, means no + * events where queued. Report the + * error now. + */ + error = EINVAL; + if (tvp != NULL) + VN_RELE(tvp); + mutex_exit(&pfcp->pfc_lock); + goto errout; + } + } else { + VN_RELE(tvp); + } + } + } else { + portfop_vp_t *pvp = vp->v_fopdata; + + /* + * Re-association of the object. + */ + mutex_enter(&pvp->pvp_mutex); + + /* + * remove any queued up event. + */ + if (port_remove_done_event(pfp->pfop_pev)) { + pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ; + } + + /* + * set new events to watch. + */ + pfp->pfop_events = events; + + /* + * check if this pfp is being removed. Port_fop_excep() + * will deliver an exception event. + */ + if (pfp->pfop_flags & PORT_FOP_REMOVING) { + removing = 1; + } + + /* + * If not active, mark it active even if it is being + * removed. Then it can send an exception event. + * + * Move it to the head, as the active ones are only + * in the begining. If removing, the pfp will be on + * a temporary list, no need to move it to the front + * all the entries will be processed. 
+ */ + if (!(pfp->pfop_flags & PORT_FOP_ACTIVE)) { + pfp->pfop_flags |= PORT_FOP_ACTIVE; + if (!removing) { + pvp = (portfop_vp_t *)vp->v_fopdata; + port_fop_listremove(pvp, pfp); + port_fop_listinsert_head(pvp, pfp); + } + } + mutex_exit(&pvp->pvp_mutex); + } + + + /* + * compare time stamps and deliver events. The pfp cannot + * dissappear since we are holding the cache lock. + */ + if (!removing && vp->v_type != VFIFO) { + port_check_timestamp(vp, pfp, objptr); + } + + mutex_exit(&pfcp->pfc_lock); + error = 0; + + /* + * If we have too many watches on the vnode, discard an + * inactive watch. + */ + port_fop_trimpfplist(vp); + +errout: + /* + * Release the hold acquired due to the lookup operation. + */ + if (vp != NULL) + VN_RELE(vp); + + /* + * copied file name not used, free it. + */ + if (cname != NULL) { + kmem_free(cname, clen + 1); + } + return (error); +} + + +/* + * The port_dissociate_fop() function dissociates the file object + * from the event port and removes any events that are already on the queue. + * Only the owner of the association is allowed to dissociate the file from + * the port. Returns success (0) if it was found and removed. Otherwise + * ENOENT. + */ +int +port_dissociate_fop(port_t *pp, uintptr_t object) +{ + portfop_cache_t *pfcp; + portfop_t *pfp; + port_source_t *pse; + int active = 0; + + pse = port_getsrc(pp, PORT_SOURCE_FILE); + + /* + * if this source is not associated or if there is no + * cache, nothing to do just return. + */ + if (pse == NULL || + (pfcp = (portfop_cache_t *)pse->portsrc_data) == NULL) + return (EINVAL); + + /* + * Check if this object is on the cache. Only the owner pid + * is allowed to dissociate. + */ + mutex_enter(&pfcp->pfc_lock); + pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object); + if (pfp == NULL) { + mutex_exit(&pfcp->pfc_lock); + return (ENOENT); + } + + /* + * If this was the last association, it will release + * the hold on the vnode. 
There is a race condition where + * the the pfp is being removed due to an exception event + * in port_fop_sendevent()->port_fop_excep() and port_remove_fop(). + * Since port source cache lock is held, port_fop_excep() cannot + * complete. And the vnode itself will not dissapear as long pfp's + * have a reference. + */ + (void) port_remove_fop(pfp, pfcp, 1, &active); + mutex_exit(&pfcp->pfc_lock); + return (active ? 0 : ENOENT); +} + + +/* + * port_close() calls this function to request the PORT_SOURCE_FILE source + * to remove/free all resources allocated and associated with the port. + */ + +/* ARGSUSED */ +static void +port_close_fop(void *arg, int port, pid_t pid, int lastclose) +{ + port_t *pp = arg; + portfop_cache_t *pfcp; + portfop_t **hashtbl; + portfop_t *pfp; + portfop_t *pfpnext; + int index; + port_source_t *pse; + + + pse = port_getsrc(pp, PORT_SOURCE_FILE); + + /* + * No source or no cache, nothing to do. + */ + if (pse == NULL || + (pfcp = (portfop_cache_t *)pse->portsrc_data) == NULL) + return; + /* + * Scan the cache and free all allocated portfop_t and port_kevent_t + * structures of this pid. + */ + mutex_enter(&pfcp->pfc_lock); + hashtbl = (portfop_t **)pfcp->pfc_hash; + for (index = 0; index < PORTFOP_HASHSIZE; index++) { + for (pfp = hashtbl[index]; pfp != NULL; pfp = pfpnext) { + pfpnext = pfp->pfop_hashnext; + if (pid == pfp->pfop_pid) { + (void) port_remove_fop(pfp, pfcp, 1, NULL); + } + } + } + + /* + * Due to a race between port_close_fop() and port_fop() + * trying to remove the pfp's from the port's cache, it is + * possible that some pfp's are still in the process of being + * freed so we wait. + */ + while (lastclose && pfcp->pfc_objcount) { + (void) cv_wait_sig(&pfcp->pfc_lclosecv, &pfcp->pfc_lock); + } + mutex_exit(&pfcp->pfc_lock); + /* + * last close, free the cache. 
+ */ + if (lastclose) { + ASSERT(pfcp->pfc_objcount == 0); + pse->portsrc_data = NULL; + kmem_free(pfcp, sizeof (portfop_cache_t)); + } +} + +/* + * Given the list of associations(watches), it will send exception events, + * if still active, and discard them. The exception events are handled + * seperately because, the pfp needs to be removed from the port cache and + * freed as the vnode's identity is changing or being removed. To remove + * the pfp from the port's cache, we need to hold the cache lock (pfc_lock). + * The lock order is pfc_lock -> pvp_mutex(vnode's) mutex and that is why + * the cache's lock cannot be acquired in port_fop_sendevent(). + */ +static void +port_fop_excep(list_t *tlist, int op) +{ + portfop_t *pfp; + portfop_cache_t *pfcp; + port_t *pp; + port_kevent_t *pkevp; + int error = 0; + + while (pfp = (portfop_t *)list_head(tlist)) { + int removed = 0; + /* + * remove from the temp list. Since PORT_FOP_REMOVING is + * set, no other thread should attempt to perform a + * list_remove on this pfp. + */ + list_remove(tlist, pfp); + + pfcp = pfp->pfop_pcache; + mutex_enter(&pfcp->pfc_lock); + + /* + * Remove the event from the port queue if it was queued up. + * No need to clear the PORT_FOP_KEV_ONQ flag as this pfp is + * no longer on the vnode's list. + */ + if ((pfp->pfop_flags & PORT_FOP_KEV_ONQ)) { + removed = port_remove_done_event(pfp->pfop_pev); + } + + /* + * If still active or the event was queued up and + * had not been collected yet, send an EXCEPTION event. + */ + if (pfp->pfop_flags & (PORT_FOP_ACTIVE) || removed) { + pp = pfp->pfop_pp; + /* + * Allocate a port_kevent_t non cached to send this + * event since we will be de-registering. + * The port_kevent_t cannot be pointing back to the + * pfp anymore. 
+ */ + pfp->pfop_flags &= ~PORT_FOP_ACTIVE; + error = port_alloc_event_local(pp, PORT_SOURCE_FILE, + PORT_ALLOC_DEFAULT, &pkevp); + if (!error) { + + pkevp->portkev_callback = port_fop_callback; + pkevp->portkev_arg = NULL; + pkevp->portkev_object = + pfp->pfop_pev->portkev_object; + pkevp->portkev_user = + pfp->pfop_pev->portkev_user; + /* + * Copy the pid of the watching process. + */ + pkevp->portkev_pid = + pfp->pfop_pev->portkev_pid; + pkevp->portkev_events = op; + port_send_event(pkevp); + } + } + /* + * At this point the pfp has been removed from the vnode's + * list its cached port_kevent_t is not on the done queue. + * Remove the pfp and free it from the cache. + */ + port_pcache_remove_fop(pfcp, pfp); + mutex_exit(&pfcp->pfc_lock); + } +} + +/* + * Send the file events to all of the processes watching this + * vnode. In case of hard links, the directory vnode pointer and + * the file name are compared. If the names match, then the specified + * event is sent or else, the FILE_ATTRIB event is sent, This is the + * documented behavior. + */ +void +port_fop_sendevent(vnode_t *vp, int events, vnode_t *dvp, char *cname) +{ + port_kevent_t *pkevp; + portfop_t *pfp, *npfp; + portfop_vp_t *pvp; + list_t tmplist; + int removeall = 0; + + pvp = (portfop_vp_t *)vp->v_fopdata; + mutex_enter(&pvp->pvp_mutex); + + /* + * Check if the list is empty. + * + * All entries have been removed by some other thread. + * The vnode may be still active and we got called, + * but some other thread is in the process of removing the hooks. + */ + if (!list_head(&pvp->pvp_pfoplist)) { + mutex_exit(&pvp->pvp_mutex); + return; + } + + if ((events & (FILE_EXCEPTION))) { + /* + * If it is an event for which we are going to remove + * the watches so just move it a temporary list and + * release this vnode. 
+ */ + list_create(&tmplist, sizeof (portfop_t), + offsetof(portfop_t, pfop_node)); + + /* + * If it is an UNMOUNT, MOUNTEDOVER or no file name has been + * passed for an exception event, all associations need to be + * removed. + */ + if (dvp == NULL || cname == NULL) { + removeall = 1; + } + } + + if (!removeall) { + /* + * All the active ones are in the begining of the list. + */ + for (pfp = (portfop_t *)list_head(&pvp->pvp_pfoplist); + pfp && pfp->pfop_flags & PORT_FOP_ACTIVE; pfp = npfp) { + int levents = events; + + npfp = list_next(&pvp->pvp_pfoplist, pfp); + /* + * Hard links case - If the file is being + * removed/renamed, and the name matches + * the watched file, then it is an EXCEPTION + * event or else it will be just a FILE_ATTRIB. + */ + if ((events & (FILE_EXCEPTION))) { + ASSERT(dvp != NULL && cname != NULL); + if (pfp->pfop_dvp == NULL || + (pfp->pfop_dvp == dvp && + (strcmp(cname, pfp->pfop_cname) == 0))) { + /* + * It is an exception event, move it + * to temp list and process it later. + * Note we don't set the pfp->pfop_vp + * to NULL even thought it has been + * removed from the vnode's list. This + * pointer is referenced in + * port_remove_fop(). The vnode it + * self cannot dissapear until this + * pfp gets removed and freed. + */ + port_fop_listremove(pvp, pfp); + list_insert_tail(&tmplist, (void *)pfp); + pfp->pfop_flags |= PORT_FOP_REMOVING; + continue; + } else { + levents = FILE_ATTRIB; + } + + } + + if (pfp->pfop_events & levents) { + /* + * deactivate and move it to the tail. + * If the pfp was active, it cannot be + * on the port's done queue. 
+ */ + pfp->pfop_flags &= ~PORT_FOP_ACTIVE; + port_fop_listremove(pvp, pfp); + port_fop_listinsert_tail(pvp, pfp); + + pkevp = pfp->pfop_pev; + pkevp->portkev_events |= + (levents & pfp->pfop_events); + port_send_event(pkevp); + pfp->pfop_flags |= PORT_FOP_KEV_ONQ; + } + } + } + + + if ((events & (FILE_EXCEPTION))) { + if (!removeall) { + /* + * Check the inactive associations and remove them if + * the file name matches. + */ + for (; pfp; pfp = npfp) { + npfp = list_next(&pvp->pvp_pfoplist, pfp); + if (dvp == NULL || cname == NULL || + pfp->pfop_dvp == NULL || + (pfp->pfop_dvp == dvp && + (strcmp(cname, pfp->pfop_cname) == 0))) { + port_fop_listremove(pvp, pfp); + list_insert_tail(&tmplist, (void *)pfp); + pfp->pfop_flags |= PORT_FOP_REMOVING; + } + } + } else { + /* + * Can be optimized to avoid two pass over this list + * by having a flag in the vnode's portfop_vp_t + * structure to indicate that it is going away, + * Or keep the list short by reusing inactive watches. + */ + port_fop_listmove(pvp, &tmplist); + for (pfp = (portfop_t *)list_head(&tmplist); + pfp; pfp = list_next(&tmplist, pfp)) { + pfp->pfop_flags |= PORT_FOP_REMOVING; + } + } + + /* + * Uninstall the fem hooks if there are no more associations. + * This will release the pvp mutex. + * + * Even thought all entries may have been removed, + * the vnode itself cannot disappear as there will be a + * hold on it due to this call to port_fop_sendevent. This is + * important to syncronize with a port_dissociate_fop() call + * that may be attempting to remove an object from the vnode's. + */ + port_fop_femuninstall(vp); + + /* + * Send exception events and discard the watch entries. + */ + port_fop_excep(&tmplist, events); + list_destroy(&tmplist); + + } else { + mutex_exit(&pvp->pvp_mutex); + + /* + * contain the list size. + */ + port_fop_trimpfplist(vp); + } +} + +/* + * Given the file operation, map it to the events types and send. 
+ */ +void +port_fop(vnode_t *vp, int op, int retval) +{ + int event = 0; + /* + * deliver events only if the operation was successful. + */ + if (retval) + return; + + /* + * These events occuring on the watched file. + */ + if (op & FOP_MODIFIED_MASK) { + event = FILE_MODIFIED; + } + if (op & FOP_ACCESS_MASK) { + event |= FILE_ACCESS; + } + if (op & FOP_ATTRIB_MASK) { + event |= FILE_ATTRIB; + } + + if (event) { + port_fop_sendevent(vp, event, NULL, NULL); + } +} + +/* + * ----- the unmount filesystem op(fsem) hook. + */ +int +port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr) +{ + vfs_t *vfsp = (vfs_t *)vf->fa_fnode->fn_available; + kmutex_t *mtx; + portfop_vfs_t *pvfsp, **ppvfsp; + portfop_vp_t *pvp; + int error; + + mtx = &(portvfs_hash[PORTFOP_PVFSHASH(vfsp)].pvfshash_mutex); + ppvfsp = &(portvfs_hash[PORTFOP_PVFSHASH(vfsp)].pvfshash_pvfsp); + pvfsp = NULL; + mutex_enter(mtx); + /* + * since this fsem hook is triggered, tit has to be on + * the hash list. + */ + for (pvfsp = *ppvfsp; pvfsp->pvfs != vfsp; pvfsp = pvfsp->pvfs_next) + ; + + /* + * Indicate that the unmount is in process. Don't remove it yet. + * The underlying filesystem unmount routine sets the VFS_UNMOUNTED + * flag on the vfs_t structure. But we call the filesystem unmount + * routine after removing all the file watches for this filesystem, + * otherwise the unmount will fail due to active vnodes. + * Meanwhile setting pvfsp->unmount = 1 will prevent any thread + * attempting to add a file watch. + */ + pvfsp->pvfs_unmount = 1; + mutex_exit(mtx); + + /* + * uninstall the fsem hooks. + */ + (void) fsem_uninstall(vfsp, (fsem_t *)pvfsp->pvfs_fsemp, vfsp); + + while (pvp = list_head(&pvfsp->pvfs_pvplist)) { + list_remove(&pvfsp->pvfs_pvplist, pvp); + /* + * This should send an UNMOUNTED event to all the + * watched vnode of this filesystem and uninstall + * the fem hooks. We release the hold on the vnode here + * because port_fop_femuninstall() will not do it if + * unmount is in process. 
+ */ + port_fop_sendevent(pvp->pvp_vp, UNMOUNTED, NULL, NULL); + VN_RELE(pvp->pvp_vp); + } + + error = vfsnext_unmount(vf, flag, cr); + + /* + * we free the pvfsp after the unmount has been completed. + */ + mutex_enter(mtx); + for (; *ppvfsp && (*ppvfsp)->pvfs != vfsp; + ppvfsp = &(*ppvfsp)->pvfs_next) + ; + + /* + * remove and free it. + */ + ASSERT(list_head(&pvfsp->pvfs_pvplist) == NULL); + if (*ppvfsp) { + pvfsp = *ppvfsp; + *ppvfsp = pvfsp->pvfs_next; + } + mutex_exit(mtx); + kmem_free(pvfsp, sizeof (portfop_vfs_t)); + return (error); +} + +/* + * ------------------------------file op hooks-------------------------- + * The O_TRUNC operation is caught with the VOP_SETATTR(AT_SIZE) call. + */ +static int +port_fop_open(femarg_t *vf, int mode, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_open(vf, mode, cr); + port_fop(vp, FOP_FILE_OPEN, retval); + return (retval); +} + +static int +port_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr, + caller_context_t *ct) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_write(vf, uiop, ioflag, cr, ct); + port_fop(vp, FOP_FILE_WRITE, retval); + return (retval); +} + +static int +port_fop_map(femarg_t *vf, offset_t off, struct as *as, caddr_t *addrp, + size_t len, uchar_t prot, uchar_t maxport, uint_t flags, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_map(vf, off, as, addrp, len, prot, maxport, flags, cr); + port_fop(vp, FOP_FILE_MAP, retval); + return (retval); +} + +static int +port_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr, + caller_context_t *ct) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_read(vf, uiop, ioflag, cr, ct); + port_fop(vp, FOP_FILE_READ, retval); + return (retval); +} + + +/* + * AT_SIZE - is for the open(O_TRUNC) case. 
+ */ +int +port_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr, + caller_context_t *ct) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + int events = 0; + + retval = vnext_setattr(vf, vap, flags, cr, ct); + if (vap->va_mask & (AT_SIZE|AT_MTIME)) { + events |= FOP_FILE_SETATTR_MTIME; + } + if (vap->va_mask & AT_ATIME) { + events |= FOP_FILE_SETATTR_ATIME; + } + if (vap->va_mask & (AT_SIZE|AT_CTIME)) { + events |= FOP_FILE_SETATTR_CTIME; + } + + port_fop(vp, events, retval); + return (retval); +} + +int +port_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl, + int mode, vnode_t **vpp, cred_t *cr, int flag) +{ + int retval, got = 1; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + vattr_t vatt, vatt1; + + /* + * If the file already exists, then there will be no change + * to the directory. Therefore, we need to compare the + * modification time of the directory to determine if the + * file was actually created. + */ + if (VOP_GETATTR(vp, &vatt, 0, CRED())) { + got = 0; + } + retval = vnext_create(vf, name, vap, excl, mode, vpp, cr, flag); + + if (got && !VOP_GETATTR(vp, &vatt1, 0, CRED())) { + if ((vatt1.va_mtime.tv_sec > vatt.va_mtime.tv_sec || + (vatt1.va_mtime.tv_sec = vatt.va_mtime.tv_sec && + vatt1.va_mtime.tv_nsec > vatt.va_mtime.tv_nsec))) { + /* + * File was created. 
+ */ + port_fop(vp, FOP_FILE_CREATE, retval); + } + } + return (retval); +} + +int +port_fop_remove(femarg_t *vf, char *nm, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_remove(vf, nm, cr); + port_fop(vp, FOP_FILE_REMOVE, retval); + return (retval); +} + +int +port_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_link(vf, svp, tnm, cr); + port_fop(vp, FOP_FILE_LINK, retval); + return (retval); +} + +/* + * Rename operation is allowed only when from and to directories are + * on the same filesystem. This is checked in vn_rename(). + * The target directory is notified thru a VNEVENT by the filesystem + * if the source dir != target dir. + */ +int +port_fop_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_rename(vf, snm, tdvp, tnm, cr); + port_fop(vp, FOP_FILE_RENAMESRC, retval); + return (retval); +} + +int +port_fop_mkdir(femarg_t *vf, char *dirname, vattr_t *vap, vnode_t **vpp, + cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_mkdir(vf, dirname, vap, vpp, cr); + port_fop(vp, FOP_FILE_MKDIR, retval); + return (retval); +} + +int +port_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_rmdir(vf, nm, cdir, cr); + port_fop(vp, FOP_FILE_RMDIR, retval); + return (retval); +} + +int +port_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_readdir(vf, uiop, cr, eofp); + port_fop(vp, FOP_FILE_READDIR, retval); + return (retval); +} + +int +port_fop_symlink(femarg_t *vf, char *linkname, vattr_t *vap, char *target, + cred_t *cr) +{ + int retval; + vnode_t 
*vp = (vnode_t *)vf->fa_fnode->fn_available; + + retval = vnext_symlink(vf, linkname, vap, target, cr); + port_fop(vp, FOP_FILE_SYMLINK, retval); + return (retval); +} + +/* + * acl, facl call this. + */ +int +port_fop_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flags, cred_t *cr) +{ + int retval; + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + retval = vnext_setsecattr(vf, vsap, flags, cr); + port_fop(vp, FOP_FILE_SETSECATTR, retval); + return (retval); +} + +/* + * these are events on the watched file/directory + */ +int +port_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name) +{ + vnode_t *vp = (vnode_t *)vf->fa_fnode->fn_available; + + + switch (vnevent) { + case VE_RENAME_SRC: + port_fop_sendevent(vp, FILE_RENAME_FROM, dvp, name); + break; + case VE_RENAME_DEST: + port_fop_sendevent(vp, FILE_RENAME_TO, dvp, name); + break; + case VE_REMOVE: + port_fop_sendevent(vp, FILE_DELETE, dvp, name); + break; + case VE_RMDIR: + port_fop_sendevent(vp, FILE_DELETE, dvp, name); + break; + case VE_CREATE: + port_fop_sendevent(vp, FILE_MODIFIED|FILE_ATTRIB, + NULL, NULL); + break; + case VE_LINK: + port_fop_sendevent(vp, FILE_ATTRIB, NULL, NULL); + break; + + case VE_RENAME_DEST_DIR: + port_fop_sendevent(vp, FILE_MODIFIED|FILE_ATTRIB, + NULL, NULL); + break; + + case VE_MOUNTEDOVER: + port_fop_sendevent(vp, MOUNTEDOVER, NULL, NULL); + break; + default: + break; + } + return (vnext_vnevent(vf, vnevent, dvp, name)); +} diff --git a/usr/src/uts/common/fs/tmpfs/tmp_dir.c b/usr/src/uts/common/fs/tmpfs/tmp_dir.c index de405078c3..6e2aed9e97 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_dir.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_dir.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -345,8 +344,12 @@ tdirenter( error = tdirrename(fromparent, tp, dir, name, found, tdp, cred); if (error == 0) { - vnevent_rename_dest(TNTOV(found)); + if (found != NULL) { + vnevent_rename_dest(TNTOV(found), + TNTOV(dir), name); + } } + tmpnode_rele(found); break; @@ -402,9 +405,6 @@ tdirenter( } } - if ((op == DE_RENAME) && (error == 0)) { - vnevent_rename_src(TNTOV(tp)); - } out: if (error && (op == DE_LINK || op == DE_RENAME)) { /* diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c index 8932732a29..c2d921bba9 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c @@ -975,6 +975,10 @@ again: } *vpp = newvp; } + + if (error == 0) { + vnevent_create(*vpp); + } return (0); } @@ -1043,7 +1047,7 @@ tmp_remove(struct vnode *dvp, char *nm, struct cred *cred) rw_exit(&tp->tn_rwlock); rw_exit(&parent->tn_rwlock); - vnevent_remove(TNTOV(tp)); + vnevent_remove(TNTOV(tp), dvp, nm); tmpnode_rele(tp); TRACE_3(TR_FAC_TMPFS, TR_TMPFS_REMOVE, @@ -1093,6 +1097,9 @@ tmp_link(struct vnode *dvp, struct vnode *srcvp, char *tnm, struct cred *cred) error = tdirenter(tm, parent, tnm, DE_LINK, (struct tmpnode *)NULL, from, NULL, (struct tmpnode **)NULL, cred); rw_exit(&parent->tn_rwlock); + if (error == 0) { + vnevent_link(srcvp); + } return (error); } @@ -1184,6 +1191,15 @@ tmp_rename( error = 0; goto done; } + vnevent_rename_src(TNTOV(fromtp), odvp, onm); + + /* + * Notify the target directory if not same as + * source directory. 
+ */ + if (ndvp != odvp) { + vnevent_rename_dest_dir(ndvp); + } /* * Unlink from source. @@ -1338,7 +1354,7 @@ done: done1: rw_exit(&self->tn_rwlock); rw_exit(&parent->tn_rwlock); - vnevent_rmdir(TNTOV(self)); + vnevent_rmdir(TNTOV(self), dvp, nm); tmpnode_rele(self); return (error); diff --git a/usr/src/uts/common/fs/udfs/udf_dir.c b/usr/src/uts/common/fs/udfs/udf_dir.c index d97a80bb40..d470a2588a 100644 --- a/usr/src/uts/common/fs/udfs/udf_dir.c +++ b/usr/src/uts/common/fs/udfs/udf_dir.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -556,9 +555,13 @@ out: */ if (err == 0) { if (tip) { - vnevent_rename_dest(ITOV(tip)); + vnevent_rename_dest(ITOV(tip), ITOV(tdp), + namep); + } + + if (sdp != tdp) { + vnevent_rename_dest_dir(ITOV(tdp)); } - vnevent_rename_src(ITOV(sip)); } /* @@ -849,9 +852,9 @@ out_novfs: */ if (err == 0) { if (op == DR_REMOVE) { - vnevent_remove(ITOV(ip)); + vnevent_remove(ITOV(ip), ITOV(dp), namep); } else if (op == DR_RMDIR) { - vnevent_rmdir(ITOV(ip)); + vnevent_rmdir(ITOV(ip), ITOV(dp), namep); } } VN_RELE(ITOV(ip)); diff --git a/usr/src/uts/common/fs/udfs/udf_vnops.c b/usr/src/uts/common/fs/udfs/udf_vnops.c index b67d80c394..defbd544f5 100644 --- a/usr/src/uts/common/fs/udfs/udf_vnops.c +++ b/usr/src/uts/common/fs/udfs/udf_vnops.c @@ -732,6 +732,7 @@ udf_create(struct vnode *dvp, (void) ud_itrunc(ip, 0, 0, cr); rw_exit(&ip->i_rwlock); } + vnevent_create(ITOV(ip)); } } @@ -820,6 +821,10 @@ udf_link(struct vnode *tdvp, ITIMES(sip); ITIMES(tdp); + if (error == 0) { + vnevent_link(svp); + } + return (error); } @@ -913,6 +918,7 @@ udf_rename(struct vnode *sdvp, rw_exit(&tdp->i_rwlock); goto errout; } + vnevent_rename_src(ITOV(sip), sdvp, snm); rw_exit(&tdp->i_rwlock); rw_enter(&sdp->i_rwlock, RW_WRITER); diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c index fe903b11e9..414aa037c4 100644 --- a/usr/src/uts/common/fs/ufs/ufs_vnops.c +++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c @@ -2985,6 +2985,10 @@ again: cr); rw_exit(&ip->i_rwlock); } + + } + if (error == 0) { + vnevent_create(ITOV(ip)); } } } @@ -3131,7 +3135,7 @@ retry_remove: if (rmvp != NULL) { /* Only send the event if there were no errors */ if (error == 0) - vnevent_remove(rmvp); + vnevent_remove(rmvp, vp, nm); VN_RELE(rmvp); } out: @@ -3210,6 +3214,10 @@ unlock: TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_LINK, trans_size); ufs_lockfs_end(ulp); } + + if (!error) { + vnevent_link(svp); + } out: return (error); } @@ -3590,14 +3598,22 @@ unlock: */ if (error == 0) { if (tvp != 
NULL) - vnevent_rename_dest(tvp); + vnevent_rename_dest(tvp, tdvp, tnm); + + /* + * Notify the target directory of the rename event + * if source and target directories are not same. + */ + if (sdvp != tdvp) + vnevent_rename_dest_dir(tdvp); + /* * Note that if ufs_direnter_lr() returned ESAME then * this event will still be sent. This isn't expected * to be a problem for anticipated usage by consumers. */ if (sip != NULL) - vnevent_rename_src(ITOV(sip)); + vnevent_rename_src(ITOV(sip), sdvp, snm); } if (tvp != NULL) @@ -3743,7 +3759,7 @@ retry_rmdir: if (rmvp != NULL) { /* Only send the event if there were no errors */ if (error == 0) - vnevent_rmdir(rmvp); + vnevent_rmdir(rmvp, vp, nm); VN_RELE(rmvp); } out: diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index c00879da35..9aa6a3f657 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -138,8 +138,9 @@ int vfshsz = 512; /* # of heads/locks in vfs hash arrays */ /* must be power of 2! */ timespec_t vfs_mnttab_ctime; /* mnttab created time */ timespec_t vfs_mnttab_mtime; /* mnttab last modified time */ -char *vfs_dummyfstype = "\0"; struct pollhead vfs_pollhd; /* for mnttab pollers */ +struct vnode *mntdummyvp; /* Will be set once mntfs is loaded */ +int mntfstype; /* will be set once mnt fs is mounted */ /* * Table for generic options recognized in the VFS layer and acted @@ -1437,6 +1438,10 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, vfs_setresource(vfsp, resource); vfs_setmntpoint(vfsp, mountpt); + /* + * going to mount on this vnode, so notify. 
+ */ + vnevent_mountedover(vp); error = VFS_MOUNT(vfsp, vp, uap, credp); if (uap->flags & MS_RDONLY) @@ -2634,6 +2639,9 @@ void vfs_mnttab_modtimeupd() { hrtime_t oldhrt, newhrt; + struct uio uio; + struct iovec iov; + char buf[1]; ASSERT(RW_WRITE_HELD(&vfslist)); oldhrt = ts2hrt(&vfs_mnttab_mtime); @@ -2649,6 +2657,30 @@ vfs_mnttab_modtimeupd() hrt2ts(newhrt, &vfs_mnttab_mtime); } pollwakeup(&vfs_pollhd, (short)POLLRDBAND); + + if (mntdummyvp != NULL) { + /* + * Make a VOP_WRITE call on the dummy vnode so that any + * module interested in mnttab getting modified could + * intercept this vnode and capture the event. + * + * Pass a dummy uio struct. Nobody should reference the + * buffer. We need to pass a valid uio struct pointer to take + * care of any module intercepting this vnode which could + * attempt to look at it. Currently only the file events + * notification module intercepts this vnode. + */ + bzero(&uio, sizeof (uio)); + bzero(&iov, sizeof (iov)); + iov.iov_base = buf; + iov.iov_len = 0; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_loffset = 0; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_resid = 0; + (void) VOP_WRITE(mntdummyvp, &uio, 0, kcred, NULL); + } } int diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c index 7129cfead9..55813947ec 100644 --- a/usr/src/uts/common/fs/vnode.c +++ b/usr/src/uts/common/fs/vnode.c @@ -89,6 +89,9 @@ static vopstats_t *vs_templatep; /* Kmem cache handle for vsk_anchor_t allocations */ kmem_cache_t *vsk_anchor_cache; +/* file events cleanup routine */ +extern void free_fopdata(vnode_t *); + /* * Root of AVL tree for the kstats associated with vopstats. Lock protects * updates to vsktat_tree. 
@@ -1988,10 +1991,10 @@ vn_cache_constructor(void *buf, void *cdrarg, int kmflags) cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL); rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL); rw_init(&vp->v_mslock, NULL, RW_DEFAULT, NULL); - vp->v_femhead = NULL; /* Must be done before vn_reinit() */ vp->v_path = NULL; vp->v_mpssdata = NULL; + vp->v_fopdata = NULL; return (0); } @@ -2058,6 +2061,10 @@ vn_recycle(vnode_t *vp) kmem_free(vp->v_path, strlen(vp->v_path) + 1); vp->v_path = NULL; } + + if (vp->v_fopdata != NULL) { + free_fopdata(vp); + } vp->v_mpssdata = NULL; } @@ -2110,6 +2117,7 @@ vn_alloc(int kmflag) if (vp != NULL) { vp->v_femhead = NULL; /* Must be done before vn_reinit() */ + vp->v_fopdata = NULL; vn_reinit(vp); } @@ -2138,6 +2146,10 @@ vn_free(vnode_t *vp) kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead))); vp->v_femhead = NULL; } + + if (vp->v_fopdata != NULL) { + free_fopdata(vp); + } vp->v_mpssdata = NULL; kmem_cache_free(vn_cache, vp); } @@ -2200,43 +2212,79 @@ vnevent_support(vnode_t *vp) if (vp == NULL) return (EINVAL); - return (VOP_VNEVENT(vp, VE_SUPPORT)); + return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL)); +} + +void +vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name); +} + +void +vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name); +} + +void +vnevent_rename_dest_dir(vnode_t *vp) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL); +} + +void +vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name); } void -vnevent_rename_src(vnode_t *vp) +vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name) { if (vp == NULL || vp->v_femhead == NULL) { return; 
} - (void) VOP_VNEVENT(vp, VE_RENAME_SRC); + (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name); } void -vnevent_rename_dest(vnode_t *vp) +vnevent_create(vnode_t *vp) { if (vp == NULL || vp->v_femhead == NULL) { return; } - (void) VOP_VNEVENT(vp, VE_RENAME_DEST); + (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL); } void -vnevent_remove(vnode_t *vp) +vnevent_link(vnode_t *vp) { if (vp == NULL || vp->v_femhead == NULL) { return; } - (void) VOP_VNEVENT(vp, VE_REMOVE); + (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL); } void -vnevent_rmdir(vnode_t *vp) +vnevent_mountedover(vnode_t *vp) { if (vp == NULL || vp->v_femhead == NULL) { return; } - (void) VOP_VNEVENT(vp, VE_RMDIR); + (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL); } /* @@ -3558,11 +3606,11 @@ fop_shrlock( } int -fop_vnevent(vnode_t *vp, vnevent_t vnevent) +fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm) { int err; - err = (*(vp)->v_op->vop_vnevent)(vp, vnevent); + err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm); VOPSTATS_UPDATE(vp, vnevent); return (err); } diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 24b07aa8e1..cd592628d9 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -1165,6 +1165,10 @@ top: VN_RELE(ZTOV(zp)); goto top; } + + if (error == 0) { + vnevent_create(ZTOV(zp)); + } } } out: @@ -1250,7 +1254,7 @@ top: goto out; } - vnevent_remove(vp); + vnevent_remove(vp, dvp, name); dnlc_remove(dvp, name); @@ -1513,7 +1517,7 @@ top: goto out; } - vnevent_rmdir(vp); + vnevent_rmdir(vp, dvp, name); /* * Grab a lock on the directory to make sure that noone is @@ -2433,9 +2437,17 @@ top: } } - vnevent_rename_src(ZTOV(szp)); + vnevent_rename_src(ZTOV(szp), sdvp, snm); if (tzp) - vnevent_rename_dest(ZTOV(tzp)); + vnevent_rename_dest(ZTOV(tzp), tdvp, tnm); + + /* + * notify the target directory if it is not the same + * as source directory. 
+ */ + if (tdvp != sdvp) { + vnevent_rename_dest_dir(tdvp); + } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */ @@ -2768,6 +2780,10 @@ top: zfs_dirent_unlock(dl); + if (error == 0) { + vnevent_link(svp); + } + ZFS_EXIT(zfsvfs); return (error); } @@ -3701,6 +3717,7 @@ const fs_operation_def_t zfs_dvnodeops_template[] = { VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, + VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, NULL, NULL }; diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h index db7814a07c..ef4365b9d0 100644 --- a/usr/src/uts/common/nfs/nfs4.h +++ b/usr/src/uts/common/nfs/nfs4.h @@ -919,7 +919,7 @@ extern int deleg_setattr(femarg_t *, vattr_t *, int, cred_t *, extern int deleg_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *, caller_context_t *); extern int deleg_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *); -extern int deleg_vnevent(femarg_t *, vnevent_t); +extern int deleg_vnevent(femarg_t *, vnevent_t, vnode_t *, char *); extern void rfs4_mon_hold(void *); extern void rfs4_mon_rele(void *); diff --git a/usr/src/uts/common/os/port_subr.c b/usr/src/uts/common/os/port_subr.c index 11a701a78a..ab73f2dd61 100644 --- a/usr/src/uts/common/os/port_subr.c +++ b/usr/src/uts/common/os/port_subr.c @@ -197,7 +197,7 @@ port_send_event(port_kevent_t *pkevp) * ports if being polled. */ if (pkevp->portkev_source != PORT_SOURCE_FD && - portq->portq_flags & PORTQ_POLLIN) { + portq->portq_flags & PORTQ_POLLIN) { portq->portq_flags &= ~PORTQ_POLLIN; mutex_exit(&portq->portq_mutex); pollwakeup(&pkevp->portkev_port->port_pollhd, POLLIN); @@ -390,10 +390,11 @@ port_remove_event_doneq(port_kevent_t *pkevp, port_queue_t *portq) * Currently this function is required to cancel a fired event because * the application is delivering new association data (see port_associate_fd()). 
*/ -void +int port_remove_done_event(port_kevent_t *pkevp) { port_queue_t *portq; + int removed = 0; portq = &pkevp->portkev_port->port_queue; mutex_enter(&portq->portq_mutex); @@ -411,9 +412,11 @@ port_remove_done_event(port_kevent_t *pkevp) } /* now remove event from the port queue */ port_remove_event_doneq(pkevp, portq); + removed = 1; } port_unblock(portq); mutex_exit(&portq->portq_mutex); + return (removed); } /* @@ -777,3 +780,15 @@ port_dissociate_ksource(int port, int source, port_source_t *ps) releasef(port); return (0); } + +void +free_fopdata(vnode_t *vp) +{ + portfop_vp_t *pvp; + pvp = vp->v_fopdata; + ASSERT(pvp->pvp_femp == NULL); + mutex_destroy(&pvp->pvp_mutex); + list_destroy(&pvp->pvp_pfoplist); + kmem_free(pvp, sizeof (*pvp)); + vp->v_fopdata = NULL; +} diff --git a/usr/src/uts/common/sys/fem.h b/usr/src/uts/common/sys/fem.h index 04afc1466d..2816734291 100644 --- a/usr/src/uts/common/sys/fem.h +++ b/usr/src/uts/common/sys/fem.h @@ -233,7 +233,9 @@ struct fem_head { int flag, cred_t *cr); \ int (*femop_shrlock)(femarg_t *vf, int cmd, \ struct shrlock *shr, int flag, cred_t *cr); \ - int (*femop_vnevent)(femarg_t *vf, vnevent_t vnevent) /* NB: No ";" */ + int (*femop_vnevent)(femarg_t *vf, vnevent_t vnevent, \ + vnode_t *dvp, char *cname) + /* NB: No ";" */ struct fem { const char *name; @@ -340,7 +342,8 @@ extern int vnext_getsecattr(femarg_t *vf, vsecattr_t *vsap, int flag, cred_t *cr); extern int vnext_shrlock(femarg_t *vf, int cmd, struct shrlock *shr, int flag, cred_t *cr); -extern int vnext_vnevent(femarg_t *vf, vnevent_t vevent); +extern int vnext_vnevent(femarg_t *vf, vnevent_t vevent, vnode_t *dvp, + char *cname); extern int vfsnext_mount(fsemarg_t *vf, vnode_t *mvp, struct mounta *uap, cred_t *cr); diff --git a/usr/src/uts/common/sys/port.h b/usr/src/uts/common/sys/port.h index a0ed65ffce..ccb0308255 100644 --- a/usr/src/uts/common/sys/port.h +++ b/usr/src/uts/common/sys/port.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun 
Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,6 +42,7 @@ extern "C" { #define PORT_SOURCE_FD 4 #define PORT_SOURCE_ALERT 5 #define PORT_SOURCE_MQ 6 +#define PORT_SOURCE_FILE 7 typedef struct port_event { int portev_events; /* event data is source specific */ @@ -57,8 +58,24 @@ typedef struct port_notify { } port_notify_t; +typedef struct file_obj { + timestruc_t fo_atime; /* Access time from stat(2) */ + timestruc_t fo_mtime; /* Modification time from stat(2) */ + timestruc_t fo_ctime; /* Change time from stat(2) */ + uintptr_t fo_pad[3]; /* For future expansion */ + char *fo_name; /* Null terminated file name */ +} file_obj_t; + #if defined(_SYSCALL32) +typedef struct file_obj32 { + timestruc32_t fo_atime; /* Access time got from stat(2) */ + timestruc32_t fo_mtime; /* Modification time from stat(2) */ + timestruc32_t fo_ctime; /* Change time from stat(2) */ + caddr32_t fo_pad[3]; /* For future expansion */ + caddr32_t fo_name; /* Null terminated file name */ +} file_obj32_t; + typedef struct port_event32 { int portev_events; /* events detected */ ushort_t portev_source; /* user, timer, aio, etc */ @@ -79,6 +96,45 @@ typedef struct port_notify32 { #define PORT_ALERT_UPDATE 0x02 #define PORT_ALERT_INVALID (PORT_ALERT_SET | PORT_ALERT_UPDATE) +/* + * PORT_SOURCE_FILE - events + */ + +/* + * User watchable file events + */ +#define FILE_ACCESS 0x00000001 +#define FILE_MODIFIED 0x00000002 +#define FILE_ATTRIB 0x00000004 +#define FILE_NOFOLLOW 0x10000000 + +/* + * exception file events + */ + +/* + * The watched file.. + */ +#define FILE_DELETE 0x00000010 +#define FILE_RENAME_TO 0x00000020 +#define FILE_RENAME_FROM 0x00000040 +/* + * The filesystem on which the watched file resides got + * unmounted. + */ +#define UNMOUNTED 0x20000000 +/* + * Some other file/filesystem got mounted over the + * watched file/directory. 
+ */ +#define MOUNTEDOVER 0x40000000 + +/* + * Helper type + */ +#define FILE_EXCEPTION (UNMOUNTED|FILE_DELETE|FILE_RENAME_TO \ + |FILE_RENAME_FROM|MOUNTEDOVER) + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/port_impl.h b/usr/src/uts/common/sys/port_impl.h index a80e7afc84..e463e47a6e 100644 --- a/usr/src/uts/common/sys/port_impl.h +++ b/usr/src/uts/common/sys/port_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ extern "C" { #include <sys/port.h> #include <sys/port_kernel.h> #include <sys/vnode.h> +#include <sys/fem.h> /* * port system call codes @@ -212,6 +213,138 @@ typedef struct portfd { (&(pcp)->pc_hash[((fd) % (pcp)->pc_hashsize)]) /* + * PORT_SOURCE_FILE -- File Events Notification sources + */ +#define PORT_FOP_BUCKET(pcp, id) \ + (portfop_t **)(&(pcp)->pfc_hash[(((ulong_t)id >> 8) & \ + (PORTFOP_HASHSIZE - 1))]) + +/* + * This structure is used to register a file object to be watched. + * + * The pfop_flags are protected by the vnode's pvp_mutex lock. + * The pfop list (vnode's list) is protected by the pvp_mutex when it is on + * the vnode's list. + * + * All the rest of the fields are protected by the port's source cache lock + * pfcp_lock. + */ +typedef struct portfop { + int pfop_events; + int pfop_flags; /* above flags. 
*/ + uintptr_t pfop_object; /* object address */ + vnode_t *pfop_vp; + vnode_t *pfop_dvp; + port_t *pfop_pp; + fem_t *pfop_fem; + list_node_t pfop_node; /* list of pfop's per vnode */ + struct portfop *pfop_hashnext; /* hash list */ + pid_t pfop_pid; /* owner of portfop */ + struct portfop_cache *pfop_pcache; + port_kevent_t *pfop_pev; /* event pointers */ + char *pfop_cname; /* file component name */ + int pfop_clen; +} portfop_t; + +/* + * pfop_flags + */ +#define PORT_FOP_ACTIVE 0x1 +#define PORT_FOP_REMOVING 0x2 +#define PORT_FOP_KEV_ONQ 0x4 + +typedef struct portfop_vfs { + vfs_t *pvfs; + int pvfs_unmount; /* 1 if unmount in progress */ + list_t pvfs_pvplist; /* list of vnodes from */ + fsem_t *pvfs_fsemp; + struct portfop_vfs *pvfs_next; /* hash list */ +} portfop_vfs_t; + +typedef struct portfop_vfs_hash { + kmutex_t pvfshash_mutex; + struct portfop_vfs *pvfshash_pvfsp; +} portfop_vfs_hash_t; + +typedef struct portfop_vp { + vnode_t *pvp_vp; + kmutex_t pvp_mutex; + int pvp_cnt; /* number of watches */ + list_t pvp_pfoplist; + list_node_t pvp_pvfsnode; + struct portfop *pvp_lpfop; /* oldest pfop */ + fem_t *pvp_femp; + struct portfop_vfs *pvp_pvfsp; +} portfop_vp_t; + +#define PORTFOP_PVFSHASH_SZ 256 +#define PORTFOP_PVFSHASH(vfsp) (((uintptr_t)(vfsp) >> 4) % PORTFOP_PVFSHASH_SZ) + +/* + * file operations flag. 
+ */ + +/* + * PORT_SOURCE_FILE - vnode operations + */ + +#define FOP_FILE_OPEN 0x00000001 +#define FOP_FILE_READ 0x00000002 +#define FOP_FILE_WRITE 0x00000004 +#define FOP_FILE_MAP 0x00000008 +#define FOP_FILE_IOCTL 0x00000010 +#define FOP_FILE_CREATE 0x00000020 +#define FOP_FILE_MKDIR 0x00000040 +#define FOP_FILE_SYMLINK 0x00000080 +#define FOP_FILE_LINK 0x00000100 +#define FOP_FILE_RENAME 0x00000200 +#define FOP_FILE_REMOVE 0x00000400 +#define FOP_FILE_RMDIR 0x00000800 +#define FOP_FILE_READDIR 0x00001000 +#define FOP_FILE_RENAMESRC 0x00002000 +#define FOP_FILE_RENAMEDST 0x00004000 +#define FOP_FILE_REMOVEFILE 0x00008000 +#define FOP_FILE_REMOVEDIR 0x00010000 +#define FOP_FILE_SETSECATTR 0x00020000 +#define FOP_FILE_SETATTR_ATIME 0x00040000 +#define FOP_FILE_SETATTR_MTIME 0x00080000 +#define FOP_FILE_SETATTR_CTIME 0x00100000 +#define FOP_FILE_LINK_SRC 0x00200000 + +/* + * File modification event. + */ +#define FOP_MODIFIED_MASK (FOP_FILE_WRITE|FOP_FILE_CREATE \ + |FOP_FILE_REMOVE|FOP_FILE_LINK \ + |FOP_FILE_RENAMESRC|FOP_FILE_RENAMEDST \ + |FOP_FILE_MKDIR|FOP_FILE_RMDIR \ + |FOP_FILE_SYMLINK|FOP_FILE_SETATTR_MTIME) + +/* + * File access event + */ +#define FOP_ACCESS_MASK (FOP_FILE_READ|FOP_FILE_READDIR \ + |FOP_FILE_MAP|FOP_FILE_SETATTR_ATIME) + +/* + * File attrib event + */ +#define FOP_ATTRIB_MASK (FOP_FILE_WRITE|FOP_FILE_CREATE \ + |FOP_FILE_REMOVE|FOP_FILE_LINK \ + |FOP_FILE_RENAMESRC|FOP_FILE_RENAMEDST \ + |FOP_FILE_MKDIR|FOP_FILE_RMDIR \ + |FOP_FILE_SYMLINK|FOP_FILE_SETATTR_CTIME \ + |FOP_FILE_LINK_SRC|FOP_FILE_SETSECATTR) + + +/* + * valid watchable events + */ +#define FILE_EVENTS_MASK (FILE_ACCESS|FILE_MODIFIED|FILE_ATTRIB \ + |FILE_NOFOLLOW) +/* --- End file events --- */ + +/* * port_kstat_t contains the event port kernel values which are * exported to kstat. * Currently only the number of active ports is exported. 
@@ -223,7 +356,7 @@ typedef struct port_kstat { /* misc functions */ int port_alloc_event_block(port_t *, int, int, struct port_kevent **); void port_push_eventq(port_queue_t *); -void port_remove_done_event(struct port_kevent *); +int port_remove_done_event(struct port_kevent *); struct port_kevent *port_get_kevent(list_t *, struct port_kevent *); void port_block(port_queue_t *); void port_unblock(port_queue_t *); diff --git a/usr/src/uts/common/sys/port_kernel.h b/usr/src/uts/common/sys/port_kernel.h index bfc65586fc..7456f63573 100644 --- a/usr/src/uts/common/sys/port_kernel.h +++ b/usr/src/uts/common/sys/port_kernel.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -98,12 +98,29 @@ typedef struct port_source { int portsrc_cnt; /* # of associations */ void (*portsrc_close)(void *, int, pid_t, int); void *portsrc_closearg; /* callback arg */ + void *portsrc_data; /* Private data of source */ struct port_source *portsrc_next; struct port_source *portsrc_prev; } port_source_t; /* + * PORT_SOURCE_FILE cache structure. + */ +#define PORTFOP_HASHSIZE 256 /* cache space for fop events */ + +/* + * One cache for each port that uses PORT_SOURCE_FILE. + */ +typedef struct portfop_cache { + kmutex_t pfc_lock; /* lock to protect cache */ + kcondvar_t pfc_lclosecv; /* last close cv */ + int pfc_objcount; /* track how many file obj are hashed */ + struct portfop *pfc_hash[PORTFOP_HASHSIZE]; /* hash table */ +} portfop_cache_t; + +/* + * PORT_SOURCE_FD cache per port. * One cache for each port that uses PORT_SOURCE_FD. * pc_lock must be the first element of port_fdcache_t to keep it * synchronized with the offset of pc_lock in pollcache_t (see pollrelock()). 
@@ -147,6 +164,8 @@ void port_init_event(port_kevent_t *, uintptr_t, void *, int port_dup_event(port_kevent_t *, port_kevent_t **, int); int port_associate_fd(struct port *, int, uintptr_t, int, void *); int port_dissociate_fd(struct port *, uintptr_t); +int port_associate_fop(struct port *, int, uintptr_t, int, void *); +int port_dissociate_fop(struct port *, uintptr_t); /* misc functions */ void port_free_event_local(port_kevent_t *, int counter); diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index d2494e013e..8eaf4b7fc9 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -248,6 +248,7 @@ typedef struct vnode { struct vnode *v_msnext; /* list of vnodes on an mset */ struct vnode *v_msprev; /* list of vnodes on an mset */ krwlock_t v_mslock; /* protects v_mset */ + void *v_fopdata; /* list of file ops event watches */ } vnode_t; #define IS_DEVVP(vp) \ @@ -465,7 +466,11 @@ typedef enum vnevent { VE_RENAME_SRC = 1, /* Rename, with vnode as source */ VE_RENAME_DEST = 2, /* Rename, with vnode as target/destination */ VE_REMOVE = 3, /* Remove of vnode's name */ - VE_RMDIR = 4 /* Remove of directory vnode's name */ + VE_RMDIR = 4, /* Remove of directory vnode's name */ + VE_CREATE = 5, /* Create with vnode's name which exists */ + VE_LINK = 6, /* Link with vnode's name as source */ + VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ + VE_MOUNTEDOVER = 8 /* File or Filesystem got mounted over vnode */ } vnevent_t; /* @@ -606,7 +611,8 @@ struct pollhead; int, cred_t *); \ int (*vop_shrlock)(vnode_t *, int, struct shrlock *, \ int, cred_t *); \ - int (*vop_vnevent)(vnode_t *, vnevent_t) /* NB: No ";" */ + int (*vop_vnevent)(vnode_t *, vnevent_t, vnode_t *, char *) \ + /* NB: No ";" */ /* * Operations on vnodes. 
Note: File systems must never operate directly @@ -675,7 +681,7 @@ extern void fop_dispose(vnode_t *, struct page *, int, int, cred_t *); extern int fop_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *); extern int fop_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *); extern int fop_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *); -extern int fop_vnevent(vnode_t *, vnevent_t); +extern int fop_vnevent(vnode_t *, vnevent_t, vnode_t *, char *); #endif /* _KERNEL */ @@ -765,8 +771,8 @@ extern int fop_vnevent(vnode_t *, vnevent_t); fop_setsecattr(vp, vsap, f, cr) #define VOP_SHRLOCK(vp, cmd, shr, f, cr) \ fop_shrlock(vp, cmd, shr, f, cr) -#define VOP_VNEVENT(vp, vnevent) \ - fop_vnevent(vp, vnevent) +#define VOP_VNEVENT(vp, vnevent, dvp, fnm) \ + fop_vnevent(vp, vnevent, dvp, fnm) #define VOPNAME_OPEN "open" #define VOPNAME_CLOSE "close" @@ -908,10 +914,14 @@ void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp, const char *path, size_t plen); /* Vnode event notification */ -void vnevent_rename_src(vnode_t *); -void vnevent_rename_dest(vnode_t *); -void vnevent_remove(vnode_t *); -void vnevent_rmdir(vnode_t *); +void vnevent_rename_src(vnode_t *, vnode_t *, char *); +void vnevent_rename_dest(vnode_t *, vnode_t *, char *); +void vnevent_remove(vnode_t *, vnode_t *, char *); +void vnevent_rmdir(vnode_t *, vnode_t *, char *); +void vnevent_create(vnode_t *); +void vnevent_link(vnode_t *); +void vnevent_rename_dest_dir(vnode_t *); +void vnevent_mountedover(vnode_t *); int vnevent_support(vnode_t *); /* Context identification */ diff --git a/usr/src/uts/intel/portfs/Makefile b/usr/src/uts/intel/portfs/Makefile index 536b845c59..754a7b60b2 100644 --- a/usr/src/uts/intel/portfs/Makefile +++ b/usr/src/uts/intel/portfs/Makefile @@ -20,7 +20,7 @@ # # # uts/intel/portfs/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" @@ -35,7 +35,7 @@ # UTSBASE = ../.. -PORTFS_OBJS += port.o port_vnops.o port_fd.o +PORTFS_OBJS += port.o port_vnops.o port_fd.o port_fop.o # # Define the module and object file sets. diff --git a/usr/src/uts/sparc/portfs/Makefile b/usr/src/uts/sparc/portfs/Makefile index a15476eada..b08d2828e5 100644 --- a/usr/src/uts/sparc/portfs/Makefile +++ b/usr/src/uts/sparc/portfs/Makefile @@ -20,7 +20,7 @@ # # # uts/sparc/portfs/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -35,7 +35,7 @@ # UTSBASE = ../.. -PORTFS_OBJS += port.o port_vnops.o port_fd.o +PORTFS_OBJS += port.o port_vnops.o port_fd.o port_fop.o # # Define the module and object file sets. |